In [2]:
### THIS SCRIPT CLEANS THE COLLECTED SEP ###   

In [3]:
import pymongo 
import tqdm
import requests
import json

from bson.objectid import ObjectId
from pprint import pprint

##### INIT GLOBAL VARIABLES#####

# MongoDB connection: local server on the default port
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# handle to the project database (subscript access is equivalent to attribute access)
db = client['visualizing_sep']

In [6]:
def update_sep_json(id_title,inpho_api,collection_to_update):
    """Refresh the stored InPhO data for a single SEP article.

    Builds the InPhO JSON URL from the given endpoint, downloads it, and writes
    both the URL and the parsed JSON onto the document whose ``title`` equals
    ``id_title``. Afterwards it prints the update acknowledgement and
    pretty-prints the matching document(s) so the change can be checked by eye.

    Args:
        id_title: Value matched against the ``title`` field of the document
            to update.
        inpho_api: InPhO endpoint path without extension, e.g.
            ``'thinker/5375'``. It is expanded to
            ``http://inpho.cogs.indiana.edu/<inpho_api>.json``.
        collection_to_update: Collection object providing ``update_one`` and
            ``find`` (the calls in this notebook pass ``db.sep_data``).

    Returns:
        None. Results are reported via ``print`` / ``pprint``.

    Raises:
        requests.HTTPError: If InPhO answers with an error status code.
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """

    # Build the API endpoint URL from the *parameter*, not from a notebook
    # global, so the function does not depend on the caller's variable names.
    api_url = f'http://inpho.cogs.indiana.edu/{inpho_api}.json'

    # Get JSON data from InPhO. Fail loudly on HTTP errors so an error page is
    # never written into the database; the timeout keeps the cell from hanging.
    request_inpho = requests.get(api_url, timeout=30)
    request_inpho.raise_for_status()
    inpho_json = json.loads(request_inpho.text)

    # Fields to overwrite on the matching document
    new_values = {
        'inpho_api': api_url,
        'inpho_json': inpho_json
    }

    # Update MongoDB
    result = collection_to_update.update_one(
        { 'title': id_title },
        { '$set': new_values}
    )

    # Boolean confirmation that the write was acknowledged
    print (f"acknowledged: {api_url}\n", result.acknowledged)

    # Read the document back (same title that was just updated) to verify it
    doc = list(collection_to_update.find(
            filter={'title': id_title},
            projection=['title','inpho_api', 'inpho_json']))

    pprint(doc)

In [18]:
##### Cleaning Sep_Data ########

#1. Check for errors with InPhO apis.

# Titles of the articles whose 'inpho_api' field still holds the error marker,
# ordered by title (ascending).
sep_no_inpho = list(
    db.sep_data.find(
        filter={'inpho_api': 'Error: No InPhO entry'},
        projection=['title'],
        sort=[('title', pymongo.ASCENDING)],
    )
)
pprint(sep_no_inpho)

#There were 15 SEP articles that originally didn't link into InPhO at all. I looked each one up in InPhO, found the appropriate API endpoint where an entry exists, and recorded that URL on the article. Not all of them had InPhO entries, though. When we run this check now, only 4 articles remain that don't link into InPhO.

#Corrected Articles:
# 'Algebraic Propositional Logic', inpho: idea/6267
# 'Basil [Cardinal] Bessarion', thinker/6297
# 'Dynamic Epistemic Logic', idea/5642
# 'Ibn Sina [Avicenna]', thinker/2577
# 'Identity and Individuality in Quantum Theory', idea/1191
# 'Legalism in Chinese Philosophy', idea/5595
# 'Martin Buber', thinker/2718
# 'Religious Daoism', idea/5663
# 'Supererogation', idea/1128
# 'Divine Illumination', idea/1383
# 'Kumārila' thinker/5375

#Articles that don't link
# {'title': 'Hegel’s Dialectics'}, NONE
# {'title': 'Ibn Sina’s Natural Philosophy'}, NONE
# {'title': 'Nietzsche’s Life and Works'} NONE
# {'title': 'Sophie de Grouchy'}, NONE


[{'_id': ObjectId('5e95471e8d5640f2c15ad91c'), 'title': 'Hegel’s Dialectics'},
{'_id':ObjectId('5e9547f18d5640f2c15ad94c'),
'title':'Ibn Sina’s Natural Philosophy'},
{'_id':ObjectId('5e954f218d5640f2c15adae6'),
'title':'Nietzsche’s Life and Works'},
{'_id': ObjectId('5e95db3616876e26996d53e3'), 'title': 'Sophie de Grouchy'}]


In [17]:
# Apply the corrected InPhO endpoint to one of the records identified above
title, api_endpoint = 'Kumārila', 'thinker/5375'
collection_to_update = db.sep_data

update_sep_json(title, api_endpoint, collection_to_update)

acknowledged: http://inpho.cogs.indiana.edu/thinker/5375.json
True
[{'_id':ObjectId('5e954a138d5640f2c15ad9c7'),
'inpho_api':'http://inpho.cogs.indiana.edu/thinker/5375.json',
'inpho_json':{'ID':5375,
'aliases':[],
'birth':[{'day': 0, 'month': 0, 'year': 700}],
'birth_strings':['700'],
'death':[],
'death_strings':[],
'influenced':[],
'influenced_by':[],
'label':'Kumārila  Bhaṭṭa',
'nationalities':[],
'professions':[],
'related_ideas':[646,
1545,
480,
1157,
1343,
1426,
1824,
1493,
807],
'related_thinkers':[3060, 4180, 3260, 3179],
'sep_dir':'kumaarila',
'students':[],
'teachers':[],
'type':'thinker',
'url':'/thinker/5375',
'wiki':'Kumārila_Bhaṭṭa'},
'title':'Kumārila'}]


In [None]:
##### Cleaning the Winter 2019 Data ########

#2. Check for duplicate InPhO JSON data.

#There was also an article that linked to the WRONG entry in InPhO.
#Mencius (SEP) properly linked to Mencius (InPhO)
#Wang Yangming (SEP) improperly linked to Mencius (InPhO) -- this has been updated

In [19]:
# Point the mis-linked record at its own InPhO entry
title, api_endpoint = 'Wang Yangming', 'thinker/4092'
collection_to_update = db.sep_data

update_sep_json(title, api_endpoint, collection_to_update)

acknowledged: http://inpho.cogs.indiana.edu/thinker/4092.json
True
[{'_id':ObjectId('5e95ddaa16876e26996d546e'),
'inpho_api':'http://inpho.cogs.indiana.edu/thinker/4092.json',
'inpho_json':{'ID':4092,
'aliases':[],
'birth':[{'day': 0, 'month': 0, 'year': 1472}],
'birth_strings':['1472'],
'death':[{'day': 0, 'month': 0, 'year': 1529}],
'death_strings':['1529'],
'influenced':[],
'influenced_by':[4173],
'label':'Wang Yangming',
'nationalities':['Chinese'],
'professions':[],
'related_ideas':[6347,
148,
1870,
1343,
5338,
5510,
5602,
1544,
681],
'related_thinkers':[4173,
3563,
2844,
4203,
4195,
3060,
3177,
4198],
'sep_dir':'wang-yangming',
'students':[],
'teachers':[],
'type':'thinker',
'url':'/thinker/4092',
'wiki':'Wang_Yangming'},
'title':'Wang Yangming'}]


In [24]:
# Sanity counts over sep_data: all documents, documents with / without an
# 'inpho_json.ID' field, and the distinct values of that field.
total_articles = db.sep_data.count_documents({})
articles_ids = db.sep_data.count_documents(
    {'inpho_json.ID': {'$exists': True}}
)
articles_noids = db.sep_data.count_documents(
    {'inpho_json.ID': {'$exists': False}}
)
distinct_ids = db.sep_data.distinct('inpho_json.ID')

# one value per line, in the same order as above
print(total_articles, articles_ids, articles_noids, len(distinct_ids), sep='\n')

1680
1676
4
1676


In [7]:
# Move the JSON data for each SEP entry into its own collection.#####

# We do this because we want to download additional API data from InPhO for those entities that are NOT in SEP. This will give us a broader picture of the history of philosophy than simply what exists in SEP.

# Source and destination collections
sep_collection = db.sep_entries
inpho_collection = db.inpho_data

# Materialise every SEP entry up front
sep_entries = list(sep_collection.find({}))

for entry in sep_entries:
    # the "inpho_json" object stored on the SEP entry
    json_data = entry['inpho_json']

    # Entries without an InPhO reference carry an error string instead of a
    # JSON object; turn that into a small marker document. Everything else is
    # passed through unchanged.
    final_json_data = (
        {'Error': 'No InPhO entry'}
        if json_data == 'Error: No InPhO entry'
        else json_data
    )

    # insert left disabled on purpose to prevent accidental movement
    #inpho_collection.insert_one(final_json_data)
