In [61]:
### THIS SCRIPT CLEANS THE COLLECTED DATA AND ORGANZIES MULTIPLE MONGODB-RELATED DATABASE OPERATIONS ###   

In [169]:
import pymongo 
import tqdm
import requests
import json

from bson.objectid import ObjectId
from pprint import pprint

##### INIT GLOBAL VARIABLES#####

#init Mongo
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

#create database
db = client.visualizing_sep

#collection to hold SEP TOC
toc_collection = db.sep_toc_win2019

#collection to hold entry objects
entries_collection = db.sep_entries

#collection to hold InPhO data 
inpho_collection = db.inpho_apidata

In [197]:
#After downloading all of the JSON data from InPhO and storing it inside the related SEP entry's document, we have to test the downloaded data for any errors or mistakes. 

#First thing to do is identify any records in SEP that didn't have any associated InPho entries.
sep_no_inpho = entries_collection.find( filter={'inpho_api': 'Error: No InPhO entry'},
                                        projection = ['title'],
                                        sort=[('title',1)]
                                        )
list_sep_no_inpho = list(sep_no_inpho)
pprint(list_sep_no_inpho)

#These  14 sep articles didn't link into InPhO at all. I went into InPhO and found the appropriate API endpoint for those, and indicated the URL for those articles that do have InPhO entries. Not all of them had InPhO entries though.

# 'Algebraic Propositional Logic', inpho: idea/6267
#  'Basil [Cardinal] Bessarion'}, thinker/6297
# {'title': 'Dynamic Epistemic Logic'}, idea/5642
# {'title': 'Hegel’s Dialectics'}, NONE
# {'title': 'Ibn Sina [Avicenna]'}, thinker/2577
# {'title': 'Ibn Sina’s Natural Philosophy'}, NONE
# {'title': 'Identity and Individuality in Quantum Theory'}, idea/1191
# {'title': 'Legalism in Chinese Philosophy'}, idea/5595
# {'title': 'Martin Buber'}, thinker/2718
# {'title': 'Mental Causation'}, idea/1240
# {'title': 'Nietzsche’s Life and Works'} NONE,
# {'title': 'Religious Daoism'}, idea/5663
# {'title': 'Sophie de Grouchy'}, NONE
# {'title': 'Supererogation'}] idea/1128.html



[{'_id':ObjectId('5e82960380ca4dfd7911cf52'),
'title':'Dynamic Epistemic Logic'},
{'_id': ObjectId('5e8298d880ca4dfd7911d03e'), 'title': 'Hegel’s Dialectics'},
{'_id': ObjectId('5e82997680ca4dfd7911d06f'), 'title': 'Ibn Sina [Avicenna]'},
{'_id':ObjectId('5e82997380ca4dfd7911d06e'),
'title':'Ibn Sina’s Natural Philosophy'},
{'_id':ObjectId('5e83953742ea6bed61c10a80'),
'title':'Identity and Individuality in Quantum Theory'},
{'_id':ObjectId('5e8293a180ca4dfd7911ce8d'),
'title':'Legalism in Chinese Philosophy'},
{'_id': ObjectId('5e8292f980ca4dfd7911ce61'), 'title': 'Martin Buber'},
{'_id': ObjectId('5e829c6580ca4dfd7911d1a4'), 'title': 'Mental Causation'},
{'_id':ObjectId('5e8392bc42ea6bed61c109b5'),
'title':'Nietzsche’s Life and Works'},
{'_id': ObjectId('5e82951180ca4dfd7911cf02'), 'title': 'Religious Daoism'},
{'_id': ObjectId('5e83977c42ea6bed61c10b33'), 'title': 'Sophie de Grouchy'},
{'_id': ObjectId('5e83980e42ea6bed61c10b61'), 'title': 'Supererogation'}]


In [185]:
#make a list of apropriate endpionts, and use 0 to indicate those without endpoints. These are sorted in the same order as the entries.
inpho_apis = [  'idea/6267',
                'thinker/6297',
                'idea/5642',0,
                'thinker/2577',0,
                'idea/1191', 
                'idea/5595',
                'thinker/2718',
                'idea/1240',0,
                'idea/5663',0,
                'idea/1128' ]

#zip mongo projection and new endpoints together
zipped_sep_inpho = zip(list_sep_no_inpho[0:2],inpho_apis[0:2])


In [188]:
def update_sep_json(id_value,api_endpoint):
    inpho_api = f'http://inpho.cogs.indiana.edu/{api_endpoint}.json'
    print(inpho_api)
    request_inpho = requests.get(inpho_api)
    inpho_json = json.loads(request_inpho.text)

    new_values = {
        'inpho_api': inpho_api,
        'inpho_json': inpho_json
    }

    entries_collection.update_one(
        { '_id': ObjectId(id_value) },
        { '$set': new_val}
    )

    # boolean confirmation that the API call went through
    print ("acknowledged:", result.acknowledged)

In [189]:
# for zipped in zipped_sep_inpho:
#     mongoid = zipped[0]['_id']
#     mongotitle = zipped[0]['title']
#     api_end = zipped[1]
#     update_sep_json(mongoid,api_end)

acknowledged:True


In [194]:
update_sep_json('5e829b4d80ca4dfd7911d12e','idea/6267')

acknowledged:True


In [196]:
doc = list(entries_collection.find(
    filter={'_id': ObjectId('5e829b4d80ca4dfd7911d12e')},
    projection=['inpho_api','inpho_json','title']
    ))

print(doc)

[{'_id': ObjectId('5e829b4d80ca4dfd7911d12e'), 'title': 'Algebraic Propositional Logic', 'inpho_api': 'testing insert', 'inpho_json': 'Error: No InPhO entry'}]


In [58]:
# Move the JSON data for each SEP entry into its own collection. We do this because we want to download additional API data from InPhO for those entities that are NOT in SEP. This will give us a broader picture of the history of philosophy than simply what exists in SEP.
 
#HOWEVER: we also have to clean up the JSON. In 95 cases, the JSON data from InPhO was structured slightly different, in that those data were wrapped inside ["responseData"]["result"] keys, when the rest of the data was not. We want to create a uniform dataset, so we remove ["responseData"]["result"] from those data so that every InPhO object has the same structure. 

#get all SEP entries
sep_entries = list(entries_collection.find({}))

for entry in sep_entries:
    #get "inpho_json" object from SEP
    json_data = entry['inpho_json']

    #some SEP entries didn't have an InPhO reference, so we note that here.
    if json_data == 'Error: No InPhO entry':
        final_json_data = {'Error': 'No InhPhoEntry'}

    #process main data elements
    else:
        if 'responseData' in json_data:
            #if ["responseData"]["result"] in current object, pull the child references from it
            final_json_data = json_data["responseData"]["result"]
        else:
            #all good here
             final_json_data = json_data

    inpho_collection.insert_one(final_json_data)


In [71]:
#get counts of different aspects of the the dataset
count_ids = inpho_collection.count_documents({'ID': {'$exists': True}})
distinct_ids = inpho_collection.distinct('ID')

print(count_ids)
print(len(distinct_ids))

1657
1656


In [84]:
dups = inpho_collection.aggregate([
    {"$match": {"ID" :{ "$exists" : True } } }, 
    {"$group" : {"_id": "$ID", "count": { "$sum": 1 } } },
    {"$match": {"count" : {"$gt": 1} } } 
    # {"$project": {"ID" : "$_id", "_id" : 0} }
])

for dup in dups:
    print(dup)

{'_id': 3563, 'count': 2}


In [87]:
dup_entry = inpho_collection.find_one({'ID':3563})
print(dup_entry)

{'_id': ObjectId('5e8e5db82763be045e93e73c'), 'wiki': 'Mencius', 'influenced': [], 'death': [{'month': 0, 'day': 0, 'year': 372}], 'teachers': [], 'url': '/thinker/3563', 'influenced_by': [2844], 'professions': [], 'death_strings': ['372'], 'related_thinkers': [2844, 4203, 4195, 4173, 4092, 4192, 3060, 2633, 3345], 'sep_dir': 'mencius', 'students': [], 'birth_strings': ['November 372'], 'birth': [{'month': 11, 'day': 0, 'year': 372}], 'nationalities': ['Chinese'], 'related_ideas': [1694, 1544, 1343, 1695, 148, 1130, 352, 1527, 5338], 'label': 'Mencius', 'type': 'thinker', 'ID': 3563, 'aliases': []}


In [94]:
entries_collection.count_documents({'inpho_json.label': {'$regex': 'Mencius'}})
mencius_entries = entries_collection.find({'inpho_json.label': {'$regex': 'Mencius'}})
for entry in mencius_entries:
    pprint(entry)


er, Zhu Xi, following the interpretation of Cheng Yi, '
'claims\n'
'that\n'
'\n'
'…what is meant by “extending knowledge lies in ge\n'
'wu” is that desiring to extend my knowledge lies in\n'
'encountering things and exhaustively investigating their '
'Pattern.  In\n'
'general, the human mind is sentient and never fails to have '
'knowledge,\n'
'while the things of the world never fail to have the Pattern.  '
'It is\n'
'only because the Pattern is not yet exhaustively investigated '
'that\n'
'knowledge is not fully fathomed.  Consequently, at the beginning '
'of\n'
'education in the Great Learning, the learner must be made to '
'encounter\n'
'the things of the world, and never fail to follow the Pattern '
'that one\n'
'already knows and further exhaust it, seeking to arrive at the\n'
'farthest points.  When one has exerted effort for a long time, '
'one\n'
'day, like something suddenly cracking open, one will know in a '
'manner\n'
'that binds it all together. (Translation slightly modi

In [64]:
#count docs between collections 

sep_yes = entries_collection.count_documents({'inpho_json.ID': {'$exists': True}})
inp_yes = inpho_collection.count_documents({'ID': {'$exists': True}})
sep_no = entries_collection.count_documents({'inpho_json.ID': {'$exists': False}})
inp_no = inpho_collection.count_documents({'inpho_json.ID': {'$exists': False}})
sep_error = entries_collection.count_documents({'inpho_json': 'Error: No InPhO entry'})
sep_noerror = entries_collection.count_documents({'inpho_json': {'$ne': 'Error: No InPhO entry'}})
inp_error = inpho_collection.count_documents({'inpho_json': 'Error: No InPhO entry'})
inp_noerror = inpho_collection.count_documents({'inpho_json': {'$ne': 'Error: No InPhO entry'}})
print(sep_yes)
print(sep_no)
print(inp_yes)
print(inp_no)
print(sep_error)
print(sep_noerror)
print(inp_error)
print(inp_noerror)

1562
109
0
1671
14
1657
0
1671
