In [1]:
import pymongo 
import json 
import pandas as pd
from tqdm import tqdm
from pprint import pprint

In [2]:
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
db = client.visualizing_sep

In [9]:
# export page_text for every SEP page

sep_entries = list(db.sep_data.find({}).sort('title'))
for sep in sep_entries:
    page_name = sep['title'] + '.txt'
    file_name = 'texttraining/' + page_name.replace("/","_")
    page_text = sep['pagetext'].strip()
    with open(file_name,'w', encoding='UTF-8') as f:
        f.write(page_text)


In [7]:
#export title and id to csv
sep_entries = list(db.sep_data.find( filter={},
                                     projection={'title'},
                                     sort=[('title',1)]))

sep_df = pd.DataFrame(sep_entries)
sep_df.head()
sep_df.to_csv('sep.csv')

In [10]:
#extract unique list of domain tags
domain_tags = list(db.sep_data.find( filter={},
                                     projection={'domain_tags':1, '_id':0}))
domain_tags_individual = []

for tag in domain_tags:
    semisplit_tags = tag['domain_tags'].split(';')
    for semisplit_tag in semisplit_tags:
        commasplit_tags = semisplit_tag.split(',')
        for commasplit_tag in commasplit_tags:
            if commasplit_tag != '':
                domain_tags_individual.append(commasplit_tag.strip())

individual_tags = sorted(set(domain_tags_individual))
pprint(individual_tags)

['Aesthetics',
 'African and African-American Philosophy',
 'Arabic and Islamic Philosophy',
 'Biology',
 'Chinese Philosophy',
 'Computer Science',
 'Economics',
 'Epistemology',
 'Ethics and Morality',
 'Evolution',
 'Existentialism and Phenomenology',
 'Feminism',
 'Genetics',
 'Indian Philosophy',
 'Japanese Philosophy',
 'Language',
 'Latin American Philosophy',
 'Law',
 'Logic',
 'Mathematics',
 'Metaphysics',
 'Mind',
 'Physics',
 'Political and Social Theory',
 'Quantum Mechanics',
 'Religion',
 'Scientific Methods',
 'Thinker']


In [11]:
#make list of tuples for each domain and create domain id
domain_links = [ (x.strip(),'/category/' + x.strip().lower().replace(' ','-') + '/') for x in individual_tags]
pprint(domain_links)

[('Aesthetics', '/category/aesthetics/'),
 ('African and African-American Philosophy',
  '/category/african-and-african-american-philosophy/'),
 ('Arabic and Islamic Philosophy', '/category/arabic-and-islamic-philosophy/'),
 ('Biology', '/category/biology/'),
 ('Chinese Philosophy', '/category/chinese-philosophy/'),
 ('Computer Science', '/category/computer-science/'),
 ('Economics', '/category/economics/'),
 ('Epistemology', '/category/epistemology/'),
 ('Ethics and Morality', '/category/ethics-and-morality/'),
 ('Evolution', '/category/evolution/'),
 ('Existentialism and Phenomenology',
  '/category/existentialism-and-phenomenology/'),
 ('Feminism', '/category/feminism/'),
 ('Genetics', '/category/genetics/'),
 ('Indian Philosophy', '/category/indian-philosophy/'),
 ('Japanese Philosophy', '/category/japanese-philosophy/'),
 ('Language', '/category/language/'),
 ('Latin American Philosophy', '/category/latin-american-philosophy/'),
 ('Law', '/category/law/'),
 ('Logic', '/category/lo

In [12]:
#create node ojbect for domains, and create a list of links for each article in the domain

domain_menu = []
outlinks = []

for domain in domain_links[0:1]:
    domain_title = domain[0]
    domain_id = domain[1]
    article_urls = list(db.sep_data.find(
                        filter={'domain_tags': {'$regex':domain_title}},
                        projection={'page_url':1, '_id':0}))

    domain_nodes = list(db.sep_data.find(
                        filter={'domain_tags': {'$regex':domain_title}},
                        projection={'page_url':1,
                                    'title':1,
                                    'first_paragraph':1,
                                    '_id':0}))
    data_nodes = {'nodes':domain_nodes}
    pprint(data_nodes)
    article_links = [{'source':domain_id, 'target':url['page_url'], 'dir':'out'} for url in article_urls] 
    
    domain_node = { 'title':domain_title,
                     'id':domain_id,
                     'entry_type':'Menu', 
                     'primary_domain':domain_title,
                     'data' : data_nodes,
                     'links':article_links
                     }

    domain_menu.append(domain_node)

# create node object for top level SEP menu

menu_links = [{'source':'/category/', 'target':x[1], 'dir': 'Out'} for x in domain_links if x[1]!='/category/' ]

sep_node = {'id':'/category/',
              'title':'', 
              'entry_type':'Menu', 
              'primary_domain':'',
              'links': menu_links}

# #add SEP main menu as first node in domain_menu
domain_menu.insert(0,sep_node)

domain_object = {'nodes':domain_menu}

 movies in general made it imperative for philosophers to take film seriously as an artform on a par with the more traditional ones like theater, dance, and painting. As a result of this surge in interest in film as a subject for philosophical reflection, the philosophy of film has become an important area of research in aesthetics."}, {'page_url': '/entries/gadamer-aesthetics/', 'title': 'Gadamer’s Aesthetics', 'first_paragraph': '  Gadamer (1900–2002) does not provide an account of the aesthetic in any customary sense. His approach to art runs, in many ways, against conventional philosophical expectations. Aesthetic qualities are not debated in the manner of the analytic tradition of modern philosophy, nor does he concern himself overtly with the problems of aesthetic pleasure. Gadamer’s approach to aesthetic experience stands squarely in the phenomenological tradition. He is primarily concerned with the place of art in our experience of the world. Furthermore, his approach to aesthe

In [32]:
def return_nodelinks(page_url, outlinks, inlinks):
    """ returns the nodes linked to page_url, and identifies the direction of the link """

    nodelinks = []
    # we need to get the common elements between outlinks and inlinks
    # these are the dir="both" links
    outlinks_common = [value for value in outlinks if value in inlinks]
    inlinks_common = [value for value in inlinks if value in outlinks]
    linkdirection_both = set(outlinks_common + inlinks_common)

    for link in linkdirection_both:
        link_object = {'source': page_url,
                       'target': link,
                       'dir':'both'}
        nodelinks.append(link_object)

    # we then need to get the difference between the both links and the out links
    # these are these dir='out links
    linkdirection_out = [value for value in outlinks if value not in linkdirection_both]

    for link in linkdirection_out:
        link_object = {'source': page_url,
                       'target': link,
                       'dir':'out'}
        nodelinks.append(link_object)

    # we then need to get the difference between the both links and the in links
    # these are the dir='in' links
    linkdirection_in = [value for value in inlinks if value not in linkdirection_both]

    for link in linkdirection_in:
        link_object = {'source': page_url,
                       'target': link,
                       'dir':'in'}
        nodelinks.append(link_object)

    # sorted_nodelinks = sorted(nodelinks, key=(1))

    sorted_nodelinks = sorted(nodelinks, key=lambda k: k['target']) 
    
    return sorted_nodelinks


def create_sep_network_json(sep_collection):
    """ exports the JSON that makes up the network graph 

    Things to watch for in the export file: 
    1. entries with 'entries//'
    2. depiction needs a fix at 'reference'
    3. type theory needs a fix for 'russell'
    4. sartre links to himself
    """
    #init empty lists 
    nodes_list = []
    links_list = []

    #loop through all entries
    for entry in tqdm(sep_collection, desc='Processing'):

        page_url = entry['page_url']
        title = entry['title']
        first_paragraph = entry['first_paragraph']
        inpho_data = entry['inpho_api']
        outlinks = [value['link'] for value in entry['outlinks']]
        inlinks = [value['link'] for value in entry['inlinks']]
        primary_domain = entry['primary_domain']
        domain_tags = entry['domain_tags']
        
        if 'thinker' in inpho_data:
            entry_type = 'thinker'
        else:
            entry_type = 'idea'

        num_outlinks = len(outlinks)
        num_inlinks = len(inlinks)

        node_object = { 'id': page_url, 
                        'title': title,
                        'entry_type': entry_type,
                        'first_paragraph': first_paragraph,
                        'num_outlinks': num_outlinks,
                        'num_inlinks': num_inlinks,
                        'primary_domain': primary_domain,
                        'domain_tags': domain_tags,
                        'links': return_nodelinks(page_url, outlinks, inlinks)
                    }

        nodes_list.append(node_object)

        for link in outlinks:
            if '//' not in link:
                links_list.append({'source':entry['page_url'], 'target':link})

    articles_object = {'nodes': nodes_list, 'links':links_list}
    network_object = {'articles' : articles_object, 'domains': domain_object}
    with open('static/sep_network_test.json', 'w', encoding='UTF-8') as f:
        json.dump(network_object,f,ensure_ascii=False, indent=4)

In [33]:
#get sep entries stored in mongo
sep_entries = list(db.sep_data.find({}).sort('title'))
create_sep_network_json(sep_entries)

Processing: 100%|██████████| 1680/1680 [00:00<00:00, 44328.33it/s]
