In [3]:
import requests
from bs4 import BeautifulSoup
import lxml
import pickle
import csv
import re
import datetime
import dateutil
import time
import itertools
import pandas as pd

%run -i philnetfuncs.py

This section sets the prefix shared by all Wikipedia URLs and the unique wiki URL path of the starting node.

In [6]:
# Site root; combined with a relative article path it forms the full page URL.
prefix = 'https://en.wikipedia.org'

# Relative path of the article the crawl starts from
# ("%C3%A9" is the percent-encoded form of "é").
descartes = '/wiki/Ren%C3%A9_Descartes'

# Uncomment to inspect what parse_connections returns for the starting page:
#print(parse_connections(prefix, descartes))

This function is where we build the network. The prefix and unique wiki URL tell the crawl to start at https://en.wikipedia.org/wiki/Ren%C3%A9_Descartes, and the last argument tells it to capture all philosophers within 20 degrees of that node. In this case, there is no node in the network farther than 20 degrees from René Descartes.

In [3]:
# Maximum number of degrees of separation from the starting node to follow.
CRAWL_DEPTH = 20

# NOTE: this crawl is expensive -- the saved run of this cell was stopped with
# a KeyboardInterrupt. Later cells work from the pickled network (phil_5), so
# this cell only needs to be run when rebuilding the network from scratch.
philosophers = iterated_crawl(prefix, descartes, CRAWL_DEPTH)

1


KeyboardInterrupt: 

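This cell saves the network to a pickle file and loads it back. The save step is currently commented out, so as written the cell only reloads the previously saved network from `philosophers5.pkl`.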

In [11]:
# Saves the network in a .pkl file (disabled; uncomment to overwrite the file
# with the result of a fresh crawl).
#with open(r'philosophers5.pkl', 'wb') as f:
#    pickle.dump(philosophers, f)

# Loads the fifth version of the network from the .pkl file
# (ensures names in influenced/influences same as in dictionary).
# The `with` block closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code -- only load a
# philosophers5.pkl that you produced yourself or otherwise trust.
with open(r'philosophers5.pkl', 'rb') as f:
    phil_5 = pickle.load(f)


This block enriches the network with each philosopher's birth, death, and school data. It is written so that adding more options to `vars` could collect more data, but `add_info` is not currently able to collect anything beyond the fields listed below.

In [12]:
# Fields requested from add_info for every philosopher.
# NOTE(review): the name `vars` shadows Python's builtin vars() for the rest
# of the session; it is kept because the surrounding text refers to it.
vars = [
    "Born",
    "Died",
    "School",
]

# Rebinds phil_5 to whatever add_info returns. Later cells read
# value[3], value[4] and value[5] from each entry -- presumably the three
# fields above, in order; verify against add_info in philnetfuncs.py.
phil_5 = add_info(prefix, phil_5, vars)

In [13]:
# Build the list of distinct school names across all philosophers, in order
# of first appearance.
phil = list(phil_5.values())

# One pass: skip philosophers with no school list, lower-case every name and
# strip the " philosophy" suffix. dict.fromkeys keeps first-seen order while
# dropping duplicates.
schools = list(dict.fromkeys(
    re.sub(" philosophy", '', name.lower())
    for record in phil
    if record[5] is not None
    for name in record[5]
))

In [14]:
# Display the de-duplicated, lower-cased school names built in the previous cell.
schools

['rationalism',
 'cartesianism',
 'mechanism',
 'innatism',
 'foundationalism',
 'conceptualism',
 'augustinianism',
 'indirect realism',
 'correspondence theory of truth',
 'corpuscularianism',
 'theological voluntarism',
 'platonism',
 'peripatetic school',
 'aristotelianism',
 'classical republicanism',
 'scholasticism',
 'neoplatonism',
 'thomism',
 'theological intellectualism',
 'philosophical realism',
 'moderate realism',
 'direct realism',
 'virtue ethics',
 'natural law',
 'occamism',
 'nominalism',
 'medieval realism',
 'school of salamanca',
 'pyrrhonism',
 'empiric school',
 'renaissance humanism',
 'renaissance skepticism',
 'spinozism',
 'hegel',
 'pluralistic idealism',
 'optimism',
 'relationism',
 'empiricism',
 'ideational theory of meaning',
 'social contract',
 'classical liberalism',
 'occasionalism',
 'jansenism',
 'fideism',
 'kantianism',
 'enlightenment',
 'german idealism',
 'metaphysical conceptualism',
 'perceptual non-conceptualism',
 'transcendental ideal

In [15]:
# Append a 0/1 indicator vector (one slot per entry of `schools`) to every
# philosopher record and collapse the record's school list into one
# ':'-separated string.
# NOTE: this cell is not idempotent -- each run appends another vector and
# overwrites value[5] with a string, so run it once per freshly loaded phil_5.
for value in phil_5.values():
    raw_schools = value[5]

    # No school list: the philosopher belongs to none of the known schools.
    if raw_schools is None:
        value.append([0] * len(schools))
        continue

    # Same normalisation as used to build `schools`: lower-case, drop suffix.
    p_schools = [re.sub(" philosophy", '', s.lower()) for s in raw_schools]
    value.append([int(s in p_schools) for s in schools])
    print(p_schools)
    value[5] = ':'.join(p_schools)


['rationalism', 'cartesianism', 'mechanism', 'innatism', 'foundationalism', 'conceptualism', 'augustinianism', 'indirect realism', 'correspondence theory of truth', 'corpuscularianism', 'theological voluntarism']
['platonism']
['peripatetic school', 'aristotelianism', 'classical republicanism']
['aristotelianism']
['scholasticism', 'neoplatonism', 'augustinianism']
['augustinianism']
['scholasticism', 'thomism', 'aristotelianism', 'theological intellectualism', 'philosophical realism', 'moderate realism', 'direct realism', 'virtue ethics', 'natural law', 'correspondence theory of truth']
['scholasticism', 'occamism', 'nominalism', 'theological voluntarism']
['scholasticism', 'medieval realism', 'school of salamanca']
['pyrrhonism', 'empiric school']
['renaissance humanism', 'renaissance skepticism', 'pyrrhonism']
['rationalism', 'spinozism', 'cartesianism', 'foundationalism', 'hegel', 'conceptualism', 'direct realism', 'correspondence theory of truth']
['rationalism', 'pluralistic idea

This function creates a list of edges based on the connections each philosopher has.

In [17]:
# edge_finder is presumably defined in philnetfuncs.py (loaded with %run) -- verify.
# The export cell below reads each edge as edge[0] (source) and edge[1] (target),
# both of which must be keys of phil_5.
edges = edge_finder(phil_5)

This converts the network and the list of edges into the standard .gdf format, as well as into two CSV files, nodes.csv and edges.csv. For more information on the .gdf file format, see https://gephi.org/users/supported-graph-formats/gdf-format/

In [18]:

# Export the network three ways:
#   philosophers.gdf -- node section followed by edge section (GDF format)
#   nodes.csv        -- one row per philosopher, plus one 0/1 column per school
#   edges.csv        -- one row per directed edge, using the generated node ids
#
# The files are opened with newline="" as the csv module requires, so the
# writer's '\n' terminator is written unchanged on every platform and any
# newline embedded in a field is not altered.
with open("philosophers.gdf", "w", encoding="utf-8", newline="") as f, \
     open("nodes.csv", "w", encoding="utf-8", newline="") as n, \
     open("edges.csv", "w", encoding="utf-8", newline="") as e:

    fwrite = csv.writer(f, lineterminator='\n')
    nwrite = csv.writer(n, lineterminator='\n')
    ewrite = csv.writer(e, lineterminator='\n')

    # Maps each philosopher key to its generated node id ("n1", "n2", ...).
    node_list = {}

    fwrite.writerow(["nodedef>name VARCHAR","label VARCHAR","born VARCHAR","died VARCHAR"])
    nwrite.writerow(["id","label","born","died","schools"] + schools)

    # Node ids are 1-based and follow the iteration order of phil_5.
    for ctr, (key, value) in enumerate(phil_5.items(), start=1):
        node_id = "n" + str(ctr)
        node_list[key] = node_id
        fwrite.writerow([node_id, key, value[3], value[4]])
        # value[5] is the ':'-joined school string and value[6] the 0/1 vector
        # added by the indicator cell above.
        nwrite.writerow([node_id, key, value[3], value[4], value[5]] + value[6])

    # The edge section of the GDF file goes after all node rows.
    fwrite.writerow(["edgedef>node1 VARCHAR","node2 VARCHAR","directed BOOLEAN"])
    ewrite.writerow(["from","to"])

    # A KeyError here means an edge names a philosopher missing from phil_5.
    for edge in edges:
        source_id = node_list[edge[0]]
        target_id = node_list[edge[1]]
        fwrite.writerow([source_id, target_id, "true"])
        ewrite.writerow([source_id, target_id])
