In [28]:
import requests
from bs4 import BeautifulSoup
import lxml
import pandas as pd
import numpy as np
import pickle
import csv

In [29]:
# Base URL; parse_connections concatenates it with a relative article path.
prefix = "https://en.wikipedia.org"
# Relative article paths used as crawl seeds / smoke-test targets.
descartes = "/wiki/Ren%C3%A9_Descartes"
#print(parse_connections(prefix, descartes))
ibn = "/wiki/Ibn_al-Haytham"
# NOTE(review): parse_connections is defined in a later cell of this notebook,
# so on a fresh kernel that cell has to be run before this one.
print(parse_connections(prefix, ibn))

('Ibn al-Haytham', '/wiki/Ibn_al-Haytham', [['Aristotle', '/wiki/Aristotle'], ['Euclid', '/wiki/Euclid'], ['Ptolemy', '/wiki/Ptolemy'], ['Galen', '/wiki/Galen'], ['Banū Mūsā', '/wiki/Ban%C5%AB_M%C5%ABs%C4%81'], ['Thābit ibn Qurra', '/wiki/Th%C4%81bit_ibn_Qurra'], ['Al-Kindi', '/wiki/Al-Kindi'], ['Ibn Sahl', '/wiki/Ibn_Sahl_(mathematician)'], ['Abū Sahl al-Qūhī', '/wiki/Ab%C5%AB_Sahl_al-Q%C5%ABh%C4%AB']], [['Omar Khayyam', '/wiki/Omar_Khayyam'], ["Taqi ad-Din Muhammad ibn Ma'ruf", '/wiki/Taqi_ad-Din_Muhammad_ibn_Ma%27ruf'], ['Kamāl al-Dīn al-Fārisī', '/wiki/Kam%C4%81l_al-D%C4%ABn_al-F%C4%81ris%C4%AB'], ['Averroes', '/wiki/Averroes'], ['Al-Khazini', '/wiki/Al-Khazini'], ['John Peckham', '/wiki/John_Peckham'], ['Vitello', '/wiki/Vitello'], ['Roger Bacon', '/wiki/Roger_Bacon'], ['Johannes Kepler', '/wiki/Johannes_Kepler']])


In [30]:
def parse_connections(prefix, ref, timeout=30):
    """Download one article and extract its influence lists.

    Parameters
    ----------
    prefix : str
        Site root; it is concatenated with ``ref`` to form the request URL.
    ref : str
        Relative path of the article.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection raises instead of
        blocking the crawl forever. Defaults to 30 seconds.

    Returns
    -------
    tuple
        Whatever ``influences_parse`` / ``influenced_by_parse`` returns
        (``name, ref, influences, influenced``), or four ``None`` values when
        the page does not carry the expected pair of headings.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (including timeouts).
    """
    page = requests.get(prefix + ref, timeout=timeout)
    soup = BeautifulSoup(page.content, "lxml")

    # The heading divs are matched on their exact text; "Influenced by" is
    # written with a non-breaking space.
    influences = soup.find("div", string="Influences")
    influenced = soup.find("div", string="Influenced")
    influenced_by = soup.find("div", string="Influenced\xa0by")

    # A usable page needs an "Influenced" heading plus one of the two
    # incoming-influence headings.
    if influenced is None or (influences is None and influenced_by is None):
        return None, None, None, None

    # When an "Influenced by" heading is present its parser takes precedence.
    if influenced_by is None:
        return influences_parse(soup, ref)
    return influenced_by_parse(soup, ref)


def influences_parse(soup, ref):
    """Extract the two link lists from a page, using its ``div.center`` blocks.

    The first ``div`` with class ``center`` is read as the "influences" list
    and the second as the "influenced" list.

    Returns ``(page title, ref, influences, influenced)`` where both lists
    come from ``bs4_list_convert``.
    """
    title = soup.find("h1", id="firstHeading").text

    centered = soup.find_all("div", class_="center")
    influences_block = centered[0]
    influenced_block = centered[1]

    return (
        title,
        ref,
        bs4_list_convert(influences_block),
        bs4_list_convert(influenced_block),
    )

def influenced_by_parse(soup, ref):
    """Extract the two link lists from a page, using its ``ul.NavContent`` blocks.

    The first ``ul`` with class ``NavContent`` is read as the "influenced by"
    list and the second as the "influenced" list.

    Returns ``(page title, ref, influences, influenced)`` where both lists
    come from ``bs4_list_convert``.
    """
    title = soup.find("h1", id="firstHeading").text

    nav_lists = soup.find_all("ul", class_="NavContent")
    influences_block = nav_lists[0]
    influenced_block = nav_lists[1]

    return (
        title,
        ref,
        bs4_list_convert(influences_block),
        bs4_list_convert(influenced_block),
    )

def bs4_list_convert(div):
    """Collect ``[link text, href]`` pairs from every ``<a href=...>`` in ``div``.

    Anchors whose text contains ``[`` or whose href contains ``wikipedia``
    are skipped. Document order is preserved.
    """
    anchors = div.find_all("a", href=True)
    candidates = ((anchor.get_text(), anchor["href"]) for anchor in anchors)
    return [
        [text, href]
        for text, href in candidates
        if "[" not in text and "wikipedia" not in href
    ]

In [31]:
def phil_crawl(prefix, ref):
    """Parse one article plus every article it links to as influence/influenced.

    Parameters
    ----------
    prefix : str
        Site root handed through to ``parse_connections``.
    ref : str
        Relative path of the starting article.

    Returns
    -------
    dict
        ``{name: [ref, [influence names], [influenced names]]}`` for the
        starting article and for each linked article that could be parsed.
        Empty when the starting article itself could not be parsed.
    """
    name, ref, infs, infd = parse_connections(prefix, ref)

    # parse_connections signals "nothing usable on this page" with Nones;
    # iterating those used to raise TypeError.
    if name is None:
        return {}

    def _entry(page_ref, influences, influenced):
        # Keep only the display names; hrefs are needed just for fetching.
        return [
            page_ref,
            [row[0] for row in influences],
            [row[0] for row in influenced],
        ]

    philosophers = {name: _entry(ref, infs, infd)}

    # Influences first, then influenced, so insertion order is unchanged.
    for person in infs + infd:
        n_name, n_ref, n_infs, n_infd = parse_connections(prefix, person[1])
        if n_name is not None:
            philosophers[n_name] = _entry(n_ref, n_infs, n_infd)

    return philosophers


In [34]:
def iterated_crawl(prefix, ref, iterations):
    """Grow the network breadth-first from ``ref`` for a fixed number of rounds.

    Each round runs ``phil_crawl`` on every entry that was already in the
    network at the start of the round and has not been expanded before.
    The network size is printed after the initial crawl and after every round.

    Parameters
    ----------
    prefix : str
        Site root handed through to ``phil_crawl``.
    ref : str
        Relative path of the seed article.
    iterations : int
        Number of expansion rounds.

    Returns
    -------
    dict
        The accumulated ``{name: [ref, influences, influenced]}`` mapping.
    """
    phils = phil_crawl(prefix, ref)
    searched = set()  # names already expanded; set gives O(1) membership
    print(len(phils))

    for _ in range(iterations):
        # Snapshot the round's frontier up front: entries discovered during
        # this round are only expanded in the next one, and the dict can then
        # be updated in place instead of being copied on every merge.
        frontier = [
            (name, entry[0])
            for name, entry in phils.items()
            if name not in searched
        ]
        for name, page_ref in frontier:
            phils.update(phil_crawl(prefix, page_ref))
            searched.add(name)
        print(len(phils))

    return phils
    
# Crawl outward from the Descartes article for 10 rounds; iterated_crawl
# prints the network size after the initial crawl and after each round.
philosophers = iterated_crawl(prefix, descartes, 10)

22
163
479
642
688
708
729
742
760
765
765


In [37]:
# Disabled: writes the current `philosophers` dict to the second snapshot file.
#f = open(r'philosophers2.pkl', 'wb')
#pickle.dump(philosophers, f)
#f.close()

# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load snapshots that were produced locally by this notebook.
# `with` guarantees the handles are closed even if unpickling fails.
with open(r'philosophers.pkl', 'rb') as f:
    phil_1 = pickle.load(f)

with open(r'philosophers2.pkl', 'rb') as f:
    phil_2 = pickle.load(f)

In [38]:
# Print the names found only in the second snapshot, followed by the names
# found only in the first one (no separator between the two groups).
first_names = phil_1.keys()
second_names = phil_2.keys()

only_in_second = [name for name in second_names if name not in first_names]
only_in_first = [name for name in first_names if name not in second_names]

for name in only_in_second + only_in_first:
    print(name)

Ibn al-Haytham
Omar Khayyam
Al-Khazini
Mulla Sadra
Al-Khalil ibn Ahmad al-Farahidi
Miskawayh
Nasir al-Din al-Tusi
Ibn Tufail
Abu Amr of Basra
Sibawayh
Sam Harris
Michael Sandel
Rumi
Muhammad Asad
Israr Ahmed
Ibn Tumart
Ibn al-Nafis
Abu Madyan
Abu Said al-Baji
Al-Suyuti
Ismail Haqqi Bursevi
Said Nursî
Niftawayh
Abu Turab al-Zahiri
Bayazid Bastami
Muhammad Abduh
Rashid Rida
Ibn Hazm
Abu Hayyan al-Gharnati
Abdul Qadir Gilani
Abd as-Salam ibn Mashish al-Alami
Abul Hasan ash-Shadhili
Ibn Taymiyyah
Al-Kamal ibn al-Humam
Al-Sha'rani
Osman Fazli
Dawud al-Zahiri
Muhammad bin Dawud al-Zahiri
Abu Abd al-Rahman Ibn Aqil al-Zahiri
Al-Hallaj
Jamāl al-Dīn al-Afghānī
Muhammad Nasiruddin al-Albani
Ibn Khaldun
Ibn Mada'
Al-Dhahabi
Muhammad al-Jazuli
Al-Hasan ibn 'Ali al-Barbahari
Ibn Battah
Ibn Qudamah
Muhammad ibn Abd al-Wahhab
Abu Hanifa
Zakariyya al-Ansari
Ibn Hajar al-Haytami
Ibrahim al-Bajuri
Al-Shafiʽi
Ahmad ibn Hanbal
Ibn Kullab
Ishaq ibn Rahwayh
Yaḥya ibn Maʻin
Al-Tabari
Ruwaym
Imadaddin Nasimi


In [85]:
# Number of entries in the network.
# NOTE(review): the recorded output of this cell does not match the final size
# printed by the crawl cell, so it was presumably run against a different
# `philosophers` object — confirm with Restart & Run All.
len(philosophers)

668

In [30]:
def edge_finder(network):
    """Derive the directed influence edges between members of ``network``.

    Parameters
    ----------
    network : dict
        ``{name: [ref, influences, influenced]}`` where the last two items
        are lists of names.

    Returns
    -------
    list of [str, str]
        ``[source, target]`` pairs meaning "source influenced target", in
        first-seen order and without duplicates. Names that are not keys of
        ``network`` never appear in an edge.
    """
    edges = []
    seen = set()  # (source, target) tuples already emitted

    def _add(source, target):
        # The same relation is usually listed on both articles, so every
        # edge is de-duplicated no matter which side reports it first.
        pair = (source, target)
        if pair not in seen:
            seen.add(pair)
            edges.append([source, target])

    for name, entry in network.items():
        for influence in entry[1]:
            if influence in network:
                _add(influence, name)

        for follower in entry[2]:
            if follower in network:
                _add(name, follower)

    return edges

# Build the directed edge list for the crawled network.
edges = edge_finder(philosophers)

In [31]:
# Export the network as a GDF file: a node table followed by an edge table.
# newline="" is what the csv module expects of its file object; it stops the
# text layer from translating the "\n" terminator on platforms that would.
with open("philosophers.gdf", "w", encoding="utf-8", newline="") as f:
    fwrite = csv.writer(f, lineterminator='\n')

    # Maps each philosopher name to the node id written in the node table.
    node_list = {}

    fwrite.writerow(["nodedef>name VARCHAR", "label VARCHAR"])

    # Node ids are "n1", "n2", ... in dict iteration order.
    for ctr, key in enumerate(philosophers, start=1):
        node_id = "n" + str(ctr)
        node_list[key] = node_id
        fwrite.writerow([node_id, key])

    fwrite.writerow(["edgedef>node1 VARCHAR", "node2 VARCHAR", "directed BOOLEAN"])

    # Each edge is a [source name, target name] pair.
    for source, target in edges:
        fwrite.writerow([node_list[source], node_list[target], "true"])

In [32]:
# Show which names are currently in the network.
philosophers.keys()

dict_keys(['René Descartes', 'Plato', 'Aristotle', 'Averroes', 'Anselm of Canterbury', 'Augustine of Hippo', 'Thomas Aquinas', 'William of Ockham', 'Francisco Suárez', 'Sextus Empiricus', 'Michel de Montaigne', 'Duns Scotus', 'Teresa of Ávila', 'Baruch Spinoza', 'Gottfried Wilhelm Leibniz', 'John Locke', 'Nicolas Malebranche', 'Antoine Arnauld', 'Blaise Pascal', 'Immanuel Kant', 'Edmund Husserl'])