In [128]:
import requests
from bs4 import BeautifulSoup
import lxml
import pickle
import csv
import re
import datetime
import dateutil
import time

%run -i philnetfuncs.py

In [120]:
def table_scrape(prefix, ref, vars):
    """Scrape selected infobox rows from the page at ``prefix + ref``.

    The page is fetched and parsed, then an infobox table is located by
    trying, in order, ``table.infobox.vcard``, ``table.infobox.biography.vcard``
    and finally the first ``tbody`` element.  Each ``tr`` that has both a
    ``th`` and a ``td`` is considered; its header text must appear in ``vars``.

    Args:
        prefix: Leading part of the URL (e.g. the site root).
        ref: Path appended to ``prefix`` to form the full URL.
        vars: Header labels to look for.  Only "Born", "Died" and "School"
            are ever populated; any other label always maps to ``None``.

    Returns:
        A list of ``[label, value]`` pairs, one per entry of ``vars`` and in
        the same order.  ``value`` is ``None`` when the label was not found
        (or when the page contains no usable table at all).
    """
    page = requests.get(prefix + ref)
    soup = BeautifulSoup(page.content, "lxml")

    table = soup.find("table", class_="infobox vcard")
    if invalid_table(table):
        table = soup.find("table", class_="infobox biography vcard")
    if invalid_table(table):
        table = soup.find("tbody")

    # soup.find() returns None when nothing matches; without this guard a
    # page that has no table would raise AttributeError on find_all().
    rows = table.find_all("tr") if table is not None else []

    row_dict = {}
    for row in rows:
        th = row.find("th")
        td = row.find("td")
        if th is None or td is None:
            continue
        label = th.text
        if label not in vars:
            continue
        # If a label occurs in several rows, the last occurrence wins.
        if label in ["Born", "Died"]:
            row_dict[label] = date_clean(td.text)
        elif label == "School":
            row_dict[label] = school_clean(td)

    # Preserve the caller's ordering and fill gaps with None.
    return [[var, row_dict.get(var)] for var in vars]

def school_clean(td):
    """Return the cleaned link texts found inside a table cell.

    Args:
        td: An element exposing ``find_all('a')`` whose results expose
            ``get_text()`` (e.g. a BeautifulSoup ``td`` tag).

    Returns:
        A list with the text of every ``a`` element in ``td``, with
        square-bracketed markers such as ``[1]`` removed.  Links whose text
        is empty after that removal are dropped.
    """
    schools = []
    for link in td.find_all('a'):
        # Raw string avoids the invalid-escape warning, and the pattern stops
        # at the first closing bracket so text sitting between two separate
        # markers ("a[1]b[2]") is kept instead of being swallowed.
        text = re.sub(r'\[[^\]]*\]', '', link.get_text())
        if text != '':
            schools.append(text)

    return schools

In [123]:
# Try table_scrape on a single page and display the result.
prefix = "https://en.wikipedia.org"
# NOTE(review): despite its name, this path points at William of Ockham.
descartes = "/wiki/William_of_Ockham"
# NOTE(review): `vars` shadows the Python builtin of the same name; it is
# reused by a later cell, so it is left as-is here.
vars = ["Born","Died","School"]
table_scrape(prefix, descartes, vars)

[['Born', datetime.date(1285, 1, 1)],
 ['Died', datetime.date(1347, 1, 1)],
 ['School',
  ['Scholasticism', 'Occamism', 'Nominalism', 'Theological voluntarism']]]

In [124]:
# Load the pickled philosopher data from disk.  The context manager closes
# the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(r'philosophers5.pkl', 'rb') as f:
    phil_5 = pickle.load(f)

In [125]:
# add_info is not defined in this notebook; presumably it comes from
# philnetfuncs.py, which is loaded with %run at the top — TODO confirm.
# Its result replaces phil_5.
phil_5 = add_info(prefix, phil_5, vars)

In [127]:
# Collect the dict's values into a list, then display element 5 of each one.
# NOTE(review): judging by the echoed output, index 5 holds the "School"
# list (or None) — confirm against add_info.
phil = [*phil_5.values()]
[record[5] for record in phil]

[['Rationalism',
  'Cartesianism',
  'Mechanism',
  'Innatism',
  'Foundationalism',
  'Conceptualism',
  'Augustinianism',
  'Indirect realism',
  'Correspondence theory of truth',
  'Corpuscularianism',
  'Theological voluntarism'],
 ['Platonism'],
 ['Peripatetic school', 'Aristotelianism', 'Classical republicanism'],
 None,
 ['Aristotelianism'],
 ['Scholasticism', 'Neoplatonism', 'Augustinianism'],
 ['Augustinianism'],
 ['Scholasticism',
  'Thomism',
  'Aristotelianism',
  'Theological intellectualism',
  'Philosophical realism',
  'Moderate realism',
  'Direct realism',
  'Virtue ethics',
  'Natural law',
  'Correspondence theory of truth'],
 ['Scholasticism', 'Occamism', 'Nominalism', 'Theological voluntarism'],
 ['Scholasticism', 'Medieval realism', 'School of Salamanca'],
 ['Pyrrhonism', 'Empiric school'],
 ['Renaissance humanism', 'Renaissance skepticism', 'Pyrrhonism'],
 None,
 None,
 ['Rationalism',
  'Spinozism',
  'Cartesianism',
  'Foundationalism',
  'Hegel',
  'Conceptua