In [81]:
from requests import get
from bs4 import BeautifulSoup
from unidecode import unidecode
from collections import defaultdict
import re
import string

In [82]:
r = get('http://www.philosophybasics.com/philosophers_anaximander.html')
r2 = get('http://www.philosophybasics.com/philosophers_thales.html')
soup = BeautifulSoup(r.content, 'html.parser')
soup2 = BeautifulSoup(r2.content, 'html.parser')

In [83]:
content = soup.select('p b')
content2 = soup2.select('p b')

In [84]:
content
content2

[<b>Thales of Miletus</b>,
 <b>Greek</b>,
 <b>Miletus</b>,
 <b>Ionia</b>,
 <b>Seven Sages of Greece</b>,
 <b>first philosopher</b>,
 <b>founder</b>,
 <b>teacher</b>,
 <b>substance</b>,
 <b>material objects</b>,
 <b>water</b>,
 <b>universality</b>,
 <b>"first scientist".</b>,
 <b>Miletus</b>,
 <b>Diogenes La\xebrtius</b>,
 <b>Examyas</b>,
 <b>Cleobulina</b>,
 <b>Thelidae</b>,
 <b>Agenor</b>,
 <b>Cadmus</b>,
 <b>Thebes</b>,
 <b>Phoenician</b>,
 <b>sketchy</b>,
 <b>contradictory</b>,
 <b>married</b>,
 <b>Cybisthus</b>,
 <b>Cybisthon</b>,
 <b>never married</b>,
 <b>no writings</b>,
 <b><i>"On the Solstice"</i></b>,
 <b><i>"On the Equinox"</i></b>,
 <b>business</b>,
 <b>politics</b>,
 <b>olive presses</b>,
 <b>good harvest</b>,
 <b>demonstrate</b>,
 <b>could</b>,
 <b>local politics</b>,
 <b>Lydians</b>,
 <b>Medes</b>,
 <b>Persians</b>,
 <b>predicted</b>,
 <b>eclipse</b>,
 <b>negotiations</b>,
 <b>favourable terms</b>,
 <b>died</b>,
 <b>history</b>,
 <b>legend</b>,
 <b>Seven Sages</b>,
 <b>S

In [85]:
print content[0].string
print content2[0].string

Anaximander
Thales of Miletus


In [86]:
def add_philosopher(url, name, phil_dict, time_period, birth='BC', death='BC', western=True):
    r = get(url)
    soup = BeautifulSoup(r.content, 'html.parser')

    # Unidecode first paragraph
    par = unidecode(soup.select('p')[0].get_text())
    sentences = par.split('.')
    
    # Calculate lifespan of philosopher
    all=string.maketrans('','')
    nodigs=all.translate(all, string.digits)
    
    # Check for sentence that contains years person lived
    for sentence in sentences:
        lifespan = sentence.translate(all, nodigs)
        if len(lifespan) > 2:
            break
            
    # Check if there were extra numbers in the sentence
    if len(lifespan) > 8:
        lifespan = lifespan[:8]
        
    # Set Philosopher information (Make year negative if BC)
    middle = len(lifespan) / 2
    try:
        phil_dict[name]['year_born'] = -1 * int(lifespan[:middle]) if birth == 'BC' else int(lifespan[:middle])
        phil_dict[name]['year_died'] = -1 * int(lifespan[middle:]) if death == 'BC' else int(lifespan[middle:])
        
    # Fill in with null values if can't be read in
    except ValueError:
        phil_dict[name]['year_born'] = float('NaN')
        phil_dict[name]['year_died'] = float('NaN')
        
    phil_dict[name]['time_period'] = time_period
    phil_dict[name]['Western?'] = western
    
    return phil_dict

In [87]:
# Obtain Name, birth year, and death year for philosophers of given time period
def ancient_time_period(time_period):
    # Make Request to page of specific time period
    r = get('http://www.philosophybasics.com/historical_' + time_period + '.html')
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Get names of each philosopher from specified time period
    philosophers = soup.select('a')
    if time_period == 'presocratic':
        min_slice, max_slice = 11, 23
        
    elif time_period == 'socratic':
        min_slice, max_slice = 10, 14
    
    elif time_period == 'hellenistic':
        min_slice, max_slice = 10, 15
    
    else: # time_period == 'roman'
        min_slice, max_slice = 9, 14 
    
    philosophers = [unidecode(x.string) for x in philosophers[min_slice:max_slice]]
    
    # Base url for web pages
    base_url = 'http://www.philosophybasics.com/philosophers_'
    phil_dict = defaultdict(dict)
    
    # Determine url to get request from
    if time_period == 'presocratic':
        for name in philosophers:
                
            if name == 'Zeno of Elea':
                name = 'Zeno_Elea'
                
            else:
                # Split name into needed part only
                name = name.split()[0]
                
            
            url = base_url + name.lower() + '.html'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period)
                
    elif time_period == 'socratic':
        for name in philosophers:
            name = name.split()[0]
                
            url = base_url + name.lower() + '.html'
                
                
            phil_dict = add_philosopher(url, name, phil_dict, time_period)
    
    elif time_period == 'hellenistic':
        for name in philosophers:
            if name == 'Zeno of Citium':
                name = 'Zeno'
                birth, death = 'BC', 'BC'
                url = base_url + 'zeno_citium.html'
                
            elif name == 'Philo of Alexandria':
                name = 'Philo'
                birth, death = 'BC', 'AC'
                url = base_url + name.lower() + '.html'
                
            else:
                birth, death = 'BC', 'BC'
                url = base_url + name.lower() + '.html'
            
            if name == 'Plotinus':
                birth, death = 'AC', 'AC'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
            
    else: # time_period == 'roman'
        for name in philosophers:
            if name == 'Cicero, Marcus Tullius':
                name = 'Cicero'
                birth, death = 'BC', 'BC'
                url = base_url + name.lower() + '.html'
            
            elif name == 'St. Augustine of Hippo':
                name = 'St_Augustine'
                birth, death = 'AD', 'AD'
                url = base_url + 'augustine.html'
                
            else:
                name = name.split(r'[, \. ' ']')[0]
                birth, death = 'AD', 'AD'
                url = base_url + name.lower() + '.html'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
        
    return dict(phil_dict)

In [88]:
pre_socratic = ancient_time_period('presocratic')

In [89]:
print pre_socratic['Parmenides']
print '\n', pre_socratic.keys()

{'year_died': -450, 'year_born': -515, 'time_period': 'presocratic', 'Western?': True}

['Parmenides', 'Empedocles', 'Pythagoras', 'Gorgias', 'Thales', 'Zeno_Elea', 'Anaxagoras', 'Anaximenes', 'Anaximander', 'Heraclitus', 'Protagoras', 'Democritus']


In [90]:
socratic = ancient_time_period('socratic')

In [91]:
print socratic['Socrates']
print '\n', socratic.keys()

{'year_died': -399, 'year_born': -469, 'time_period': 'socratic', 'Western?': True}

['Socrates', 'Plato', 'Diogenes', 'Aristotle']


In [92]:
hellenistic = ancient_time_period('hellenistic')

In [93]:
print hellenistic['Pyrrho']
print '\n', hellenistic.keys()

{'year_died': -270, 'year_born': -360, 'time_period': 'hellenistic', 'Western?': True}

['Pyrrho', 'Plotinus', 'Epicurus', 'Zeno', 'Philo']


In [94]:
roman = ancient_time_period('roman')

In [95]:
print roman['Epictetus']
print '\n', roman.keys()

{'year_died': 135, 'year_born': 55, 'time_period': 'roman', 'Western?': True}

['Marcus Aurelius', 'Cicero', 'Boethius', 'St_Augustine', 'Epictetus']


In [96]:
def ancient_philosophers():
    pre_socratic = ancient_time_period('presocratic')
    socratic = ancient_time_period('socratic')
    hellenistic = ancient_time_period('hellenistic')
    roman = ancient_time_period('roman')
    
    ancient = pre_socratic
    ancient.update(socratic)
    ancient.update(hellenistic)
    ancient.update(roman)
    
    return ancient

In [97]:
ancient = ancient_philosophers()

In [98]:
print ancient['Aristotle']
print '\n', ancient.keys()

{'year_died': -322, 'year_born': -384, 'time_period': 'socratic', 'Western?': True}

['Empedocles', 'Boethius', 'Diogenes', 'Philo', 'Thales', 'Cicero', 'Epictetus', 'Anaxagoras', 'Socrates', 'Marcus Aurelius', 'Anaximander', 'Heraclitus', 'Plotinus', 'Protagoras', 'St_Augustine', 'Parmenides', 'Pythagoras', 'Zeno', 'Gorgias', 'Plato', 'Zeno_Elea', 'Democritus', 'Pyrrho', 'Anaximenes', 'Aristotle', 'Epicurus']


In [99]:
def medieval_time_period(time_period):
    # Make Request to page of specific time period
    r = get('http://www.philosophybasics.com/historical_' + time_period + '.html')
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Get names of each philosopher from specified time period
    philosophers = soup.select('a')
    
    if time_period == 'medieval':
        min_slice, max_slice = 8, 18

    else: # time_period == 'renaissance'
        min_slice, max_slice = 12, 16
        
    philosophers = [unidecode(x.string) for x in philosophers[min_slice:max_slice]]
    
    base_url = 'http://www.philosophybasics.com/philosophers_'
    phil_dict = defaultdict(dict)
    
    if time_period == 'medieval':
        
        for name in philosophers:
            
            if name == 'Bacon, Roger':
                name = 'Roger_Bacon'
                url = base_url + 'bacon_roger.html'
            
            else:
                name = name.split(r'[, ' ']')[0]
                url = base_url + name.lower() + '.html'
                
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
        
    else: # time_period == 'Renaissance'
        
        for name in philosophers:
            
            if name == 'Bacon, Sir Francis':
                name = 'Sir_Francis_Bacon'
                url = base_url + 'bacon_francis.html'
            
            else:
                name = name.split(',')[0]
                url = base_url + name.lower() + '.html'
            
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
    
    return dict(phil_dict)

In [100]:
medieval_time = medieval_time_period('medieval')

In [101]:
print medieval_time['Roger_Bacon']
print '\n', medieval_time.keys()

{'year_died': 1294, 'year_born': 1214, 'time_period': 'medieval', 'Western?': True}

['Aquinas, St. Thomas', 'Roger_Bacon', 'Avicenna (Ibn Sina)', 'Scotus, John Duns', 'Abelard, Peter', 'Anselm, St.', 'Averroes (Ibn Rushd)', 'Ockham (Occam), William of', 'Albertus Magnus', 'Maimonides']


In [102]:
renaissance = medieval_time_period('renaissance')

In [103]:
print renaissance['Machiavelli']
print '\n', renaissance.keys()

{'year_died': 1527, 'year_born': 1469, 'time_period': 'renaissance', 'Western?': True}

['Erasmus', 'Machiavelli', 'Sir_Francis_Bacon', 'More']


In [104]:
def medieval_philosophers():
    medieval = medieval_time_period('medieval')
    renaissance = medieval_time_period('renaissance')
    
    medieval = medieval
    medieval.update(renaissance)
    
    return medieval

In [105]:
medieval = medieval_philosophers()

In [106]:
print medieval['Erasmus']
print '\n', medieval.keys()

{'year_died': 1536, 'year_born': 1466, 'time_period': 'renaissance', 'Western?': True}

['Aquinas, St. Thomas', 'Sir_Francis_Bacon', 'Roger_Bacon', 'Avicenna (Ibn Sina)', 'Scotus, John Duns', 'Machiavelli', 'Abelard, Peter', 'Erasmus', 'Anselm, St.', 'Averroes (Ibn Rushd)', 'Ockham (Occam), William of', 'Albertus Magnus', 'Maimonides', 'More']


In [107]:
def modern_time_period(time_period):
    # Make Request to page of specific time period
    r = get('http://www.philosophybasics.com/historical_' + time_period + '.html')
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Get names of each philosopher from specified time period
    philosophers = soup.select('a')
    
    if time_period == 'reason' or time_period == 'enlightenment':
        min_slice, max_slice = 9, 16

    else: # time_period == 'modern'
        min_slice, max_slice = 8, 36
        
    philosophers = [unidecode(x.string) for x in philosophers[min_slice:max_slice]]
    
    base_url = 'http://www.philosophybasics.com/philosophers_'
    phil_dict = defaultdict(dict)
    
    if time_period == 'reason':
        for name in philosophers:
            
            name = name.split(',')[0]
            
            url = base_url + name.lower() + '.html'
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
    
    elif time_period == 'enlightenment':
        for  name in philosophers:
            
            name = name.split(',')[0]
            
            url = base_url + name.lower() + '.html'
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
            
    else: # time_period == 'modern'
        for name in philosophers:
            
            if name == 'Friedrich Schelling':
                name = 'Schelling'
            
            else:
                name = name.split(',')[0]
            
            url = base_url + name.lower() + '.html'
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
            
    return dict(phil_dict)

In [108]:
reason = modern_time_period('reason')

In [109]:
print reason['Locke']
print '\n', reason.keys()

{'year_died': 1704, 'year_born': 1632, 'time_period': 'reason', 'Western?': True}

['Pascal', 'Descartes', 'Hobbes', 'Locke', 'Leibniz', 'Spinoza', 'Malebranche']


In [110]:
enlightenment = modern_time_period('enlightenment')

In [111]:
print enlightenment['Smith']
print '\n', enlightenment.keys()

{'year_died': 1790, 'year_born': 1723, 'time_period': 'enlightenment', 'Western?': True}

['Berkeley', 'Voltaire (Francois Marie Arouet)', 'Rousseau', 'Smith', 'Kant', 'Burke', 'Hume']


In [112]:
modern_time = modern_time_period('modern')

In [113]:
print modern_time['Nietzsche']
print '\n', modern_time.keys()

{'year_died': 1900, 'year_born': 1844, 'time_period': 'modern', 'Western?': True}

['Whitehead', 'Schopenhauer', 'Kierkegaard', 'Russell', 'Heidegger', 'James', 'Ayer', 'Nietzsche', 'Wittgenstein', 'Derrida', 'Emerson', 'Hegel', 'Sartre', 'Frege', 'Comte', 'Marx', 'Mill', 'Schelling', 'Ryle', 'Moore', 'Fichte', 'Bentham', 'Foucault', 'Husserl', 'Peirce', 'Dewey', 'Quine', 'Thoreau']


In [114]:
def modern_philosophers():
    reason = modern_time_period('reason')
    enlightenment = modern_time_period('enlightenment')
    modern = modern_time_period('modern')
    
    modern_phil = reason
    modern_phil.update(enlightenment)
    modern_phil.update(modern)
    
    return modern_phil

In [115]:
modern = modern_philosophers()

In [116]:
print modern['Kant']
print '\n', modern.keys()

{'year_died': 1804, 'year_born': 1724, 'time_period': 'enlightenment', 'Western?': True}

['Whitehead', 'Schopenhauer', 'Russell', 'Heidegger', 'Thoreau', 'Sartre', 'Smith', 'Derrida', 'Ayer', 'Frege', 'Marx', 'Burke', 'Hume', 'Ryle', 'Fichte', 'Foucault', 'Husserl', 'Berkeley', 'Rousseau', 'Dewey', 'Nietzsche', 'James', 'Voltaire (Francois Marie Arouet)', 'Wittgenstein', 'Schelling', 'Emerson', 'Hegel', 'Kierkegaard', 'Peirce', 'Comte', 'Mill', 'Descartes', 'Moore', 'Locke', 'Leibniz', 'Bentham', 'Spinoza', 'Pascal', 'Malebranche', 'Hobbes', 'Kant', 'Quine']


In [117]:
def western_philosophers():
    ancient = ancient_philosophers()
    medieval = medieval_philosophers()
    modern = modern_philosophers()
    
    western = ancient
    western.update(medieval)
    western.update(modern)
    
    return western

In [118]:
western = western_philosophers()

In [119]:
print western.keys()

['Aquinas, St. Thomas', 'Russell', 'Heidegger', 'Zeno_Elea', 'Smith', 'Frege', 'Erasmus', 'Foucault', 'Rousseau', 'Comte', 'Pythagoras', 'Roger_Bacon', 'Plato', 'Abelard, Peter', 'Berkeley', 'Ockham (Occam), William of', 'Epicurus', 'Husserl', 'Empedocles', 'James', 'Voltaire (Francois Marie Arouet)', 'Wittgenstein', 'Socrates', 'Anaximander', 'Hegel', 'Kierkegaard', 'Maimonides', 'Parmenides', 'Malebranche', 'Moore', 'Locke', 'Bentham', 'Gorgias', 'Pyrrho', 'Pascal', 'Anaximenes', 'Hobbes', 'Anselm, St.', 'Mill', 'Albertus Magnus', 'Whitehead', 'Schopenhauer', 'Diogenes', 'Scotus, John Duns', 'Anaxagoras', 'Derrida', 'Heraclitus', 'Ayer', 'Marx', 'Burke', 'More', 'Ryle', 'Fichte', 'Epictetus', 'Emerson', 'Democritus', 'Boethius', 'Thoreau', 'Hume', 'Dewey', 'Nietzsche', 'Aristotle', 'Philo', 'Thales', 'Cicero', 'Schelling', 'Machiavelli', 'Sartre', 'Plotinus', 'Protagoras', 'St_Augustine', 'Kant', 'Descartes', 'Sir_Francis_Bacon', 'Zeno', 'Avicenna (Ibn Sina)', 'Leibniz', 'Spinoza', '