In [260]:
from requests import get
from bs4 import BeautifulSoup
from unidecode import unidecode
from collections import defaultdict
import re
import string

In [261]:
r = get('http://www.philosophybasics.com/philosophers_anaximander.html')
r2 = get('http://www.philosophybasics.com/philosophers_thales.html')
soup = BeautifulSoup(r.content, 'html.parser')
soup2 = BeautifulSoup(r2.content, 'html.parser')

In [262]:
content = soup.select('p b')
content2 = soup2.select('p b')

In [263]:
content
content2

[<b>Thales of Miletus</b>,
 <b>Greek</b>,
 <b>Miletus</b>,
 <b>Ionia</b>,
 <b>Seven Sages of Greece</b>,
 <b>first philosopher</b>,
 <b>founder</b>,
 <b>teacher</b>,
 <b>substance</b>,
 <b>material objects</b>,
 <b>water</b>,
 <b>universality</b>,
 <b>"first scientist".</b>,
 <b>Miletus</b>,
 <b>Diogenes La\xebrtius</b>,
 <b>Examyas</b>,
 <b>Cleobulina</b>,
 <b>Thelidae</b>,
 <b>Agenor</b>,
 <b>Cadmus</b>,
 <b>Thebes</b>,
 <b>Phoenician</b>,
 <b>sketchy</b>,
 <b>contradictory</b>,
 <b>married</b>,
 <b>Cybisthus</b>,
 <b>Cybisthon</b>,
 <b>never married</b>,
 <b>no writings</b>,
 <b><i>"On the Solstice"</i></b>,
 <b><i>"On the Equinox"</i></b>,
 <b>business</b>,
 <b>politics</b>,
 <b>olive presses</b>,
 <b>good harvest</b>,
 <b>demonstrate</b>,
 <b>could</b>,
 <b>local politics</b>,
 <b>Lydians</b>,
 <b>Medes</b>,
 <b>Persians</b>,
 <b>predicted</b>,
 <b>eclipse</b>,
 <b>negotiations</b>,
 <b>favourable terms</b>,
 <b>died</b>,
 <b>history</b>,
 <b>legend</b>,
 <b>Seven Sages</b>,
 <b>S

In [264]:
print content[0].string
print content2[0].string

Anaximander
Thales of Miletus


In [265]:
def add_philosopher(url, name, phil_dict, time_period, birth='BC', death='BC', western=True):
    r = get(url)
    soup = BeautifulSoup(r.content, 'html.parser')

    # Unidecode first paragraph
    par = unidecode(soup.select('p')[0].get_text())
    sentences = par.split('.')
    
    # Calculate lifespan of philosopher
    all=string.maketrans('','')
    nodigs=all.translate(all, string.digits)
    
    # Check for sentence that contains years person lived
    for sentence in sentences:
        lifespan = sentence.translate(all, nodigs)
        if len(lifespan) > 2:
            break
            
    # Check if there were extra numbers in the sentence
    if len(lifespan) > 8:
        lifespan = lifespan[:8]
        
    # Set Philosopher information (Make year negative if BC)
    middle = len(lifespan) / 2
    try:
        phil_dict[name]['year_born'] = -1 * int(lifespan[:middle]) if birth == 'BC' else int(lifespan[:middle])
        phil_dict[name]['year_died'] = -1 * int(lifespan[middle:]) if death == 'BC' else int(lifespan[middle:])
        
    # Fill in with null values if can't be read in
    except ValueError:
        phil_dict[name]['year_born'] = float('NaN')
        phil_dict[name]['year_died'] = float('NaN')
        
    phil_dict[name]['time_period'] = time_period
    phil_dict[name]['Western?'] = western
    
    return phil_dict

In [266]:
# Obtain Name, birth year, and death year for philosophers of given time period
def ancient_time_period(time_period):
    # Make Request to page of specific time period
    r = get('http://www.philosophybasics.com/historical_' + time_period + '.html')
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Get names of each philosopher from specified time period
    philosophers = soup.select('a')
    if time_period == 'presocratic':
        min_slice, max_slice = 11, 23
        
    elif time_period == 'socratic':
        min_slice, max_slice = 10, 14
    
    elif time_period == 'hellenistic':
        min_slice, max_slice = 10, 15
    
    else: # time_period == 'roman'
        min_slice, max_slice = 9, 14 
    
    philosophers = [unidecode(x.string) for x in philosophers[min_slice:max_slice]]
    
    # Base url for web pages
    base_url = 'http://www.philosophybasics.com/philosophers_'
    phil_dict = defaultdict(dict)
    
    # Determine url to get request from
    if time_period == 'presocratic':
        for name in philosophers:
                
            if name == 'Zeno of Elea':
                name = 'Zeno_Elea'
                
            else:
                # Split name into needed part only
                name = name.split()[0]
                
            
            url = base_url + name.lower() + '.html'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period)
                
    elif time_period == 'socratic':
        for name in philosophers:
            name = name.split()[0]
                
            url = base_url + name.lower() + '.html'
                
                
            phil_dict = add_philosopher(url, name, phil_dict, time_period)
    
    elif time_period == 'hellenistic':
        for name in philosophers:
            if name == 'Zeno of Citium':
                name = 'Zeno'
                birth, death = 'BC', 'BC'
                url = base_url + 'zeno_citium.html'
                
            elif name == 'Philo of Alexandria':
                name = 'Philo'
                birth, death = 'BC', 'AC'
                url = base_url + name.lower() + '.html'
                
            else:
                birth, death = 'BC', 'BC'
                url = base_url + name.lower() + '.html'
            
            if name == 'Plotinus':
                birth, death = 'AC', 'AC'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
            
    else: # time_period == 'roman'
        for name in philosophers:
            if name == 'Cicero, Marcus Tullius':
                name = 'Cicero'
                birth, death = 'BC', 'BC'
                url = base_url + name.lower() + '.html'
            
            elif name == 'St. Augustine of Hippo':
                name = 'Augustine'
                birth, death = 'AD', 'AD'
                url = base_url + name.lower() + '.html'
                
            else:
                name = name.split(r'[, \. ' ']')[0]
                birth, death = 'AD', 'AD'
                url = base_url + name.lower() + '.html'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
        
    return phil_dict

In [267]:
pre_socratic = ancient_time_period('presocratic')

In [268]:
print pre_socratic['Parmenides']
print '\n', pre_socratic.keys()

{'year_died': -450, 'year_born': -515, 'time_period': 'presocratic', 'Western?': True}

['Parmenides', 'Empedocles', 'Pythagoras', 'Gorgias', 'Thales', 'Zeno_Elea', 'Anaxagoras', 'Anaximenes', 'Anaximander', 'Heraclitus', 'Protagoras', 'Democritus']


In [269]:
socratic = ancient_time_period('socratic')

In [270]:
print socratic['Socrates']
print '\n', socratic.keys()

{'year_died': -399, 'year_born': -469, 'time_period': 'socratic', 'Western?': True}

['Socrates', 'Plato', 'Aristotle', 'Diogenes']


In [271]:
hellenistic = ancient_time_period('hellenistic')

In [272]:
print hellenistic['Pyrrho']
print '\n', hellenistic.keys()

{'year_died': -270, 'year_born': -360, 'time_period': 'hellenistic', 'Western?': True}

['Pyrrho', 'Plotinus', 'Epicurus', 'Zeno', 'Philo']


In [273]:
roman = ancient_time_period('roman')

In [274]:
print roman['Epictetus']
print '\n', roman.keys()

{'year_died': 135, 'year_born': 55, 'time_period': 'roman', 'Western?': True}

['Marcus Aurelius', 'Cicero', 'Boethius', 'Augustine', 'Epictetus']


In [275]:
def ancient_philosophers():
    pre_socratic = ancient_time_period('presocratic')
    socratic = ancient_time_period('socratic')
    hellenistic = ancient_time_period('hellenistic')
    roman = ancient_time_period('roman')
    
    ancient = pre_socratic
    ancient.update(socratic)
    ancient.update(hellenistic)
    ancient.update(roman)
    
    return ancient

In [276]:
ancient = ancient_philosophers()

In [277]:
print ancient['Aristotle']
print '\n', ancient.keys()

{'year_died': -322, 'year_born': -384, 'time_period': 'socratic', 'Western?': True}

['Empedocles', 'Boethius', 'Diogenes', 'Philo', 'Thales', 'Cicero', 'Epictetus', 'Anaxagoras', 'Socrates', 'Marcus Aurelius', 'Anaximander', 'Heraclitus', 'Plotinus', 'Protagoras', 'Parmenides', 'Pythagoras', 'Augustine', 'Zeno', 'Gorgias', 'Plato', 'Zeno_Elea', 'Democritus', 'Pyrrho', 'Anaximenes', 'Aristotle', 'Epicurus']


In [278]:
def medieval_time_period(time_period):
    # Make Request to page of specific time period
    r = get('http://www.philosophybasics.com/historical_' + time_period + '.html')
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Get names of each philosopher from specified time period
    philosophers = soup.select('a')
    
    if time_period == 'medieval':
        min_slice, max_slice = 8, 18

    else: # time_period == 'renaissance'
        min_slice, max_slice = 12, 16
        
    philosophers = [unidecode(x.string) for x in philosophers[min_slice:max_slice]]
    
    base_url = 'http://www.philosophybasics.com/philosophers_'
    phil_dict = defaultdict(dict)
    
    if time_period == 'medieval':
        
        for name in philosophers:
            
            if name == 'Bacon, Roger':
                url = base_url + 'bacon_roger.html'
            
            else:
                name = name.split(r'[, ' ']')[0]
                url = base_url + name.lower() + '.html'
                
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
        
    else: # time_period == 'Renaissance'
        
        for name in philosophers:
            
            if name == 'Bacon, Sir Francis':
                name = 'Sir_Francis_Bacon'
                url = base_url + 'bacon_francis.html'
            
            else:
                name = name.split(',')[0]
                url = base_url + name.lower() + '.html'
            
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
    
    return phil_dict

In [279]:
medieval_time = medieval_time_period('medieval')

In [280]:
print medieval_time['Bacon, Roger']
print '\n', medieval_time.keys()

{'year_died': 1294, 'year_born': 1214, 'time_period': 'medieval', 'Western?': True}

['Aquinas, St. Thomas', 'Bacon, Roger', 'Avicenna (Ibn Sina)', 'Scotus, John Duns', 'Abelard, Peter', 'Anselm, St.', 'Ockham (Occam), William of', 'Averroes (Ibn Rushd)', 'Albertus Magnus', 'Maimonides']


In [281]:
renaissance = medieval_time_period('renaissance')

In [282]:
print renaissance['Machiavelli']
print '\n', renaissance.keys()

{'year_died': 1527, 'year_born': 1469, 'time_period': 'renaissance', 'Western?': True}

['Erasmus', 'Machiavelli', 'Sir_Francis_Bacon', 'More']


In [283]:
def medieval_philosophers():
    medieval = medieval_time_period('medieval')
    renaissance = medieval_time_period('renaissance')
    
    medieval = medieval
    medieval.update(renaissance)
    
    return medieval

In [284]:
medieval = medieval_philosophers()

In [285]:
print medieval['Erasmus']
print '\n', medieval.keys()

{'year_died': 1536, 'year_born': 1466, 'time_period': 'renaissance', 'Western?': True}

['Aquinas, St. Thomas', 'Sir_Francis_Bacon', 'Bacon, Roger', 'Avicenna (Ibn Sina)', 'Scotus, John Duns', 'Machiavelli', 'Abelard, Peter', 'Erasmus', 'Anselm, St.', 'Ockham (Occam), William of', 'Averroes (Ibn Rushd)', 'Albertus Magnus', 'Maimonides', 'More']


In [286]:
def modern_time_period(time_period):
    # Make Request to page of specific time period
    r = get('http://www.philosophybasics.com/historical_' + time_period + '.html')
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Get names of each philosopher from specified time period
    philosophers = soup.select('a')
    
    if time_period == 'reason' or time_period == 'enlightenment':
        min_slice, max_slice = 9, 16

    else: # time_period == 'modern'
        min_slice, max_slice = 8, 36
        
    philosophers = [unidecode(x.string) for x in philosophers[min_slice:max_slice]]
    
    base_url = 'http://www.philosophybasics.com/philosophers_'
    phil_dict = defaultdict(dict)
    
    if time_period == 'reason':
        for name in philosophers:
            
            name_temp = name.split(',')[0]
            
            url = base_url + name_temp.lower() + '.html'
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
    
    elif time_period == 'enlightenment':
        for  name in philosophers:
            
            name_temp = name.split(',')[0]
            
            url = base_url + name_temp.lower() + '.html'
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
            
    else: # time_period == 'modern'
        for name in philosophers:
            
            if name == 'Friedrich Schelling':
                name_temp = 'Schelling'
            
            else:
                name_temp = name.split(',')[0]
            
            url = base_url + name_temp.lower() + '.html'
            birth, death = 'AD', 'AD'
            
            phil_dict = add_philosopher(url, name, phil_dict, time_period, birth=birth, death=death)
            
    return phil_dict

In [287]:
reason = modern_time_period('reason')

In [288]:
print reason['Locke, John']
print '\n', reason.keys()

{'year_died': 1704, 'year_born': 1632, 'time_period': 'reason', 'Western?': True}

['Descartes, Rene', 'Leibniz, Gottfried Wilhelm', 'Pascal, Blaise', 'Malebranche, Nicolas', 'Spinoza, Baruch (Benedict)', 'Hobbes, Thomas', 'Locke, John']


In [289]:
enlightenment = modern_time_period('enlightenment')

In [290]:
print enlightenment['Smith, Adam']
print '\n', enlightenment.keys()

{'year_died': 1790, 'year_born': 1723, 'time_period': 'enlightenment', 'Western?': True}

['Berkeley, Bishop George', 'Hume, David', 'Smith, Adam', 'Voltaire (Francois Marie Arouet)', 'Burke, Edmund', 'Kant, Immanuel', 'Rousseau, Jean-Jacques']


In [291]:
modern_time = modern_time_period('modern')

In [292]:
print modern_time['Nietzsche, Friedrich']
print '\n', modern_time.keys()

{'year_died': 1900, 'year_born': 1844, 'time_period': 'modern', 'Western?': True}

['Husserl, Edmund', 'Quine, Willard Van Orman', 'Heidegger, Martin', 'Peirce, Charles Sanders', 'Hegel, G.W.F.', 'Sartre, Jean-Paul', 'James, William', 'Derrida, Jacques', 'Foucault, Michel', 'Nietzsche, Friedrich', 'Emerson, Ralph Waldo', 'Kierkegaard, Soren', 'Marx, Karl', 'Mill, John Stuart', 'Ryle, Gilbert', 'Comte, Auguste', 'Wittgenstein, Ludwig', 'Dewey, John', 'Bentham, Jeremy', 'Moore, George Edward', 'Russell, Bertrand', 'Schopenhauer, Arthur', 'Thoreau, Henry David', 'Fichte, Johann Gottlieb', 'Ayer, Alfred', 'Friedrich Schelling', 'Whitehead, Alfred North', 'Frege, Gottlob']


In [293]:
def modern_philosophers():
    reason = modern_time_period('reason')
    enlightenment = modern_time_period('enlightenment')
    modern = modern_time_period('modern')
    
    modern_phil = reason
    modern_phil.update(enlightenment)
    modern_phil.update(modern)
    
    return modern_phil

In [294]:
modern = modern_philosophers()

In [295]:
print modern['Kant, Immanuel']
print '\n', modern.keys()

{'year_died': 1804, 'year_born': 1724, 'time_period': 'enlightenment', 'Western?': True}

['Smith, Adam', 'Husserl, Edmund', 'Quine, Willard Van Orman', 'Kant, Immanuel', 'Bentham, Jeremy', 'Heidegger, Martin', 'Peirce, Charles Sanders', 'Hegel, G.W.F.', 'Locke, John', 'Sartre, Jean-Paul', 'James, William', 'Hume, David', 'Derrida, Jacques', 'Foucault, Michel', 'Mill, John Stuart', 'Nietzsche, Friedrich', 'Emerson, Ralph Waldo', 'Pascal, Blaise', 'Malebranche, Nicolas', 'Rousseau, Jean-Jacques', 'Kierkegaard, Soren', 'Ryle, Gilbert', 'Berkeley, Bishop George', 'Marx, Karl', 'Leibniz, Gottfried Wilhelm', 'Voltaire (Francois Marie Arouet)', 'Comte, Auguste', 'Burke, Edmund', 'Wittgenstein, Ludwig', 'Dewey, John', 'Spinoza, Baruch (Benedict)', 'Descartes, Rene', 'Moore, George Edward', 'Russell, Bertrand', 'Schopenhauer, Arthur', 'Thoreau, Henry David', 'Fichte, Johann Gottlieb', 'Ayer, Alfred', 'Friedrich Schelling', 'Whitehead, Alfred North', 'Hobbes, Thomas', 'Frege, Gottlob']


In [349]:
def western_philosophers():
    ancient = ancient_philosophers()
    medieval = medieval_philosophers()
    modern = modern_philosophers()
    
    western = ancient
    western.update(medieval)
    western.update(modern)
    
    return western

In [350]:
western = western_philosophers()

In [351]:
print western.keys()

['Aquinas, St. Thomas', 'Bacon, Roger', 'Smith, Adam', 'Husserl, Edmund', 'Zeno_Elea', 'Quine, Willard Van Orman', 'Kant, Immanuel', 'Bentham, Jeremy', 'Heidegger, Martin', 'Peirce, Charles Sanders', 'Hegel, G.W.F.', 'Erasmus', 'Locke, John', 'Sartre, Jean-Paul', 'Pythagoras', 'Derrida, Jacques', 'Sir_Francis_Bacon', 'Plato', 'Emerson, Ralph Waldo', 'Abelard, Peter', 'Ockham (Occam), William of', 'Epicurus', 'Berkeley, Bishop George', 'Empedocles', 'Ryle, Gilbert', 'Comte, Auguste', 'Burke, Edmund', 'Mill, John Stuart', 'Rousseau, Jean-Jacques', 'Socrates', 'Anaximander', 'Descartes, Rene', 'Maimonides', 'Parmenides', 'Zeno', 'Spinoza, Baruch (Benedict)', 'Russell, Bertrand', 'Thoreau, Henry David', 'Gorgias', 'Ayer, Alfred', 'Pyrrho', 'Anaximenes', 'Anselm, St.', 'Whitehead, Alfred North', 'Hobbes, Thomas', 'Albertus Magnus', 'Leibniz, Gottfried Wilhelm', 'Diogenes', 'Scotus, John Duns', 'Anaxagoras', 'Heraclitus', 'Frege, Gottlob', 'More', 'James, William', 'Foucault, Michel', 'Epict

{'Abelard, Peter': {'Western?': True,
  'time_period': 'medieval',
  'year_born': nan,
  'year_died': nan},
 'Albertus Magnus': {'Western?': True,
  'time_period': 'medieval',
  'year_born': nan,
  'year_died': nan},
 'Anaxagoras': {'Western?': True,
  'time_period': 'presocratic',
  'year_born': -500,
  'year_died': -428},
 'Anaximander': {'Western?': True,
  'time_period': 'presocratic',
  'year_born': -610,
  'year_died': -546},
 'Anaximenes': {'Western?': True,
  'time_period': 'presocratic',
  'year_born': -585,
  'year_died': -525},
 'Anselm, St.': {'Western?': True,
  'time_period': 'medieval',
  'year_born': nan,
  'year_died': nan},
 'Aquinas, St. Thomas': {'Western?': True,
  'time_period': 'medieval',
  'year_born': nan,
  'year_died': nan},
 'Aristotle': {'Western?': True,
  'time_period': 'socratic',
  'year_born': -384,
  'year_died': -322},
 'Augustine': {'Western?': True,
  'time_period': 'roman',
  'year_born': 354,
  'year_died': 430},
 'Averroes (Ibn Rushd)': {'Weste