In [1]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup

In [2]:
def simple_get(url):
    """
    Attempts to get the content at `url` by making an HTTP GET request.
    If the content-type of response is some kind of HTML/XML,
    return the text content, else return None
    """
    try: 
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content
            else:
                return None
    except RequestException as e:
        log_error('Error during requests to {0}: {1}'.format(url, str(e)))
        return None

In [3]:
def is_good_response(resp):
    """
    Returns true if the response seems to be HTML, false otherwise
    """
    content_type = resp.headers['Content-Type'].lower()
    return(resp.status_code == 200
           and content_type is not None
           and content_type.find('html') > -1)

In [4]:
def log_error(e):
    """
    It is always a good idea to log errors. 
    This function just prints them, but you can make it do anything.
    """
    print(e)

In [11]:
url = 'http://www.fabpedigree.com/james/mathmen.htm'

In [12]:
raw_html = simple_get(url)

In [14]:
html = BeautifulSoup(raw_html, 'html.parser')

In [20]:
for i, li in enumerate(html.select('li')[:6]):
    print(i, li.text)

0  Isaac Newton
 Archimedes
 Carl F. Gauss
 Leonhard Euler
 Bernhard Riemann

1  Archimedes
 Carl F. Gauss
 Leonhard Euler
 Bernhard Riemann

2  Carl F. Gauss
 Leonhard Euler
 Bernhard Riemann

3  Leonhard Euler
 Bernhard Riemann

4  Bernhard Riemann

5  Henri Poincaré
 Joseph-Louis Lagrange
 Euclid  of Alexandria
 David Hilbert
 Gottfried W. Leibniz



In [21]:
def get_names():
    """
    Downloads the page where the list of mathematicians is found
    and returns a list of strings, one per mathematician
    """
    response = simple_get(url)
    if response is not None:
        html = BeautifulSoup(response, 'html.parser')
        names = set()
        for li in html.select('li'):
            for name in li.text.split('\n'):
                if len(name) > 0:
                    names.add(name.strip())
        return list(names)
    
    # Raise an exception if we failed to get any data from the url
    raise Exception('Error retrieving contents at {}'.format(url))

In [22]:
def get_hits_on_name(name):
    """
    Accepts a `name` of a mathematician and returns the number
    of hits that mathematician's wikipedia page received in the 
    last 60 days as an `int`
    """
    url_root = 'https://xtools.wmflabs.org/articleinfo/en.wikipedia.org/{}'
    response = simple_get(url_root.format(name))
    
    if response is not None:
        html = BeautifulSoup(response, 'html.parser')
        hit_link = [a for a in html.select('a')
                    if a['href'].find('latest-60') > -1]
        
        if len(hit_link) > 0:
            link_text = hit_link[0].text.replace(',', '')
            try: 
                return int(link_text)
            except:
                log_error("Couldn't parse {} as an `int`".format(link_text))
    
    log_error('No pageviews found for {}'.format(name))
    return None

In [44]:
names = get_names()
results = []

for name in names:
    try:
        hits = get_hits_on_name(name)
        if hits is None:
            hits = -1
        results.append((hits, name))
    except:
        results.append((-1, name))
        log_error('Error encountered while processing '
                  '{}, skipping'.format(name))
        
results.sort()
results.reverse()

if len(results) > 5:
    top_marks = results[:5]
else:
    top_marks = results
    
print('\nThe most popular mathematicains are:\n')
for (mark, mathematician) in top_marks:
    print('{} with {} pageviews'.format(mathematician, mark))

no_results = len([res for res in results if res[0] == -1])
print('\nBut we did not find results for '
      '{} mathematicians on the list'.format(no_results))

No pageviews found for Adrien M. Legendre
No pageviews found for Leonardo `Fibonacci'
No pageviews found for Gottfried W. Leibniz
No pageviews found for F. L. Gottlob Frege
No pageviews found for F. Gotthold Eisenstein
No pageviews found for Karl W. T. Weierstrass
No pageviews found for M. E. Camille Jordan
No pageviews found for James J. Sylvester
No pageviews found for F.E.J. Émile Borel
No pageviews found for Ernst E. Kummer
No pageviews found for Hermann K. H. Weyl
No pageviews found for William R. Hamilton
No pageviews found for Panini  of Shalatula
No pageviews found for Peter G. L. Dirichlet
No pageviews found for Bháscara (II) Áchárya
No pageviews found for Alhazen ibn al-Haytham
No pageviews found for Muhammed al-Khowârizmi
No pageviews found for Omar al-Khayyám
No pageviews found for Hermann G. Grassmann

The most popular mathematicains are:

Albert Einstein with 1073992 pageviews
Isaac Newton with 514082 pageviews
Galileo Galilei with 363680 pageviews
Srinivasa Ramanujan wit