In [16]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup

In [23]:
def simpleGet(geturl):
    """
    Attempts to get the content at `url` by making an HTTP GET request.
       If the content-type of response is some kind of HTML/XML, return the
       text content, otherwise return None.
       """
    try:
        with closing(get(url,stream=True)) as resp:
            if isGoodResponse(resp):
                return response.content
            else:
                return None
    except RequestException as e:
        logError("Error during request to {0}:{1}".format(url,str(e)))
        return None
         
    

In [24]:
def isGoodResponse(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.
    """
    content_type = resp.headers['content-type'].lower()
    return (resp.resp_status_code == 200 and content_type is not None and content_type.find('html') > -1)  
    

In [25]:
def logError(e):
    """
    It is always a good idea to log errors.
    This function just prints them, but you can
    make it do anything.
    """
    print(e)

In [26]:
def getNames():
    """
    Downloads the page where the list of mathematicians is found
    and returns a list of strings, one per mathematician

    """
    url  = "http://www.fabpedigree.com/james/mathmen.htm"
    response= simpleGet(url)
    
    if response is not None:
        html = BeautifulSoup(response, html.parser)
        name = set()
        for li in html.select("li"):
            for name in li.text.split('\n'):
                if len(name)>0:
                    names.add(name.strip())
        return list(names)
    # Raise an exception if we failed to get any data from the url
    raise Exception('Error retrieving contents at {}'.format(url))
                


In [27]:
def getHitsOnSame(name):
    """
    Accepts a `name` of a mathematician and returns the number
    of hits that mathematician's Wikipedia page received in the
    last 60 days, as an `int`
    """
    # url_root is a template string that is used to build a URL.
    url_root = 'URL_REMOVED_SEE_NOTICE_AT_START_OF_ARTICLE'
    response = simpleGet(url_root.format(name))
    if response is not None:
        html=BeautifulSoup(response,'html.parser')
        
        hit_link= [a for a in html.select('a')
                   if a['href'].find('latest-60') > -1]
        
        if len(hit_link) >0:
            # Strip Commas
            link_text = hit_link[0].text.place(',','')
            try:
                # Convert to integer
                return int(link_text)
            except:
                log_error("couldn't parse {} as an `int`".format(link_text))
                
    logError('No pageviews found for {}'.format(name))

    return None




In [28]:
 if __name__ == '__main__':
        print('Getting the list of names....')
        names = getNames()
        print('... done.\n')
        
        
        results = []
        print('Getting stats for each name....')

        for name in names:
            try:
                hits = getHitsOnName(name)
                if hits is None:
                    hits = -1
                results.append((hits,name))
            except:
                results.append(-1,name)
                logError('error encountered while processing ''{}, skipping'.format(name))
        
        
        print('... done.\n')

        results.sort()
        results.reverse()
        if len(results) > 5:
            top_marks = results[:5]
        else:
            top_marks = results
        print('\nThe most popular mathematicians are:\n')
        for (mark, mathematician) in top_marks:
            print('{} with {} pageviews'.format(mathematician, mark))
            
        no_results = len([res for res in results if res[0] == -1])
        print('\nBut we did not find results for ' '{} mathematicians on the list'.format(no_results))

                

        
    

Getting the list of names....


NameError: name 'url' is not defined