# US presidential statistics
## Simon Repko, Lukas Petrasek
### IES FSS CU
### 31.5.2019

This notebook serves as a demonstration of a school project whose goal is to achieve the following:
* scrape web pages to get historical data on US presidents
* manipulate the data into a form suitable for being visualized
* make visualizations based on the data

In [1]:
# TODO: import packages

In [10]:
from typing import Any, Dict

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
# TODO: initialize the scraping class and apply the methods necessary to get the data here

In [4]:
def getSoup(link, timeout=30):
    """Download a page and parse it with BeautifulSoup.

    Args:
        link: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole scraping run.

    Returns:
        BeautifulSoup tree of the response body, built with 'html.parser'.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            timeout expires.
    """
    # The HTTP status is deliberately not checked: callers inspect the parsed
    # page and handle missing elements themselves.
    response = requests.get(link, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')

def getjoinSoup(link, base_url='https://millercenter.org'):
    """Fetch and parse a page given its link relative to the site root.

    Args:
        link: URL 'ending' (e.g. an ``href`` taken from the site), appended
            verbatim to ``base_url``.
        base_url: Site root the link is appended to; defaults to the Miller
            Center site.

    Returns:
        BeautifulSoup tree of the requested page.
    """
    # Plain concatenation on purpose: no slash is added or removed.
    # Fetching and parsing is delegated to getSoup so it lives in one place.
    return getSoup("".join([base_url, link]))

def getDataMiller(dictionary):
    """Scrape the Miller Center profile page of every president given.

    Args:
        dictionary: Mapping of president name to the relative link ('href')
            of that president's page, as collected into 'pres_dict'.

    Returns:
        Dict keyed by president name. Each value is a dict holding the
        president's "fast facts" (keyed by the label text of each fact)
        followed by the keys 'Description', 'Quote' and
        'Number of major events'.

    Raises:
        AttributeError: if a profile page lacks one of the expected elements
            (fast facts, description, quote or sub-navigation).
        IndexError: if the sub-navigation has fewer than two links.
    """
    data_presidents = {}
    for name, href in dictionary.items():
        # Open the subsite of this president.
        soup = getjoinSoup(href)

        # 1. FAST FACTS dashboard.
        details = (
            soup.find('div', {'class': 'president-main-wrapper'})
            .find('div', {'class': 'fast-facts-wrapper'})
        )
        # Drop the bare newline nodes between elements, then skip the first
        # remaining child, which is the div holding the description.
        fact_nodes = [child for child in details.children if child != '\n'][1:]
        record = {node.label.text: node.div.text for node in fact_nodes}

        # 2. Brief description of the president (first paragraph).
        record['Description'] = soup.find('div', {'class': 'copy-wrapper'}).p.text

        # 3. Famous quote of the president.
        record['Quote'] = soup.find(
            'blockquote', {'class': 'president-quote'}
        ).contents[0]

        # 4. Number of KEY EVENTS during the time in office.
        # The second link of the sub-navigation leads to the key events page.
        key_events = soup.find('div', {'class': 'sub-nav-region'}).find_all('a')[1]
        events_soup = getjoinSoup(key_events['href'])

        # Some pages (D. Trump at the time of writing) have no key events
        # body at all; count those as zero events.
        events_body = events_soup.find('div', {'class': 'article-wysiwyg-body'})
        if events_body is None:
            record['Number of major events'] = 0
        else:
            # Event titles are set in bold; the site switched notation over
            # time, so both <strong> and <b> tags are counted.
            record['Number of major events'] = len(
                events_body.find_all(['strong', 'b'])
            )

        data_presidents[name] = record

    return data_presidents

In [5]:
# Landing page of the Miller Center; its main navigation lists all presidents.
html = 'https://millercenter.org/'
soup = getSoup(html)

# The main navigation holds several similar submenus; the one at index 1 is
# the list of US presidents with the link 'ending' of each one's subsite.
main_nav = soup.find('nav', {'aria-labelledby': 'block-mainnavigation-3-menu'})
name_list = main_nav.find_all('ul', {'class': 'submenu'})[1]

# Map each president's name (link text) to the link 'ending' of their subsite.
pres_dict = {}
for pres in name_list.find_all('a'):
    pres_dict[pres.text] = pres['href']

In [6]:
# Scrape the details of every president in pres_dict. getDataMiller fetches
# two pages per president (profile and key events), so this cell is slow.
data_presidents = getDataMiller(pres_dict)

Unnamed: 0,George Washington,John Adams,Thomas Jefferson,James Madison,James Monroe,John Quincy Adams,Andrew Jackson,Martin Van Buren,William Harrison,John Tyler,...,Lyndon B. Johnson,Richard Nixon,Gerald Ford,Jimmy Carter,Ronald Reagan,George H. W. Bush,Bill Clinton,George W. Bush,Barack Obama,Donald Trump
Birth Date,"February 22, 1732\n","October 30, 1735\n","April 13, 1743\n","March 16, 1751\n","April 28, 1758\n","July 11, 1767\n","March 15, 1767\n","December 5, 1782\n","February 9, 1773\n","March 29, 1790\n",...,"August 27, 1908\n","January 9, 1913\n","July 14, 1913\n","October 1, 1924\n","February 6, 1911\n","June 12, 1924\n","August 19, 1946\n","July 6, 1946\n","August 4, 1961\n","June 14, 1946\n"
Birth Place,"Pope’s Creek, Virginia","North Precinct of Braintree (now Quincy), Mass...","Shadwell plantation, Goochland County, Virginia","Port Conway, Virginia","Westmoreland County, Virginia","Braintree (now Quincy), Massachusetts","Waxhaw area, on North Carolina-South Carolina ...","Kinderhook, New York","Berkeley plantation, Charles City County, Virg...","Charles City County, Virginia",...,"Johnson City, Texas","Yorba Linda, California","Omaha, Nebraska","Plains, Georgia","Tampico, Illinois","Milton, Massachusetts","Hope, Arkansas","New Haven, Connecticut","Honolulu, Hawaii","New York, New York"
Burial Place,"Family vault, Mount Vernon, Virginia","Quincy, Massachusetts","Monticello, near Charlottesville, Virginia","Montpelier, Orange County, Virginia","New York City, New York","First Unitarian Church, Quincy, Massachusetts","The Hermitage, Nashville, Tennessee","Kinderhook Cemetery, Kinderhook, New York","William Henry Harrison Memorial State Park, No...","Hollywood Cemetery, Richmond, Virginia",...,"Near Johnson City, Texas","Yorba Linda, California","Grand Rapids, Michigan",,"Simi Valley, California","College Station, Texas",,,,
Career,"Soldier, Planter",Lawyer,"Lawyer, Planter","Politician, Planter",Lawyer,"Lawyer, Senator, Diplomat","Lawyer, Soldier",Lawyer,Soldier,Lawyer,...,"Teacher, Public Official","Lawyer, Public Official","Lawyer, Public Official","Soldier; Farmer, Warehouseman, Public Official...","Actor, Public Official","Businessman, public official","Lawyer, public official","Businessman, public official","Community Organizer, Public Official","Businessman, Real Estate Developer, Television..."
Children,,"Abigail Amelia (1765–1813), John Quincy (1767–...","Martha (1772–1836), Jane Randolph (1774–1775),...",,"Eliza Kortright (1786–1835), James Spence (179...","George Washington (1801–1829), John (1803–1834...","Andrew Jackson, Jr. (adopted nephew, 1808-1865)","Abraham (1807–1873), John (1810–1866), Martin ...","Elizabeth Bassett (1796–1846), John Cleves Sym...","Mary (1815–1848), Robert (1816–1877), John (18...",...,Lynda Bird (1944– ); Luci Baines (1947– ),Patricia (1946– ); Julie (1948– ),Michael Gerald (1950– ); John Gardner (1952– )...,"John William (Jack) (1947–), James Earl III (C...",Maureen Elizabeth (1941–2001); Michael Edward ...,"George W. (1946–), Robin (1949–1953), John Ell...",Chelsea Victoria (1980),Barbara (1981); Jenna (1981),"Malia Ann (1998); Natasha, known as Sasha (2001)",Donald Jr. (1977); Ivanka (1981); Eric (1984);...
Date Ended,"\nMarch 4, 1797\n\n","\nMarch 4, 1801\n\n","\nMarch 4, 1809\n\n","\nMarch 4, 1817\n\n","\nMarch 4, 1825\n\n","\nMarch 4, 1829\n\n","\nMarch 4, 1837\n\n","\nMarch 4, 1841\n\n","\nApril 4, 1841\n\n","\nMarch 4, 1845\n\n",...,"\nJanuary 20, 1969\n\n","\nAugust 9, 1974\n\n","\nJanuary 20, 1977\n\n","\nJanuary 20, 1981\n\n","\nJanuary 20, 1989\n\n","\nJanuary 20, 1993\n\n","\nJanuary 20, 2001\n\n","\nJanuary 20, 2009\n\n","\nJanuary 20, 2017\n\n",
Death Date,"December 14, 1799\n","July 4, 1826\n","July 4, 1826\n","June 28, 1836\n","July 4, 1831\n","February 23, 1848\n","June 8, 1845\n","July 24, 1862\n","April 4, 1841\n","January 18, 1862\n",...,"January 22, 1973\n","April 22, 1994\n","December 26, 2006\n",,"June 5, 2004\n","November 30, 2018\n",,,,
Description,George Washington was born into a mildly prosp...,"Before becoming President in 1797, John Adams ...","Thomas Jefferson, the author of the Declaratio...","Like his close friend Thomas Jefferson, James ...",James Monroe was the last American President o...,"Reared for public service, John Quincy Adams b...","Andrew Jackson, seventh President of the Unite...",Martin Van Buren said that the two happiest da...,William Henry Harrison served the shortest tim...,John Tyler signaled the last gasp of the Old V...,...,"On November 22, 1963, John F. Kennedy was shot...",Schoolchildren absorb at least one fact about ...,Gerald R. Ford became President of the United ...,Jimmy Carter's one-term presidency is remember...,"Ronald Wilson Reagan, the 40th President of th...",George Herbert Walker Bush belongs to a politi...,"William Jefferson Clinton, the young President...","George W. Bush, the 43rd President of the Unit...",Barack Obama was inaugurated as the 44th presi...,Donald Trump at the age of 70 won his first bi...
Education,The equivalent of an elementary school education,Harvard College (graduated 1755),College of William and Mary (graduated 1762),College of New Jersey (now Princeton Universit...,College of William and Mary (graduated 1776),Harvard College (graduated 1787),,Kinderhook Academy (graduated 1796),Hampden-Sydney College,College of William and Mary (graduated 1807),...,Southwest Texas State Teachers College (now Te...,Whittier College (1934); Duke University Law S...,University of Michigan (1935); Yale University...,"Georgia Southwestern College, 1941–1942; Georg...",Eureka College (1932),"Yale University, 1948","Georgetown University (1968), attended Oxford ...","Yale (B.S., 1968), Harvard (M.B.A., 1975)","Columbia University (B.A., 1983), Harvard (J.D...","Wharton School, University of Pennsylvania (1968)"
Full Name,George Washington,,Thomas Jefferson,James Madison,James Monroe,John Quincy Adams,Andrew Jackson,Martin Van Buren,William Henry Harrison,,...,Lyndon Baines Johnson,Richard Milhous Nixon,Gerald Rudolph Ford,"James Earl Carter, Jr.",Ronald Wilson Reagan,George Herbert Walker Bush,William Jefferson Clinton,George Walker Bush,Barack Hussein Obama,Donald John Trump


In [7]:
# TODO: manipulate the data here

In [11]:
# Grover Cleveland served two non-consecutive terms, so he gets a second
# entry: a copy of the first one without the term dates, which are filled in
# separately for each term.
data_presidents['Grover Cleveland 2'] = {
    field: entry
    for field, entry in data_presidents['Grover Cleveland'].items()
    if field not in ('Inauguration Date', 'Date Ended')
}

def correct_Grover_Cleveland_dates(data: Dict[str, Any]) -> Dict[str, Any]:
    """Split Grover Cleveland's combined term dates across his two entries.

    The 'Inauguration Date' and 'Date Ended' values of 'Grover Cleveland' are
    expected to hold both terms in one newline-separated string, with the
    first term at position 1 and the second at position 3 after splitting on
    a newline. The first-term values overwrite the original entry and the
    second-term values are stored under 'Grover Cleveland 2'.

    Args:
        data: Scraped data keyed by president name; must already contain both
            the 'Grover Cleveland' and 'Grover Cleveland 2' entries.

    Returns:
        The same ``data`` object, modified in place.
    """
    first_term, second_term = 'Grover Cleveland', 'Grover Cleveland 2'

    # Extract all values before writing anything back, so a malformed value
    # raises before the data is touched.
    per_term = {}
    for field in ('Inauguration Date', 'Date Ended'):
        per_term[field] = (
            data[first_term][field].split('\n')[1],
            data[first_term][field].split('\n')[3],
        )

    for field, (first_value, second_value) in per_term.items():
        data[first_term][field] = first_value
        data[second_term][field] = second_value

    return data

# Run only once: after the first run the two date fields no longer contain
# newline-separated pairs, so splitting them again raises an IndexError.
data_presidents = correct_Grover_Cleveland_dates(data_presidents)

In [12]:
# Final table with data extracted from https://millercenter.org
# (one column per president, one row per scraped field).
# pd.DataFrame(data_presidents)

# Strip the newline characters the scraped text values carry; non-string
# cells (e.g. the event counts or missing values) are left untouched.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map - switch once the pandas version used here is known.
data = pd.DataFrame(data_presidents).applymap(lambda x: x.replace('\n', '') if isinstance(x, str) else x)

In [8]:
# TODO: make the visualizations here

In [9]:
# TODO: conclude here?