<h4> Import Dependencies </h4>

In [54]:
import os
import pandas as pd
import json
import requests
from pandas.io.json import json_normalize
from pprint import pprint
import sqlite3
import csv

# API keys
from config import api_key

<h2>Financial Data, API Exctraction</h2>

<h4>Make API Calls and Populate a DataFrame</h4>

In [58]:
#Starting Year
year = 2000

In [55]:
#API url path
url = f"https://api.open.fec.gov/v1/elections/?api_key={api_key}&sort=-total_receipts&sort_hide_null=false&office=president&election_full=true&sort_null_only=false&cycle={year}&sort_nulls_last=false&page=1"

# read data from the API 
response = requests.get(url).json()
# pprint(response)

# convert JSON to dataframe
candidate_finance = pd.json_normalize(response, "results")
# print(candidate_finance.shape)

#Append following year data to existing DataFrame
for yr in range(year + 4, 2017, 4):
    url = f"https://api.open.fec.gov/v1/elections/?api_key={api_key}&sort=-total_receipts&sort_hide_null=false&office=president&election_full=true&sort_null_only=false&cycle={yr}&sort_nulls_last=false&page=1"
    response = requests.get(url).json()
    sub_df = pd.json_normalize(response, "results")
    candidate_finance.head(2)
#     print(f"{yr} {type(sub_df)}  ================================")
    candidate_finance = candidate_finance.append(sub_df, ignore_index=True)
    
# Format dataframe
candidate_finance = candidate_finance[['candidate_election_year', 'candidate_id', 'candidate_name', 'party_full', 'total_receipts', 'total_disbursements', 'cash_on_hand_end_period']]
candidate_finance.style.format({"total_receipts": '${0:,.2f}',
                                "total_disbursements": '${0:,.2f}',
                                "cash_on_hand_end_period": '${0:,.2f}',})

candidate_finance["candidate_name"]

0                  BUSH, GEORGE W
1                        GORE, AL
2                   FORBES, STEVE
3                  MCCAIN, JOHN S
4                   BRADLEY, BILL
                 ...             
95        O'MALLEY, MARTIN JOSEPH
96              GRAHAM, LINDSEY O
97                 HUCKABEE, MIKE
98           SANTORUM, RICHARD J.
99    MCMULLIN, EVAN / MINDY FINN
Name: candidate_name, Length: 100, dtype: object

<h4>Clean Data</h4>

In [52]:
def rename(name):
    #Some names are in format: Pres / Vice Pres - we only want President name
    name = name.split("/")[0]
    #President name in Last, First MI. format - we want First MI Last
    name_components = name.split(",")
    new_name = name_components[-1] + " " + name_components[0]
    return new_name

#Apply the renaming funtion to the dataframe
candidate_finance["candidate_name"] = candidate_finance["candidate_name"].apply(rename)
candidate_finance


Unnamed: 0,candidate_election_year,candidate_id,candidate_name,party_full,total_receipts,total_disbursements,cash_on_hand_end_period
0,2000,P00003335,GEORGE W BUSH GEORGE W BUSH,REPUBLICAN PARTY,1.936577e+08,1.864560e+08,7201734.18
1,2000,P80000912,AL GORE AL GORE,DEMOCRATIC PARTY,1.331080e+08,1.203352e+08,12772827.00
2,2000,P60003852,STEVE FORBES STEVE FORBES,REPUBLICAN PARTY,8.604857e+07,8.605477e+07,0.00
3,2000,P80002801,JOHN S MCCAIN JOHN S MCCAIN,REPUBLICAN PARTY,5.863623e+07,5.854997e+07,86262.00
4,2000,P80000516,BILL BRADLEY BILL BRADLEY,DEMOCRATIC PARTY,5.115617e+07,5.042141e+07,775203.00
...,...,...,...,...,...,...,...
95,2016,P60007671,MARTIN JOSEPH O'MALLEY MARTIN JOSEPH O'MALLEY,DEMOCRATIC PARTY,6.377550e+06,6.285182e+06,92368.71
96,2016,P60007697,LINDSEY O GRAHAM LINDSEY O GRAHAM,REPUBLICAN PARTY,6.010010e+06,6.010010e+06,0.00
97,2016,P80003478,MIKE HUCKABEE MIKE HUCKABEE,REPUBLICAN PARTY,4.335975e+06,4.330418e+06,5557.58
98,2016,P20002721,RICHARD J. SANTORUM RICHARD J. SANTORUM,REPUBLICAN PARTY,2.296044e+06,2.312075e+06,110242.76


<h2>Election Result Data, CSV Extraction</h2>

<h4>Read CSV and Populate a DataFrame</h4>

In [60]:
election_csv = f"{year}_PopularVote.csv"

path = os.path.join("Resources", election_csv)
election_df = pd.read_csv(path, encoding="ISO-8859-1")

<h4>Clean Data</h4>

In [64]:
election_df = election_df.rename(columns = {
    "Candidate (Party Label)": "name"
})
election_df = election_df[["name", "Popular Vote Total", "Percent of Popular Vote"]]
election_df["year"] = 2000
election_df = election_df.dropna()

def conv_name(long_name):
    #Some data in FOrmat Name (Party) - we just want name
    short_name = long_name.split('(')[0]
    #MI is followed by ".", we want to remove that
    short_name = short_name.replace('.', '')
    return short_name

#apply the name cleaning to the dataframe
election_df["name"] = election_df["name"].apply(conv_name)
election_df.head()

Unnamed: 0,name,Popular Vote Total,Percent of Popular Vote,year
1,Al Gore,50999897,48.38,2000
2,George W Bush,50456002,47.87,2000
3,Ralph Nader,2882955,2.74,2000
4,Patrick J Buchanan,448895,0.42,2000
5,Harry Browne,384431,0.36,2000


<h4>Append folling year data to DataFrame</h4?

In [70]:
for yr in range(year + 4, 2017, 4):
    path = os.path.join("Resources", f"{yr}_PopularVote.csv")
    temp_df = pd.read_csv(path, encoding="ISO-8859-1")
    if yr == 2004: #2004 data is formatted differently
        temp_df = temp_df.rename(columns = {
            "Candidate": "name"
        })
        temp_df = temp_df[["name", "Popular Vote Total", "Percent of Popular Vote"]]
        temp_df = temp_df.dropna()
        temp_df["name"] = temp_df["name"].apply(conv_name)
        temp_df["year"] = yr
        election_df = election_df.append(temp_df)
    else:
        temp_df = temp_df.rename(columns = {
            "Candidate (Party Label)": "name"
        })
        temp_df = temp_df[["name", "Popular Vote Total", "Percent of Popular Vote"]]
        temp_df = temp_df.dropna()
        temp_df["name"] = temp_df["name"].apply(conv_name)
        temp_df["year"] = yr
        election_df = election_df.append(temp_df)

election_df.dtypes

name                       object
Popular Vote Total         object
Percent of Popular Vote    object
year                        int64
dtype: object

<h2>Store DataFrames into SQL Database Tables</h2>

In [71]:
conn = sqlite3.connect('ETL_project.db')
c = conn.cursor()

c.execute("DROP TABLE IF EXISTS Finance")
c.execute('CREATE TABLE Finance (candidate_election_year number, candidate_id text, candidate_name text, party_full text, total_receipts number, total_disbursements number, cash_on_hand_end_period number)')
conn.commit()

candidate_finance.to_sql('Finance', conn, if_exists='replace', index = False)


c.execute("SELECT * FROM Finance WHERE candidate_election_year = 2012")

print("=========================")
print("Financial Data for 2012")
for row in c.fetchall():
    print (row)
################################################################
c.execute("DROP TABLE IF EXISTS Votes")
c.execute('CREATE TABLE Votes (name text, Popular Vote Total int, Percent of Popular Vote text, year int)')
conn.commit()

election_df.to_sql('Votes', conn, if_exists='replace', index = False)


c.execute("SELECT * FROM Votes WHERE year = 2012")

print("=========================")
print("Vote Data for 2012")
for row in c.fetchall():
    print (row)

Financial Data for 2012
(2012, 'P80003338', 'OBAMA, BARACK', 'DEMOCRATIC PARTY', 742601913.88, 757574968.34, 3299312.93)
(2012, 'P80003353', 'ROMNEY, MITT / RYAN, PAUL D. ', 'REPUBLICAN PARTY', 483642897.19, 483292209.77, 383275.27)
(2012, 'P80000748', 'PAUL, RON', 'REPUBLICAN PARTY', 41064074.11, 40495369.56, 1147671.75)
(2012, 'P60003654', 'GINGRICH, NEWT', 'REPUBLICAN PARTY', 24976120.46, 24900556.09, 75564.37)
(2012, 'P20002721', 'SANTORUM, RICHARD J.', 'REPUBLICAN PARTY', 23552730.52, 23426456.81, 126273.71)
(2012, 'P20003281', 'PERRY, JAMES R (RICK)', 'REPUBLICAN PARTY', 20593581.15, 20199068.69, 394512.46)
(2012, 'P00003608', 'CAIN, HERMAN', 'REPUBLICAN PARTY', 16917976.94, 16913859.48, 4117.46)
(2012, 'P20002184', 'HERMAN, RAPHAEL', 'DEMOCRATIC PARTY', 16414018.0, 16414018.0, 0.0)
(2012, 'P20003067', 'HUNTSMAN, JON', 'REPUBLICAN PARTY', 9173365.13, 9173365.13, 0.0)
(2012, 'P20002556', 'PAWLENTY, TIMOTHY', 'REPUBLICAN PARTY', 5965502.16, 5965502.16, 0.0)
(2012, 'P20002671', 'JOH

  method=method,


<h4>Practice putting SQL back into Dataframe</h4>

In [72]:
c.execute('''  
SELECT candidate_name, max(total_receipts) FROM Finance
          ''')

max_receipts = pd.DataFrame(c.fetchall(), columns=['candidate_name','total_receipts'])    
print (max_receipts)

  candidate_name  total_receipts
0  OBAMA, BARACK    7.786430e+08
