# An Analysis of Political Contributions During the 2020 House of Representatives Election

In this part, you will obtain as much data as you can on the campaign contributions received by each candidate. This data is available through the website https://www.opensecrets.org/.

### Part 1: Data Gathering

#### 1. Start by acquiring the data from Tennessee's 7th District, which is available at https://www.opensecrets.org/races/summary?cycle=2020&id=TN07&spec=N. If you click the "Download .csv file" button, you can get a csv for this district. However, we don't want to have to click this button for every district. Instead, we'll use Python to automate this process. Start by sending a GET request to the download button URL, https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN07. Convert the result to a DataFrame.

In [29]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from IPython.core.display import HTML
import io
import re
import regex
import csv
from datetime import datetime as dt
import urllib3
from IPython.core.display import HTML
import re
from requests.exceptions import ConnectionError
import numpy as np

In [2]:
url = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN07'

# Download the TN07 summary CSV and load it into a DataFrame.
#
# raise_for_status() raises requests.exceptions.HTTPError for 4xx/5xx
# responses and does nothing for a successful one, so the `else` branch only
# runs when the download succeeded.
#
# `df` starts out empty so the cell still leaves a defined (empty) DataFrame
# behind when the request fails, instead of a NameError on the last line.
df = pd.DataFrame()

try:
    # A timeout keeps the notebook from hanging on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
# HTTPError is reached through the `requests` package so this handler does not
# depend on a separate `HTTPError` import.
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except Exception as err:
    print(f"Other error occurred: {err}")
else:
    data = response.content.decode('utf8')
    df = pd.read_csv(io.StringIO(data))
    # Tag every row with the district ID so it can be used later.
    df['DistIDCurr'] = 'TN07'

df


Unnamed: 0,cid,FirstLastP,Rcpts,Spent,PACs,Indivs,Cand,Other,EndCash,LgIndivs,...,Result,CRPICO,State,IncCID,Incumbent,primarydate,DistIDCurr,capeye,sort,SmLgIndivsNote
0,N00041873,Mark Green (R),1194960.47,935486.67,171900.0,819151.42,0.0,203909.05,287888.55,819151.42,...,W,I,Tennessee,,,2020-08-06 00:00:00 +0000,TN07,0,1,N
1,N00045536,Kiran Sreepada (D),206644.28,207190.98,4000.0,202644.28,0.0,0.0,0.0,179129.75,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN07,0,2,N
2,N00047077,Ronald Brown (I),1750.0,0.0,0.0,1750.0,0.0,0.0,9006.0,300.0,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN07,0,2,N
3,N00046592,Scott Vieira Jr (I),655.47,1048.51,10.0,45.0,35.0,565.47,-196.52,0.0,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN07,0,2,N
4,N00045535,Benjamin Estes (3),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN07,0,2,N


#### 2. Once you have working code for Tennessee's 7th District, expand on your code to capture all of Tennessee's districts into a single DataFrame. Make sure that you can distinguish which district each result came from. Export the results to a csv file.


In [98]:
#Lets define function to get the dataFrame for district

def getDistrictData(districtCode):
    """Download the 2020 OpenSecrets race summary for one district.

    Args:
        districtCode: District ID as it appears in the OpenSecrets URL,
            e.g. 'TN07'.

    Returns:
        A DataFrame parsed from the summary CSV, with its 'DistIDCurr' column
        set to ``districtCode`` so rows from different districts can be told
        apart after concatenation. If the request fails or the response has
        no CSV content, the problem is printed and an empty DataFrame holding
        only a 'DistIDCurr' column is returned, so a caller looping over many
        districts can keep going.
    """
    url = f'https://www.opensecrets.org/races/summary.csv?cycle=2020&id={districtCode}'
    no_data = pd.DataFrame(columns=['DistIDCurr'])

    # raise_for_status() raises requests.exceptions.HTTPError for 4xx/5xx
    # responses and is a no-op for successful ones.
    try:
        # A timeout keeps one stalled connection from blocking the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        return no_data
    except Exception as err:
        print(f"Other error occurred: {err}")
        return no_data

    try:
        df = pd.read_csv(io.StringIO(response.content.decode('utf8')))
    except pd.errors.EmptyDataError:
        # read_csv raises this when the body has no columns to parse.
        print(f"No data returned for district {districtCode}")
        return no_data

    # Record which district these rows came from.
    df['DistIDCurr'] = districtCode
    return df

    

# State abbreviation plus the zero-padded district numbers to download.
districtList = ['01','02','03','04','05','06','07','08','09']
StateDistricts = {
    "state":'TN',
    "Districts": districtList
}

# One DataFrame per district; they are concatenated into a single frame below.
frameList=[]

for district in StateDistricts['Districts']:
    DistrictCode = f'{StateDistricts["state"]}{district}'
    frameList.append(getDistrictData(DistrictCode))

StateDistrictframe = pd.concat(frameList, ignore_index=True)
# Plain forward slashes: the previous '.\/data\/...' literal relied on invalid
# escape sequences and put literal backslashes into the path.
StateDistrictframe.to_csv('./data/TNDistrictData.csv', index=False)
StateDistrictframe


Unnamed: 0,cid,FirstLastP,Rcpts,Spent,PACs,Indivs,Cand,Other,EndCash,LgIndivs,...,Result,CRPICO,State,IncCID,Incumbent,primarydate,DistIDCurr,capeye,sort,SmLgIndivsNote
0,N00046688,Diana Harshbarger (R),2126945.6,1869099.77,222800.0,359728.5,1461293.0,83124.1,257845.83,315489.1,...,W,O,Tennessee,,,2020-08-06 00:00:00 +0000,TN01,0,2,N
1,N00046686,Blair Nicole Walsingham (D),140209.14,134994.55,1520.0,138689.14,0.0,0.0,5214.59,70085.2,...,L,O,Tennessee,,,2020-08-06 00:00:00 +0000,TN01,0,2,N
2,N00047760,Steve Holder (I),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,O,Tennessee,,,2020-08-06 00:00:00 +0000,TN01,0,2,N
3,N00041594,Tim Burchett (R),1336275.75,878487.63,269535.0,1072845.61,0.0,-6104.86,593677.72,729831.26,...,W,I,Tennessee,,,2020-08-06 00:00:00 +0000,TN02,0,1,N
4,N00041699,Renee Hoyos (D),812783.86,816793.15,3100.0,807459.01,0.0,2224.85,209.82,807459.01,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN02,0,2,N
5,N00047761,Matthew Campbell (I),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN02,0,2,N
6,N00030815,Chuck Fleischmann (R),1051653.39,381411.2,453858.46,603344.93,0.0,-5550.0,1880341.32,599059.93,...,W,I,Tennessee,,,2020-08-06 00:00:00 +0000,TN03,0,1,N
7,N00046911,Meg Gorman (D),85843.21,77759.83,2671.6,81271.61,2000.0,-100.0,8083.38,50245.2,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN03,0,2,N
8,N00046589,Nancy Baxley (I),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN03,0,2,N
9,N00047762,Amber Hysell (I),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,C,Tennessee,,,2020-08-06 00:00:00 +0000,TN03,0,2,N


In [4]:
wiki_rep_url = 'https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States'

# Fetch the page and stop on an error status rather than parsing an error page.
r = requests.get(wiki_rep_url, timeout=30)
r.raise_for_status()
wiki_rep_soup = BeautifulSoup(r.text, features="html.parser")

# The earlier attrs dict repeated the 'class' key, so only 'sortable' was ever
# matched. The CSS selector requires the table to carry both classes.
wiki_rep_table = wiki_rep_soup.select_one('table.wikitable.sortable')
if wiki_rep_table is None:
    # Without this check str(None) would be handed to read_html below.
    raise ValueError(f"No 'wikitable sortable' table found at {wiki_rep_url}")
table_html_rep_wiki = str(wiki_rep_table)

# read_html returns a list of DataFrames; the snippet holds a single table.
wiki_rep_df = pd.read_html(io.StringIO(table_html_rep_wiki))[0]
wiki_rep_df
#wiki_rep_df_limited = wiki_rep_df[['Reps.']]
#wiki_rep_df_limited
#wiki_rep_df_limited_flat = wiki_rep_df_limited.to_csv(header=None,index=False)
#wiki_rep_df_limited_flat_df = pd.read_csv(io.StringIO(wiki_rep_df_limited_flat), names=['US State', 'Number of Districts'])
#wiki_rep_df_limited_flat_df.head(2)

Unnamed: 0_level_0,"Flag, name and postal abbreviation[8]","Flag, name and postal abbreviation[8]",Cities,Cities,Ratification or admission[A],Population (2020)[10],Total area[11],Total area[11],Reps.
Unnamed: 0_level_1,"Flag, name and postal abbreviation[8]","Flag, name and postal abbreviation[8].1",Capital,Largest[12],Ratification or admission[A],Population (2020)[10],mi2,km2,Reps.
0,Alabama,AL,Montgomery,Huntsville,"Dec 14, 1819",5024279,52420,135767,7
1,Alaska,AK,Juneau,Anchorage,"Jan 3, 1959",733391,665384,1723337,1
2,Arizona,AZ,Phoenix,Phoenix,"Feb 14, 1912",7151502,113990,295234,9
3,Arkansas,AR,Little Rock,Little Rock,"Jun 15, 1836",3011524,53179,137732,4
4,California,CA,Sacramento,Los Angeles,"Sep 9, 1850",39538223,163695,423967,52
5,Colorado,CO,Denver,Denver,"Aug 1, 1876",5773714,104094,269601,8
6,Connecticut,CT,Hartford,Bridgeport,"Jan 9, 1788",3605944,5543,14357,5
7,Delaware,DE,Dover,Wilmington,"Dec 7, 1787",989948,2489,6446,1
8,Florida,FL,Tallahassee,Jacksonville,"Mar 3, 1845",21538187,65758,170312,28
9,Georgia,GA,Atlanta,Atlanta,"Jan 2, 1788",10711908,59425,153910,14


In [5]:
import pandas as pd
# Read every table on the Wikipedia page in one call; later cells index into
# webpage_tables as well.
webpage_url = "https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States"
webpage_tables = pd.read_html(webpage_url)

# The table at index 1 is the states table. Rename the name / abbreviation /
# representatives labels, then discard the columns that are not needed.
# NOTE(review): the counts come from the live page; the displayed values
# (e.g. California 52) look like the post-2020-census apportionment, which may
# not match the districts contested in the 2020 election -- confirm before
# using them to build 2020 district IDs.
states_df = (
    webpage_tables[1]
    .rename(columns={'Flag, name and postal abbreviation[8]':'stateName','Flag, name and postal abbreviation[8].1':'StateAbbr','Reps.':"NumDistricts"})
    .drop(columns=['Cities','Ratification or admission[A]','Population (2020)[10]','Total area[11]'])
)

# The header has two levels; dropping the first leaves flat column names.
states_df.columns = states_df.columns.droplevel()
states_df


  states_df = states_df.drop(columns={'Cities','Ratification or admission[A]','Population (2020)[10]','Total area[11]'})


Unnamed: 0,stateName,StateAbbr,NumDistricts
0,Alabama,AL,7
1,Alaska,AK,1
2,Arizona,AZ,9
3,Arkansas,AR,4
4,California,CA,52
5,Colorado,CO,8
6,Connecticut,CT,5
7,Delaware,DE,1
8,Florida,FL,28
9,Georgia,GA,14


In [32]:
# The table at index 2 is the federal district (District of Columbia) table.
# Rename the labels of interest and discard the columns that are not needed.
Fed_district_df = (
    webpage_tables[2]
    .rename(columns={'Flag, name and postal abbreviation[8]':'stateName','Flag, name and postal abbreviation[8].1':'StateAbbr','Reps.':"NumDistricts"})
    .drop(columns=['Established','Population [10]','Total area[11]'])
)

# Drop the first header level so the column names are flat.
Fed_district_df.columns = Fed_district_df.columns.droplevel()

# District count is set to the string '0' for every row in this table.
Fed_district_df['NumDistricts']='0'
Fed_district_df

  Fed_district_df = Fed_district_df.drop(columns={'Established','Population [10]','Total area[11]'})


Unnamed: 0,stateName,StateAbbr,NumDistricts
0,District of Columbia,DC,0


In [33]:
# The table at index 3 is the territories table. Rename the labels of interest
# and discard the columns that are not needed.
territories_df = (
    webpage_tables[3]
    .rename(columns={'Name and postal abbreviation[8]':'stateName','Name and postal abbreviation[8].1':'StateAbbr','Reps.':"NumDistricts"})
    .drop(columns=['Capital','Acquired [15]','Territorial status[16]','Population [10][17]','Total area[11]'])
)

# Drop the first header level so the column names are flat.
territories_df.columns = territories_df.columns.droplevel()

# District count is set to the string '0' for every territory.
territories_df['NumDistricts']='0'
territories_df

  territories_df = territories_df.drop(columns={'Capital','Acquired [15]','Territorial status[16]','Population [10][17]','Total area[11]'})


Unnamed: 0,stateName,StateAbbr,NumDistricts
0,American Samoa,AS,0
1,Guam,GU,0
2,Northern Mariana Islands,MP,0
3,Puerto Rico,PR,0
4,U.S. Virgin Islands,VI,0


In [65]:
# Stack the states, federal district and territories tables into one frame with
# a fresh 0..n-1 index, and cast NumDistricts to int (the federal district and
# territory rows carry it as the string '0').
state_district_df = pd.concat(
    [states_df, Fed_district_df, territories_df],
    ignore_index=True,
).astype({'NumDistricts': int})
state_district_df

Unnamed: 0,stateName,StateAbbr,NumDistricts
0,Alabama,AL,7
1,Alaska,AK,1
2,Arizona,AZ,9
3,Arkansas,AR,4
4,California,CA,52
5,Colorado,CO,8
6,Connecticut,CT,5
7,Delaware,DE,1
8,Florida,FL,28
9,Georgia,GA,14


In [88]:
def arrangeDistricts(num):
    """Return the district-number strings for a state with ``num`` districts.

    Args:
        num: Number of districts.

    Returns:
        A list of strings '01', '02', ... up to ``num``, zero-padded to at
        least two characters. When ``num`` is not greater than zero the list
        is ``['00']``.
    """
    # No numbered districts: fall back to the single '00' code.
    if not num > 0:
        return ['00']

    # Width-2 zero padding: 1 -> '01', 12 -> '12'.
    return [f'{district:02d}' for district in range(1, num + 1)]

# Expand each district count into its list of zero-padded district numbers.
state_district_df['DistrictCode'] = state_district_df['NumDistricts'].apply(arrangeDistricts)

# Pair every state abbreviation with its district list: [abbr, [codes...]].
state_district_df['combined'] = [
    [abbr, codes]
    for abbr, codes in zip(state_district_df['StateAbbr'], state_district_df['DistrictCode'])
]
state_district_df

Unnamed: 0,stateName,StateAbbr,NumDistricts,DistrictCode,combined
0,Alabama,AL,7,"[01, 02, 03, 04, 05, 06, 07]","[AL, [01, 02, 03, 04, 05, 06, 07]]"
1,Alaska,AK,1,[01],"[AK, [01]]"
2,Arizona,AZ,9,"[01, 02, 03, 04, 05, 06, 07, 08, 09]","[AZ, [01, 02, 03, 04, 05, 06, 07, 08, 09]]"
3,Arkansas,AR,4,"[01, 02, 03, 04]","[AR, [01, 02, 03, 04]]"
4,California,CA,52,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 1...","[CA, [01, 02, 03, 04, 05, 06, 07, 08, 09, 10, ..."
5,Colorado,CO,8,"[01, 02, 03, 04, 05, 06, 07, 08]","[CO, [01, 02, 03, 04, 05, 06, 07, 08]]"
6,Connecticut,CT,5,"[01, 02, 03, 04, 05]","[CT, [01, 02, 03, 04, 05]]"
7,Delaware,DE,1,[01],"[DE, [01]]"
8,Florida,FL,28,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 1...","[FL, [01, 02, 03, 04, 05, 06, 07, 08, 09, 10, ..."
9,Georgia,GA,14,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 1...","[GA, [01, 02, 03, 04, 05, 06, 07, 08, 09, 10, ..."


In [102]:
# Keys for the per-state dicts built further down; the order matches each
# [StateAbbr, DistrictCode] pair stored in the 'combined' column.
feature_names = ['state','Districts']
def lists2dict(list1, list2):
    """Build a dictionary by pairing two sequences position by position.

    Keys are taken from ``list1`` and values from ``list2``. Pairing stops at
    the end of the shorter input; if a key repeats, its last value wins.
    """
    return {key: value for key, value in zip(list1, list2)}

# Convert every [state, districts] pair in the 'combined' column into a dict
# keyed by feature_names.
list_of_dicts = [
    lists2dict(feature_names, state_and_codes)
    for state_and_codes in state_district_df['combined']
]

# Quick check: state abbreviation of the first entry.
list_of_dicts[0]['state']


'AL'

In [104]:
# Download every district of every state/territory in list_of_dicts.
#   * frameList collects the per-district frames for the whole country.
#   * Each <state>.csv holds only that state's districts. Previously the
#     ever-growing frameList was concatenated inside the loop, so each file
#     also contained every state processed before it.
frameList=[]

for state_entry in list_of_dicts:
    state_code = state_entry["state"]
    state_frames = []
    for district in state_entry["Districts"]:
        DistrictCode = f'{state_code}{district}'
        state_frames.append(getDistrictData(DistrictCode))

    if not state_frames:
        # Nothing to write for a state without district codes.
        continue

    # Plain forward slashes: the previous '.\/data\/...' literal relied on
    # invalid escape sequences and put literal backslashes into the path.
    pd.concat(state_frames, ignore_index=True).to_csv(f'./data/{state_code}.csv', index=False)
    frameList.extend(state_frames)

# Nationwide frame, built once instead of once per state.
StateDistrictframe = pd.concat(frameList, ignore_index=True)
    
#StateDistrictframe