## Bayesian Butterfingers Data Collection

In [1]:
#import statements
from io import StringIO

import pandas as pd
import requests
import tqdm
from bs4 import BeautifulSoup

import retrievedata

### Collecting Data for TN District 7

In [2]:
# Download the OpenSecrets 2020 race-summary CSV for race id TN07 and load it
# into a DataFrame.
URL = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN07'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(URL, timeout=30)
print(type(response))
# raise_for_status() raises requests.HTTPError on any 4xx/5xx reply, so the
# parsing step below is never attempted on an error page.
response.raise_for_status()
print('Request is okay!')
# The response body is CSV text; StringIO lets pandas read it like a file.
TN_district7 = pd.read_csv(StringIO(response.text), sep=',')
TN_district7

<class 'requests.models.Response'>
Request is okay!


Unnamed: 0,cid,FirstLastP,Rcpts,Spent,PACs,Indivs,Cand,Other,EndCash,LgIndivs,...,Result,CRPICO,State,IncCID,Incumbent,primarydate,DistIDCurr,capeye,sort,SmLgIndivsNote
0,N00041873,Mark Green (R),1194960.47,935486.67,171900.0,819151.42,0.0,203909.05,287888.55,819151.42,...,W,I,Tennessee,,,2020-08-06 00:00:00 +0000,TN07,0,1,N
1,N00045536,Kiran Sreepada (D),206644.28,207190.98,4000.0,202644.28,0.0,0.0,0.0,179129.75,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,,0,2,N
2,N00047077,Ronald Brown (I),1750.0,0.0,0.0,1750.0,0.0,0.0,9006.0,300.0,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,,0,2,N
3,N00046592,Scott Vieira Jr (I),655.47,1048.51,10.0,45.0,35.0,565.47,-196.52,0.0,...,L,C,Tennessee,,,2020-08-06 00:00:00 +0000,,0,2,N
4,N00045535,Benjamin Estes (3),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,C,Tennessee,,,2020-08-06 00:00:00 +0000,,0,2,N


In [3]:
# Build a lookup of state/territory names to abbreviations from Wikipedia.
URL = 'https://en.wikipedia.org/wiki/List_of_U.S._state_and_territory_abbreviations'
# read_html returns every table found on the page; the one at position 1 is used.
states_df = pd.read_html(URL)[1]
# Column labels are tuples (multi-level header); keep only element 1 of each.
states_df.columns = states_df.columns.map(lambda header: header[1])

# Columns that are not needed for the name -> abbreviation lookup.
unused_columns = [
    'Status of region',
    'Unnamed: 2_level_1',
    'Unnamed: 4_level_1',
    'Unnamed: 5_level_1',
    'Unnamed: 6_level_1',
    'GPO',
    'AP',
    'Other abbreviations',
]
column_renames = {'Name': 'State', 'Unnamed: 3_level_1': 'Abbreviation'}

states_df = (
    states_df
    .reset_index(drop=True)
    .drop(columns=unused_columns)
    .dropna()                      # discard rows with any missing value
    .rename(columns=column_renames)
    .drop(0)                       # remove the row labelled 0
    .reset_index(drop=True)
)
# states_df: one row per state/territory with its name and abbreviation.

# states_abr_dict: maps each 'State' name to its 'Abbreviation'.
states_abr_dict = dict(zip(states_df['State'], states_df['Abbreviation']))

In [4]:
# Scrape a per-state seats table from Wikipedia's 2020 House elections article
# and attach each state's abbreviation from states_df.
URL = 'https://en.wikipedia.org/wiki/2020_United_States_House_of_Representatives_elections'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(URL, timeout=30)
# Fail fast on 4xx/5xx replies; `soup` is then always defined for the code below.
response.raise_for_status()
soup = BeautifulSoup(response.text, features="html.parser")
# Serialise each matching <table> on its own and join the pieces. Calling str()
# on the whole result set would produce a Python list repr, mixing brackets and
# commas into the HTML handed to the parser.
wikitables = soup.find_all('table', attrs={'class' : 'wikitable'})
tables_html = ''.join(str(table) for table in wikitables)
# Position 1 selects the second 'wikitable' on the page; missing cells become '-'.
all_states_df = pd.read_html(StringIO(tables_html))[1].fillna('-')
# Column labels are tuples (multi-level header); keep only element 1 of each.
all_states_df.columns = all_states_df.columns.map(lambda x: x[1])
all_states_df = all_states_df.drop(columns=['Seats', 'Change'])
# Left join: every row of the seats table is kept, and the abbreviation is
# filled in wherever the 'State' name matches a row of states_df.
state_representatives_df = pd.merge(left=all_states_df, right=states_df, on='State', how='left')
state_representatives_df

Unnamed: 0,State,Total seats,Abbreviation
0,Alabama,7,AL
1,Alaska,1,AK
2,Arizona,9,AZ
3,Arkansas,4,AR
4,California,53,CA
5,Colorado,7,CO
6,Connecticut,5,CT
7,Delaware,1,DE
8,Florida,27,FL
9,Georgia,14,GA


In [5]:
# Fetch the 2020 candidate summary for one state/district via the project helper.
# NOTE(review): 'Texars' is not a real state name; the recorded output shows the
# helper reporting "Assuming you meant Texas", so the misspelling looks like a
# deliberate demonstration of that fallback. Confirm it is intentional.
retrievedata.retrieve_2020_state_district_data('Texars', 17)

No state by this name. Assuming you meant Texas.


Unnamed: 0,State_Abbreviation,District,cid,FirstLast,Party,Rcpts,Spent,PACs,Indivs,Cand,...,Result,CRPICO,State,IncCID,Incumbent,primarydate,DistIDCurr,capeye,sort,SmLgIndivsNote
0,TX,17,N00005681,Pete Sessions,R,1447957.93,1658637.39,583090.75,716783.42,140000.0,...,W,O,Texas,,,2020-03-03 00:00:00 +0000,,0,2,N
1,TX,17,N00041704,Rick Kennedy,D,200667.91,202095.65,2554.84,207052.99,0.0,...,L,O,Texas,,,2020-03-03 00:00:00 +0000,,0,2,N
2,TX,17,N00047490,Ted Brown,L,3040.73,3040.75,200.0,232.77,2607.96,...,L,O,Texas,,,2020-03-03 00:00:00 +0000,,0,2,N


## Getting All Data and Saving as .csv

In [None]:
# Export step: calls retrievedata.get_all_data(), resets the index and writes the
# result to 'All_2020_Election_Data.csv' without an index column.
# NOTE(review): left commented out, presumably because the run is slow. The
# recorded progress bar shows 50 iterations taking about 9.5 minutes. Confirm,
# and uncomment only when the CSV needs to be regenerated.
#retrievedata.get_all_data().reset_index(drop=True).to_csv('All_2020_Election_Data.csv', index=None)

100%|██████████| 50/50 [09:33<00:00, 11.46s/it]
