In [1]:
import pandas as pd
import re
import requests
import plotly.express as px
from bs4 import BeautifulSoup, SoupStrainer
from IPython.core.display import HTML
from io import StringIO
from urllib.request import Request, urlopen

In [2]:
# Wikipedia article whose tables are scraped for per-state House seat totals.
wiki_district_url = (
    'https://en.wikipedia.org/wiki/2020_United_States_House_of_Representatives_elections'
)
# Wikipedia article whose tables are scraped for state names and USPS codes.
wiki_abbrev_url = (
    'https://en.wikipedia.org/wiki/List_of_U.S._state_and_territory_abbreviations'
)

In [3]:
# Download the 2020 House elections article and collect its "wikitable" tables.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed as if it were the article.
r_district = requests.get(wiki_district_url, timeout=30)
r_district.raise_for_status()
soup_district = BeautifulSoup(r_district.text, features='html.parser')
# find_all is the current spelling of the legacy findAll alias. The matched
# tables are serialised into one string so pd.read_html can re-parse them.
district_table = str(soup_district.find_all('table', {'class': 'wikitable'}))

In [4]:
# Download the state/territory abbreviations article and collect its
# "wikitable" tables. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() surfaces an HTTP error here instead of letting
# an error page be parsed as if it were the article.
r_abbrev = requests.get(wiki_abbrev_url, timeout=30)
r_abbrev.raise_for_status()
soup_abbrev = BeautifulSoup(r_abbrev.text, features='html.parser')
# find_all is the current spelling of the legacy findAll alias. The matched
# tables are serialised into one string so pd.read_html can re-parse them.
abbrev_table = str(soup_abbrev.find_all('table', {'class': 'wikitable'}))

In [5]:
# Re-parse the scraped election tables and keep the state name and seat total
# from the second one, renaming the seat column to 'Districts'.
# NOTE(review): the positional index 1 depends on the page layout at scrape
# time — confirm it still points at the per-state summary table.
district_tables = pd.read_html(StringIO(district_table))
seat_columns = ['State', 'Total seats']
seats_df = district_tables[1][seat_columns].rename(
    columns={'Total seats': 'Districts'}
)

In [6]:
# Re-parse the scraped abbreviation tables and keep the name and USPS columns
# from the second one.
# NOTE(review): the 'Unnamed: 5_level_1' key suggests the table has multi-level
# headers, so the rename targets a lower-level label — confirm against the page.
abbrev_tables = pd.read_html(StringIO(abbrev_table))
abbrev_renames = {'Name': 'State', 'Unnamed: 5_level_1': 'Code'}
abbrev_df = abbrev_tables[1][['Name', 'USPS']].rename(columns=abbrev_renames)

In [7]:
# Join seat counts with the abbreviation table on the columns they share
# (pandas' default when no key is given), then drop level 0 of the resulting
# column index so the remaining labels are flat.
state_code_df = seats_df.merge(abbrev_df).droplevel(0, axis=1)

In [8]:
# Keep every row whose code is not 'NB'.
# NOTE(review): presumably 'NB' is a duplicate/obsolete entry produced by the
# abbreviation table — confirm why it must be excluded.
not_nb = state_code_df['Code'].ne('NB')
state_code_df = state_code_df[not_nb]

In [9]:
# Per-state district counts, in the same row order as ID.
NUM = state_code_df.loc[:, 'Districts'].to_list()

In [10]:
# Per-state codes, in the same row order as NUM; used to build race ids.
ID = state_code_df.loc[:, 'Code'].to_list()

In [11]:
# OpenSecrets race-summary CSV endpoint for cycle 2020, race id TN07.
TN07_URL = (
    'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN07'
)

In [12]:
# Fetch the TN07 summary CSV as text. The timeout prevents an indefinite hang,
# and raise_for_status() stops an HTTP error page from being handed on as CSV.
tn07_reply = requests.get(TN07_URL, timeout=30)
tn07_reply.raise_for_status()
response = tn07_reply.text

In [13]:
# Parse the downloaded CSV text into a DataFrame via an in-memory text buffer.
tn07_buffer = StringIO(response)
TN07_df = pd.read_csv(tn07_buffer)

In [14]:
# Disabled export: uncomment to write TN07_df to ../data/TN07_df.csv without
# the index column.
#TN07_df.to_csv('../data/TN07_df.csv', index = False)

In [15]:
# Fetch the OpenSecrets 2020 race summary for Tennessee race ids TN01..TN09 and
# stack the per-district tables into a single frame, TN_df.
TN_DISTRICT_COUNT = 9  # the loop covers districts 01 through 09

urls_list = []
for num in range(1, TN_DISTRICT_COUNT + 1):
    district_id = str(num).zfill(2)  # race ids zero-pad the district number
    URL = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN' + district_id
    http_reply = requests.get(URL, timeout=30)
    # Fail loudly on an HTTP error rather than parsing an error page as CSV.
    http_reply.raise_for_status()
    response = http_reply.text
    # A separate name for the per-district frame keeps TN_df meaning only the
    # combined result.
    district_df = pd.read_csv(StringIO(response))
    district_df.insert(0, 'District', district_id)
    urls_list.append(district_df)
# ignore_index gives the combined frame one continuous row index instead of
# repeating 0..k for every district (matches how States_df is built).
TN_df = pd.concat(urls_list, ignore_index=True)

In [16]:
# Disabled export: uncomment to write TN_df to ../data/TN_df.csv without the
# index column.
#TN_df.to_csv('../data/TN_df.csv', index = False)

In [17]:
# Download the OpenSecrets 2020 race summary for every (state code, district)
# pair described by ID and NUM, then stack the results into States_df.
urls_list = []

# One Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    for district, code in zip(NUM, ID):
        # int() lets a float-typed seat count (e.g. 7.0) still drive range().
        for num in range(1, int(district) + 1):
            district_id = str(num).zfill(2)  # race ids zero-pad the district number
            URL = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=' + code + district_id
            http_reply = session.get(URL, timeout=30)
            # Fail loudly on an HTTP error rather than parsing an error page as CSV.
            http_reply.raise_for_status()
            response = http_reply.text
            # A separate name for the per-district frame keeps States_df meaning
            # only the combined result.
            district_df = pd.read_csv(StringIO(response))
            district_df.insert(0, 'District', district_id)
            # Move the CSV's own 'State' column to sit right after 'District'.
            district_df.insert(1, 'State', district_df.pop('State'))
            urls_list.append(district_df)
States_df = pd.concat(urls_list, ignore_index=True)

In [57]:
# The party tag is the last space-separated token of 'FirstLastP'
# (e.g. 'Jerry Carl (R)' -> '(R)'); the parentheses are kept as-is.
name_tokens = States_df['FirstLastP'].str.split(' ')
States_df['Party'] = name_tokens.str.get(-1)

In [59]:
# Disabled export: uncomment to write States_df to ../data/States_df.csv
# without the index column.
#States_df.to_csv('../data/States_df.csv', index = False)

In [61]:
# Show the combined frame; the notebook renders a truncated preview of it.
States_df

Unnamed: 0,District,State,cid,FirstLastP,Rcpts,Spent,PACs,Indivs,Cand,Other,...,Result,CRPICO,IncCID,Incumbent,primarydate,DistIDCurr,capeye,sort,SmLgIndivsNote,Party
0,01,Alabama,N00044245,Jerry Carl (R),1971321.50,1859348.91,387000.00,1044195.95,434655.50,105470.05,...,W,O,,,2020-03-03 00:00:00 +0000,,0,2,N,(R)
1,01,Alabama,N00044750,James Averhart (D),80094.95,78973.24,0.00,50849.95,29245.00,0.00,...,L,O,,,2020-03-03 00:00:00 +0000,,0,2,N,(D)
2,02,Alabama,N00041295,Barry Moore (R),650806.75,669367.70,230281.65,408536.20,11500.00,488.90,...,W,O,,,2020-03-03 00:00:00 +0000,,0,2,N,(R)
3,02,Alabama,N00045944,Phyllis Harvey-Hall (D),56049.68,55988.07,2032.00,42411.95,10575.41,1030.32,...,L,O,,,2020-03-03 00:00:00 +0000,,0,2,N,(D)
4,02,Alabama,N00045631,John Page (L),0.00,0.00,0.00,0.00,0.00,0.00,...,,O,,,2020-03-03 00:00:00 +0000,,0,2,N,(L)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1259,01,Wyoming,N00035504,Liz Cheney (R),3003883.34,3060166.78,1292490.00,1169995.46,0.00,541397.88,...,W,I,,,2020-08-18 00:00:00 +0000,WY01,0,1,N,(R)
1260,01,Wyoming,N00047272,Lynnette Grey Bull (D),134597.32,132234.75,2800.00,130197.32,0.00,1600.00,...,L,C,,,2020-08-18 00:00:00 +0000,,0,2,N,(D)
1261,01,Wyoming,N00047207,Zoilo Adalia (3),0.00,0.00,0.00,0.00,0.00,0.00,...,,C,,,2020-08-18 00:00:00 +0000,,0,2,N,(3)
1262,01,Wyoming,N00035139,Richard Brubaker (L),0.00,0.00,0.00,0.00,0.00,0.00,...,,C,,,2020-08-18 00:00:00 +0000,,0,2,N,(L)
