In [10]:
import pandas as pd
import re
import requests
import plotly.express as px
from bs4 import BeautifulSoup, SoupStrainer
from IPython.core.display import HTML
from io import StringIO
from urllib.request import Request, urlopen

In [11]:
# Wikipedia pages used as data sources: the 2020 U.S. House elections article
# and the list of U.S. state and territory abbreviations.
wiki_district_url = 'https://en.wikipedia.org/wiki/2020_United_States_House_of_Representatives_elections'
wiki_abbrev_url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_and_territory_abbreviations'

In [12]:
# Download the House-elections article, parse it, and keep every
# <table class="wikitable"> as one HTML string (the str() of the result list).
r_district = requests.get(wiki_district_url)
soup_district = BeautifulSoup(r_district.text, 'html.parser')
district_table = str(soup_district.find_all('table', class_='wikitable'))

In [13]:
# Download the abbreviations article, parse it, and keep every
# <table class="wikitable"> as one HTML string (the str() of the result list).
r_abbrev = requests.get(wiki_abbrev_url)
soup_abbrev = BeautifulSoup(r_abbrev.text, 'html.parser')
abbrev_table = str(soup_abbrev.find_all('table', class_='wikitable'))

In [14]:
# Second wikitable parsed from the elections page: keep the State and
# "Total seats" columns and relabel the latter as "Districts".
seats_df = pd.read_html(StringIO(str(district_table)))[1][['State', 'Total seats']].rename(
    columns={'Total seats': 'Districts'}
)

In [15]:
# Second wikitable parsed from the abbreviations page: keep the Name and USPS
# columns, relabelling "Name" as "State" and "Unnamed: 5_level_1" as "Code".
# NOTE(review): "Unnamed: 5_level_1" is presumably the blank sub-header under
# USPS in the live table - confirm if the page layout changes.
abbrev_df = pd.read_html(StringIO(str(abbrev_table)))[1][['Name', 'USPS']].rename(
    columns={'Name': 'State', 'Unnamed: 5_level_1': 'Code'}
)

In [16]:
# Join the two frames on the column(s) they have in common (no `on=` is given),
# then drop the outer column level so only the inner labels remain.
state_code_df = pd.merge(seats_df, abbrev_df).droplevel(0, axis=1)

In [17]:
# Discard rows whose Code is 'NB'.
# NOTE(review): presumably a non-current code picked up from the abbreviations
# table - confirm why it appears.
state_code_df = state_code_df[state_code_df['Code'] != 'NB']

In [18]:
# District counts as a plain list, in the row order of state_code_df.
NUM = state_code_df['Districts'].tolist()

In [19]:
# State codes as a plain list, in the row order of state_code_df.
ID = state_code_df['Code'].tolist()

In [20]:
# OpenSecrets race-summary CSV endpoint for cycle 2020, race id TN07.
TN07_URL = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN07'

In [21]:
# Fetch the response body as text.
# NOTE(review): no timeout and no HTTP status check on this request.
response = requests.get(TN07_URL).text

In [22]:
# Parse the downloaded text as CSV.
TN07_df = pd.read_csv(StringIO(response))

In [23]:
# Write the frame to ../data/ without the index column; the directory must
# already exist.
TN07_df.to_csv('../data/TN07_df.csv', index = False)

In [24]:
# Fetch the 2020 OpenSecrets race summary for TN districts 01-09, tag each
# frame with its zero-padded district number, then stack and save them.
urls_list = []
for num in range(1, 10):
    district_id = str(num).zfill(2)
    URL = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=TN' + district_id
    response = requests.get(URL).text
    TN_df = pd.read_csv(StringIO(response))
    TN_df.insert(0, 'District', district_id)
    urls_list.append(TN_df)

TN_df = pd.concat(urls_list)
TN_df.to_csv('../data/TN_df.csv', index=False)

In [25]:
# For every (district count, state code) pair, walk districts 1..count,
# download that race's 2020 OpenSecrets summary, and collect the frames with
# District and State moved to the first two columns.
urls_list = []
for district, code in zip(NUM, ID):
    for num in range(1, district + 1):
        district_id = str(num).zfill(2)
        URL = 'https://www.opensecrets.org/races/summary.csv?cycle=2020&id=' + code + district_id
        response = requests.get(URL).text
        States_df = pd.read_csv(StringIO(response))
        States_df.insert(0, 'District', district_id)
        # pop() runs first, so State is removed and re-inserted at position 1.
        States_df.insert(1, 'State', States_df.pop('State'))
        urls_list.append(States_df)

# One frame for all races, with a fresh 0..n-1 index.
States_df = pd.concat(urls_list, ignore_index=True)

In [26]:
# Write the combined frame to ../data/ without the index column; the directory
# must already exist.
States_df.to_csv('../data/States_df.csv', index = False)

In [27]:
# NOTE(review): these two assignments repeat, with identical values, the ones
# made in cell In [11] near the top of the notebook; this cell is redundant.
wiki_district_url = 'https://en.wikipedia.org/wiki/2020_United_States_House_of_Representatives_elections'
wiki_abbrev_url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_and_territory_abbreviations'

In [28]:
def get_url(url, timeout=30):
    """Download a page and return its ``wikitable`` tables as one HTML string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        ``str()`` of the list of matching ``<table class="wikitable">`` tags,
        e.g. ``"[<table ...>...</table>, <table ...>...</table>]"``, or
        ``"[]"`` when the page contains none.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here instead of handing an error page to the HTML parser, where the
    # problem would only surface later as "no tables found".
    response.raise_for_status()

    soup = BeautifulSoup(response.text, features='html.parser')
    # find_all is the current spelling of the legacy findAll alias.
    return str(soup.find_all('table', {'class': 'wikitable'}))




In [29]:
# Serialized wikitable HTML for each page, as returned by get_url.
abb = get_url(wiki_abbrev_url)
dist = get_url(wiki_district_url)

In [32]:

def get_states_csv(NUM, ID, cycle=2020, output_path='../data/States_df.csv',
                   session=None, timeout=30):
    """Download every district's OpenSecrets race summary and export one CSV.

    One request is made per district: for each ``(count, code)`` pair taken
    from ``zip(NUM, ID)``, districts ``1..count`` are fetched with a race id
    of ``code`` followed by the zero-padded district number (``TN07``).

    Args:
        NUM: Iterable of district counts, one per state.
        ID: Iterable of state codes, positionally aligned with ``NUM``.
        cycle: Election cycle placed in the request URL.
        output_path: Destination handed to ``DataFrame.to_csv``. Pass ``None``
            to get the CSV text back instead of writing a file.
        session: Optional object with a requests-style
            ``get(url, timeout=...)`` method (for example a
            ``requests.Session`` to reuse one connection). The ``requests``
            module itself is used when omitted.
        timeout: Seconds to wait for each response.

    Returns:
        Whatever ``DataFrame.to_csv`` returns: ``None`` when ``output_path``
        names a file (the default), the CSV text when ``output_path`` is
        ``None``.

    Raises:
        ValueError: ``NUM``/``ID`` yield no districts at all.
        requests.HTTPError: A summary request returned a 4xx/5xx status.
    """
    url_template = 'https://www.opensecrets.org/races/summary.csv?cycle={cycle}&id={code}{district}'
    fetcher = requests if session is None else session

    frames = []
    for district_count, code in zip(NUM, ID):
        for num in range(1, int(district_count) + 1):
            district_id = str(num).zfill(2)
            url = url_template.format(cycle=cycle, code=code, district=district_id)

            response = fetcher.get(url, timeout=timeout)
            # Stop on an HTTP error instead of parsing an error page as CSV.
            response.raise_for_status()

            frame = pd.read_csv(StringIO(response.text))
            frame.insert(0, 'District', district_id)
            # Move State next to District; pop() runs before insert().
            frame.insert(1, 'State', frame.pop('State'))
            frames.append(frame)

    if not frames:
        raise ValueError('No districts to download: NUM and ID produced no races')

    States_df = pd.concat(frames, ignore_index=True)
    return States_df.to_csv(output_path, index=False)


In [33]:
# Download the summary for every district listed by NUM/ID and write the
# combined result to ../data/States_df.csv.
get_states_csv(NUM, ID)

In [30]:
def get_state_seats(seats, abbrev):
    """Combine per-state seat totals with state codes into one lookup frame.

    Args:
        seats: HTML (anything ``str()`` can serialize) holding at least two
            tables; the second must provide 'State' and 'Total seats' columns.
        abbrev: HTML holding at least two tables; the second must provide
            'Name' and 'USPS' columns.

    Returns:
        DataFrame of the merged rows, labelled by the inner column level after
        the outer level is dropped, excluding rows whose 'Code' is 'NB'.
    """
    # Index 1 selects the second table parsed from each document.
    seat_table = pd.read_html(StringIO(str(seats)))[1]
    seat_counts = seat_table[['State', 'Total seats']].rename(
        columns={'Total seats': 'Districts'}
    )

    code_table = pd.read_html(StringIO(str(abbrev)))[1]
    # NOTE(review): 'Unnamed: 5_level_1' is presumably the blank sub-header
    # under USPS - confirm if the source table layout changes.
    state_codes = code_table[['Name', 'USPS']].rename(
        columns={'Name': 'State', 'Unnamed: 5_level_1': 'Code'}
    )

    # No `on=` is given, so the join uses the column(s) both frames share;
    # droplevel(0) then leaves only the inner column labels.
    merged = seat_counts.merge(state_codes).droplevel(0, axis=1)
    return merged.loc[merged['Code'] != 'NB']

In [31]:
# Build and display the State / Districts / Code lookup from the two scraped pages.
get_state_seats(dist, abb)

Unnamed: 0,State,Districts,Code
0,Alabama,7,AL
1,Alaska,1,AK
2,Arizona,9,AZ
3,Arkansas,4,AR
4,California,53,CA
5,Colorado,7,CO
6,Connecticut,5,CT
7,Delaware,1,DE
8,Florida,27,FL
9,Georgia,14,GA
