In [None]:
import re
import timeit
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
from tqdm.notebook import tqdm

In [None]:
def state_parse(key, value):
    """Scrape every House district of one state and stack the results.

    Parameters
    ----------
    key : str
        State identifier placed in the ``id=`` query parameter of the
        OpenSecrets race-summary URL (the notebook passes postal
        abbreviations).
    value : int
        Number of districts in the state; districts ``1..value`` are fetched.

    Returns
    -------
    pandas.DataFrame
        One row per candidate across all districts, with a fresh 0..n-1
        index. An empty frame with the expected columns is returned when
        ``value`` is 0.

    Raises
    ------
    requests.HTTPError
        If any district page answers with an HTTP error status.
    """
    url = f"https://www.opensecrets.org/races/summary?cycle=2020&id={key}"

    # Collect per-district frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame inside a loop is quadratic.
    district_frames = []
    for i in range(1, value + 1):
        district = '{:0>2}'.format(i)  # zero-padded, e.g. 1 -> "01"
        # The timeout keeps one stalled request from hanging the whole scrape.
        response = requests.get(update_url(url, district), timeout=30)
        response.raise_for_status()
        district_frames.append(generate_dataframe(response))

    if not district_frames:
        return pd.DataFrame(columns=['Candidate_name',
                                     'Party',
                                     'State',
                                     'District',
                                     'Incumbent',
                                     'Winner',
                                     'Raised',
                                     'Spent',
                                     'percent_raised',
                                     'percent_spent'])

    return pd.concat(district_frames, ignore_index=True)

def update_url(URL, numb_districts):
    """Build the race-summary URL for a single district.

    Parameters
    ----------
    URL : str
        Base URL that already ends with the state identifier.
    numb_districts : str
        Despite the name, this is the district code to append (the caller
        passes a zero-padded string such as ``"01"``).

    Returns
    -------
    str
        ``URL`` followed by the district code and the ``&spec=N`` suffix.
    """
    return f"{URL}{numb_districts}&spec=N"

def split_title(title):
    """Reduce a page ``<title>`` element to its space-separated tokens.

    ``title`` must expose ``get_text()``. The fixed site suffix and the
    word "District " are stripped from the text, and what remains is split
    on single spaces and returned as a list of strings.
    """
    text = title.get_text()
    for boilerplate in (' 2020 Race • OpenSecrets', 'District '):
        text = text.replace(boilerplate, '')
    return text.split(' ')

def _money_to_number(amounts):
    """Turn a column of money strings such as "$1,234" into numbers."""
    if pd.api.types.is_numeric_dtype(amounts):
        # Already numeric: stripping characters would corrupt floats.
        return amounts
    # Every non-digit character is removed, so any decimal point or sign is
    # dropped as well (same rule the notebook has always applied).
    digits_only = amounts.astype(str).str.replace(r'\D', '', regex=True)
    # Cells left with no digits (e.g. missing values) become NaN.
    return pd.to_numeric(digits_only)


def get_dataframe(state, district, election_results):
    """Derive the tidy per-candidate table for one race.

    Parameters
    ----------
    state : str
        Value written to the ``State`` column of every row.
    district : str
        Value written to the ``District`` column of every row.
    election_results : pandas.DataFrame
        Raw table with ``Candidate``, ``Raised`` and ``Spent`` columns.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame, same index as the input, with the columns
        ``Candidate_name``, ``Party``, ``State``, ``District``,
        ``Incumbent``, ``Winner``, ``Raised``, ``Spent``,
        ``percent_raised`` and ``percent_spent``.

    Notes
    -----
    The percentage columns are shares of the column total within this
    table; a total of zero yields NaN/inf rather than an error.
    """
    candidate_text = election_results['Candidate']
    raised = _money_to_number(election_results['Raised'])
    spent = _money_to_number(election_results['Spent'])

    # Building a fresh frame (instead of adding columns to the argument)
    # leaves the caller's table untouched.
    return pd.DataFrame({
        # First "word whitespace word" run in the candidate cell.
        'Candidate_name': candidate_text.str.extract(r"(\w+\s\w+)", expand=False),
        # First single word character wrapped in non-word characters,
        # e.g. "(D)" -- the wrapping characters are kept.
        'Party': candidate_text.str.extract(r"(\W\w\W)", expand=False),
        'State': state,
        'District': district,
        'Incumbent': candidate_text.str.contains("Incumbent"),
        'Winner': candidate_text.str.contains("Winner"),
        'Raised': raised,
        'Spent': spent,
        'percent_raised': raised / raised.sum() * 100,
        'percent_spent': spent / spent.sum() * 100,
    })
    
def generate_dataframe(response):
    """Parse one race-summary HTTP response into a per-candidate DataFrame.

    Parameters
    ----------
    response : object with a ``text`` attribute
        The fetched page; ``response.text`` must hold its HTML.

    Returns
    -------
    pandas.DataFrame
        The first ``<table>`` of the page, post-processed by
        ``get_dataframe`` with the state and district taken from the
        page ``<title>``.

    Raises
    ------
    ValueError
        If the title does not yield both a state and a district, or the
        page contains no parsable table.
    """
    # Naming the parser avoids bs4's "guessed parser" warning and keeps the
    # result independent of which optional parsers happen to be installed.
    soup = BS(response.text, "html.parser")

    title_tokens = split_title(soup.find('title'))
    if len(title_tokens) < 2:
        raise ValueError(f"cannot read state and district from title tokens {title_tokens!r}")

    # The district is the last token; everything before it is the state
    # name, so multi-word states ("New York") are kept whole instead of
    # leaking their second word into the district.
    state = ' '.join(title_tokens[:-1])
    district = title_tokens[-1]

    # read_html expects a file-like object; passing literal HTML is deprecated.
    race_table = pd.read_html(StringIO(str(soup.find('table'))))[0]
    return get_dataframe(state, district, race_table)

In [95]:
# Empty accumulator whose schema matches the frame returned by
# get_dataframe(), including the two percentage columns.
election_results = pd.DataFrame(columns=['Candidate_name',
                                         'Party',
                                         'State',
                                         'District',
                                         'Incumbent',
                                         'Winner',
                                         'Raised',
                                         'Spent',
                                         'percent_raised',
                                         'percent_spent'])

In [None]:
# Exploratory look at an alternative source of state abbreviations.
# NOTE: the following cell rebinds URL_abb, response_abb and soup_abb to a
# different site, so nothing downstream depends on this page.
URL_abb = "https://www.scouting.org/resources/los/states/"

# Time out instead of hanging the kernel, and fail loudly on an HTTP error
# rather than parsing an error page.
response_abb = requests.get(URL_abb, timeout=30)
response_abb.raise_for_status()

soup_abb = BS(response_abb.text, "html.parser")

soup_abb

In [None]:
# Build {postal abbreviation: number of House seats} from two public tables.
URL_states = "https://www.britannica.com/topic/United-States-House-of-Representatives-Seats-by-State-1787120"
URL_abb = "https://www.50states.com/abbreviations.htm"

# Time out instead of hanging the kernel, and surface HTTP errors here
# rather than as a confusing parse failure further down.
response_states = requests.get(URL_states, timeout=30)
response_states.raise_for_status()
response_abb = requests.get(URL_abb, timeout=30)
response_abb.raise_for_status()

soup = BS(response_states.text, "html.parser")
soup_abb = BS(response_abb.text, "html.parser")

# State name -> postal abbreviation lookup.
# read_html wants a file-like object; literal HTML strings are deprecated.
abb = (
    pd.read_html(StringIO(str(soup_abb.find('table'))))[0]
    .drop(columns='STANDARD ABBREVIATION')
    .rename(columns={'US STATE': 'state', 'POSTAL ABBREVIATION': 'state_abb'})
)

states = pd.read_html(StringIO(str(soup.find('table'))))[0]
# Row label 50 is dropped -- presumably a non-state summary row after the
# 50 states; TODO confirm against the live table.
states = states.drop([50])

seats_by_state = pd.merge(states, abb, on='state')
seats_by_state['representatives'] = pd.to_numeric(seats_by_state['representatives'])

# One name per stage: `state_districts` is only ever the final dict.
state_districts = seats_by_state.set_index('state_abb')['representatives'].to_dict()
state_districts

In [None]:
# Sanity check: list every state key that the scrape will visit.
for state_abb in state_districts.keys():
    print(state_abb)

In [None]:
# Scrape every state, then stack the per-state frames once. Rebinding the
# accumulator inside the loop and appending it to itself kept only the last
# state (twice); DataFrame.append is also gone as of pandas 2.0.
state_frames = [
    state_parse(state_abb, district_count)
    for state_abb, district_count in state_districts.items()
]

# ignore_index gives the combined table a unique 0..n-1 index.
election_results = (
    pd.concat(state_frames, ignore_index=True) if state_frames else pd.DataFrame()
)

election_results