In [1]:
import re
import time
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.graphics as g
from bs4 import BeautifulSoup as BS
from jupyterthemes import jtplot
jtplot.style()

In [2]:
# Exploratory fetch of a single race page (cycle 2020, race id TN02).
URL = 'https://www.opensecrets.org/races/candidates?cycle=2020&id=TN02&spec=N'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(URL, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()
# NOTE(review): no parser is named, so bs4 picks whichever is installed — consider pinning one.
soup = BS(response.text)

In [3]:
# Reference table of states; get_SOF reads its 'state', 'abbreviation' and
# 'representatives' columns to enumerate every district.
states = pd.read_csv('../data/state_reps_ref.csv')

# Build a function that scrapes one district's Source of Funds table

In [4]:
# Scratch check of the race-id format: state abbreviation followed by the
# district number zero-padded to two digits.
st = 'TN'
num = 5
d = f'{st}{num:02d}'
print(d)

TN05


In [5]:
def get_district_SOF(state_abbrv, district_num):
    """Scrape the 2020 OpenSecrets "Source of Funds" tables for one district.

    Parameters
    ----------
    state_abbrv : str
        State abbreviation, e.g. ``'TN'``.
    district_num : int
        District number; zero-padded to two digits to build the race id
        (``'TN'`` and ``2`` give ``'TN02'``).

    Returns
    -------
    pandas.DataFrame
        One row per (candidate, table row). The columns 'district', 'name',
        'party', 'incumbent', 'winner', 'Type of Contribution', 'Amount' and
        'Percentage' come first, followed by any extra columns the page's
        table carries. Each candidate's rows keep the index of the scraped
        table, so index labels repeat across candidates. When no matching
        table is found an empty frame with the eight columns is returned.

    Raises
    ------
    requests.HTTPError
        If the page responds with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within the timeout.
    """
    columns = ['district', 'name', 'party', 'incumbent', 'winner', 'Type of Contribution', 'Amount', 'Percentage']
    district = state_abbrv + f'{district_num:02}'

    URL = f'https://www.opensecrets.org/races/candidates?cycle=2020&id={district}&spec=N'
    response = requests.get(URL, timeout=30)
    # Fail loudly rather than silently returning no rows for an error page.
    response.raise_for_status()
    # NOTE(review): no parser is named, so bs4 picks whichever is installed — consider pinning one.
    soup = BS(response.text)

    frames = []
    label = ''
    candidate = None  # details from the most recent <h2>; None until one is seen

    # Walk headings and tables in document order: an <h2> starts a candidate,
    # an <h3> labels the table that follows it. The last two matches are
    # skipped, as before — presumably trailing non-candidate elements; TODO confirm.
    for res in soup.find_all(['h2', 'h3', 'table'])[:-2]:
        if res.name == 'h2':
            text = res.text
            party_match = re.search(r'\((.*?)\)', text)
            paren_at = text.find('(')
            candidate = {
                'incumbent': 'Incumbent' in text,
                'winner': 'Winner' in text,
                # A heading without "(X)" used to raise AttributeError here.
                'party': party_match.group(1) if party_match else None,
                'name': (text[:paren_at] if paren_at != -1 else text).strip(),
            }
        elif res.name == 'h3':
            label = res.text
        elif 'Source of Funds' in label and candidate is not None:
            # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
            d_SOF = pd.read_html(StringIO(str(res)))[0]
            frames.append(d_SOF.assign(district=district, **candidate))

    if not frames:
        return pd.DataFrame(columns=columns)

    # A single concat replaces the removed DataFrame.append; the reindex restores
    # the original column order and keeps unexpected table columns at the end.
    D_SOF = pd.concat(frames)
    extras = [col for col in D_SOF.columns if col not in columns]
    return D_SOF.reindex(columns=columns + extras)

In [6]:
# Spot-check the scraper on a single district (race id TN02).
get_district_SOF(state_abbrv='TN', district_num=2)

Unnamed: 0,district,name,party,incumbent,winner,Type of Contribution,Amount,Percentage
0,TN02,Tim Burchett,R,True,True,Small Individual Contributions (≤ $200),"$343,014",25.67%
1,TN02,Tim Burchett,R,True,True,Large Individual Contributions,"$729,831",54.62%
2,TN02,Tim Burchett,R,True,True,PAC Contributions*,"$269,535",20.17%
3,TN02,Tim Burchett,R,True,True,Candidate self-financing,$0,0.00%
4,TN02,Tim Burchett,R,True,True,Other,"-$6,105",-0.46%
0,TN02,Renee Hoyos,D,False,False,Small Individual Contributions (≤ $200),$0,0.00%
1,TN02,Renee Hoyos,D,False,False,Large Individual Contributions,"$807,459",99.34%
2,TN02,Renee Hoyos,D,False,False,PAC Contributions*,"$3,100",0.38%
3,TN02,Renee Hoyos,D,False,False,Candidate self-financing,$0,0.00%
4,TN02,Renee Hoyos,D,False,False,Other,"$2,225",0.27%


In [7]:
def get_SOF(states_df=None, pause=2):
    """Scrape Source of Funds data for every district of every state.

    Parameters
    ----------
    states_df : pandas.DataFrame, optional
        Table with 'state', 'abbreviation' and 'representatives' columns.
        Defaults to the notebook-level ``states`` frame; pass a subset to
        scrape only a few states.
    pause : float, optional
        Seconds to sleep after finishing each state (default 2).
        NOTE(review): the pause is per state, not per request — confirm
        that is the intended rate limit.

    Returns
    -------
    pandas.DataFrame
        The per-district frames from ``get_district_SOF`` stacked together,
        with a leading 'state' column. Row index labels are not reset.
        When nothing is scraped an empty frame with the nine columns is
        returned.
    """
    if states_df is None:
        states_df = states

    columns = ['state', 'district', 'name', 'party', 'incumbent', 'winner', 'Type of Contribution', 'Amount', 'Percentage']
    frames = []

    for _, row in states_df.iterrows():

        # Save current state information in cleaner variable names
        state = row['state']
        abbrv = row['abbreviation']
        reps = int(row['representatives'])

        # Loop through each district of the current state
        for i in range(1, reps + 1):
            D_SOF = get_district_SOF(state_abbrv=abbrv, district_num=i)
            if D_SOF.empty:
                # Nothing scraped for this district; skipping also keeps
                # empty frames out of the concat below.
                continue
            frames.append(D_SOF.assign(state=state))

        time.sleep(pause)

    if not frames:
        return pd.DataFrame(columns=columns)

    # One concat at the end replaces the removed DataFrame.append and avoids
    # re-copying the growing frame on every district.
    SOF = pd.concat(frames)
    extras = [col for col in SOF.columns if col not in columns]
    return SOF.reindex(columns=columns + extras)

In [8]:
# Full scrape: get_SOF requests one page per district listed in `states`,
# so this cell needs network access and takes a while to finish.
SOF = get_SOF()

In [12]:
# Persist the scraped table; index=False leaves the row index out of the CSV.
SOF.to_csv('../data/us_rep_source_of_funds.csv', index = False)