In [1]:
import re
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS

In [11]:
def candidate_info(df):
    """Split the Open Secrets ``candidate`` column into separate fields.

    The ``candidate`` text is expected to look like
    ``"Name (P) ... Incumbent ... Winner"``: a name, a party code in
    parentheses, and optional "Incumbent" / "Winner" markers.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``candidate``, ``district``, ``raised`` and
        ``spent``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``district``, ``name``, ``party``,
        ``incumbent``, ``winner``, ``raised``, ``spent`` (in that order).
        ``party`` is missing (NaN) for rows with no parenthesised party code.
    """
    candidate = df['candidate']

    return df.assign(
        # na=False keeps these columns strictly boolean if a candidate cell is missing.
        incumbent=candidate.str.contains(r'Incumbent', na=False),
        winner=candidate.str.contains(r'Winner', na=False),
        # First parenthesised group; yields NaN instead of raising when absent.
        party=candidate.str.extract(r'\((.*?)\)', expand=False),
        # Everything before the first "(" so party codes of any length are
        # handled, with the whitespace that preceded the party stripped.
        name=candidate.str.extract(r'^([^(]*)', expand=False).str.strip(),
    )[['district', 'name', 'party', 'incumbent', 'winner', 'raised', 'spent']]

def clean_money(df):
    """Clean and convert the money columns to integers.

    Strips ``$`` and ``,`` from the ``raised`` and ``spent`` columns and casts
    them to 64-bit integers.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain string columns ``raised`` and ``spent`` (e.g. ``"$1,234"``).
        The frame is not modified, so calling this twice on the same input is safe.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``raised`` and ``spent`` as ``int64``.
    """
    money_cols = ['raised', 'spent']

    # Work on a copy: overwriting the caller's columns would make a second
    # call fail, because ``.str`` is not available on integer columns.
    cleaned = df.copy()
    for col in money_cols:
        # Explicit int64 avoids a platform-dependent 32-bit default.
        cleaned[col] = cleaned[col].str.replace(r'[$,]', '', regex=True).astype('int64')

    return cleaned

def format_os_table(df):
    """Format a raw Open Secrets race table.

    Steps, in order: lowercase the column names, extract the candidate fields
    (which also rearranges the columns) via ``candidate_info``, then clean the
    money columns via ``clean_money``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with a ``district`` column already added.

    Returns
    -------
    pandas.DataFrame
        The formatted table.
    """
    # ``rename`` returns a new frame, so the caller's column labels are left
    # untouched (assigning to ``df.columns`` would relabel the input in place).
    return (
        df.rename(columns=str.lower)
        .pipe(candidate_info)
        .pipe(clean_money)
    )

In [12]:
# Start TN as an empty frame with the column layout produced by format_os_table.
TN = pd.DataFrame(
    columns=[
        'district',
        'name',
        'party',
        'incumbent',
        'winner',
        'raised',
        'spent',
    ],
)

In [13]:
# Scrape the 2020 Open Secrets race summary for each Tennessee House district
# (TN01-TN09). The per-district tables are collected in a list and concatenated
# once, so TN is rebuilt from scratch and re-running this cell does not
# duplicate rows.
district_frames = []

for i in range(1, 9 + 1):

    # Zero-padded district id, e.g. "TN01"
    district = f"TN{i:02d}"

    # Get request; fail loudly on HTTP errors rather than parsing an error page
    URL = f'https://www.opensecrets.org/races/summary?cycle=2020&id={district}&spec=N'
    response = requests.get(URL, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so bs4 does not have to guess one.
    soup = BS(response.text, 'html.parser')

    table = soup.find('table')
    if table is None:
        raise ValueError(f"No table found on {URL}")

    # read_html expects a file-like object; literal HTML strings are deprecated.
    TN_district = pd.read_html(StringIO(str(table)))[0]

    # Add a district column
    TN_district['district'] = district

    # Format
    TN_district = format_os_table(TN_district)

    district_frames.append(TN_district)

# Each district keeps its own 0-based row index, as in the per-district tables.
TN = pd.concat(district_frames)

In [14]:
# Display the combined candidate table for all scraped districts.
TN

Unnamed: 0,district,name,party,incumbent,winner,raised,spent
0,TN01,Diana Harshbarger,R,False,True,2126946,1869100
1,TN01,Blair Nicole Walsingham,D,False,False,140209,134995
0,TN02,Tim Burchett,R,True,True,1336276,878488
1,TN02,Renee Hoyos,D,False,False,812784,816793
0,TN03,Chuck Fleischmann,R,True,True,1051653,381411
1,TN03,Meg Gorman,D,False,False,85843,77760
0,TN04,Scott Desjarlais,R,True,True,331464,392499
1,TN04,Christopher Hale,D,False,False,308731,302996
0,TN05,Jim Cooper,D,True,True,936569,1332131
0,TN06,John Rose,R,True,True,1050429,625688
