# An Analysis of Political Contributions During the 2020 House of Representatives Election

# TN Districts

## Progress

The Goobers have scraped the website

## The Imports

In [1]:
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
from datetime import datetime
from lxml import html
import requests
import re

## The Soup Prep

In [2]:
def get_district (State, District):
    url = "https://www.opensecrets.org/races/candidates?cycle=2020&id="+State+District+"&spec=N"
    page = requests.get(url)
    soup = BS(page.content, 'html.parser')
    #Demographic information soup.
    persons = soup.findAll('div',  {'class':"Members--list-item"})
    #Funds raised and spent soup.
    money = soup.find_all('td', {'class': "Members--number"}) 
    #Loops for name, party, votes, state, etc.
    for person in persons:
        name_list.append(get_name (person))
        party_list.append(get_party (person))
        votes_received.append(votes (person))
        state_list.append(State)
        district_list.append(District)
        winner_list.append(winner(person))
        incumbent_list.append(incumbent(person))
    #Loop for finding amount of money raised
    starting_index = 0
    end = len(money) //3 -1
    raised.append(money[starting_index].text)
    for index in range(end):
        starting_index = starting_index + 3
        raised.append(money[starting_index].text)
    starting_index = 1
    end = len(money) //3 -1
    spent.append(money[starting_index].text)
    for index in range(end):
        starting_index = starting_index + 3
        spent.append(money[starting_index].text)

## The Soup Functions

In [3]:
#Function to clean name and append to list.
def get_name (person):
    member_text = person.find("h2").text
    member_text = member_text.replace("\t", "").replace("\n", "")
    return re.search(r"(.+) \(.\)", member_text)[1]
#Function to clean affiliation and append to list
def get_party (person):
    member_text = person.find("h2").text
    member_text = member_text.replace("\t", "").replace("\n", "")
    return re.search(r"\((.)\)", member_text)[1]
#Function clean votes and append to list.
def votes (person):
    member_text = person.find("h2").text
    member_text = member_text.replace("\t", "").replace("\n", "")
    return re.search("\((\d+\.\d)\%", member_text)[1]
#Function to find money raised, spent append to list
def get_money(amount):
    money_text = amount.text
    return re.search(r"\d+.\d{3}.\d{3}", money_text)[0]
def winner(person):
#Function finds if candidate is winner and append to list.   
    if 'Winner' in [x.text for x in person.find_all('i')]:
        return(True)
    else:
        return(False)
#Function finds if canditdate was incumbant and append to list.
def incumbent(person):
    if 'Incumbent' in [x.text for x in person.find_all('i')]:
        return(True)
    else:
        return(False)

## The Master Lists

In [8]:
#Lists populated by the soup functions
name_list = []
party_list = []
state_list = []
district_list = []
votes_received = []
raised = []
spent = []
winner_list = []
incumbent_list = []
############################################
#Pull this lever to feed soup to the panda:#
############################################
State = 'Vt' #Two letter state code
for district in range(1, 2): #Districts in state
    #converts numbers from range to strings
    get_district(State, str(district).zfill(2))

## The Pandas Food
### (╯°□°)╯︵ ┻━┻

> Don't feed the panda unless hungry!

In [9]:
#Dataframe constructed from lists and functions above.
#Saves info from each state to its own local .csv.
Shoots_and_Leaves = pd.DataFrame({"Name": name_list,
              "Party": party_list, 
              "Percent_Vote": votes_received, 
              "Amount_Raised" : raised, 
              "Amount_Spent"  : spent,
              "State" : state_list,
              "District" : district_list,
              "Winner" : winner_list,
              "Incumbent" : incumbent_list 
             })
Shoots_and_Leaves.to_csv(f'../Data/AllStates/{State}.csv', index=False) 

In [3]:
import pandas as pd
df = pd.read_csv('../Data/alldistricts.csv').drop_duplicates(keep = False)
df['Amount_Raised'] = df['Amount_Raised'].str.replace(',', '').str.replace('$', '')
df['Amount_Spent'] = df['Amount_Spent'].str.replace(',', '').str.replace('$', '')
df[['Amount_Raised', 'Amount_Spent']] = df[['Amount_Raised', 'Amount_Spent']].apply(pd.to_numeric)
df.head()

  df['Amount_Raised'] = df['Amount_Raised'].str.replace(',', '').str.replace('$', '')
  df['Amount_Spent'] = df['Amount_Spent'].str.replace(',', '').str.replace('$', '')


Unnamed: 0,Name,Party,Percent_Vote,Amount_Raised,Amount_Spent,State,District,Winner,Incumbent
0,Don Young,R,54.4,1922901,1790448,Ak,1,True,True
1,Alyse Galvin,I,45.3,5178452,5088103,Ak,1,False,False
3,Jerry Carl,R,64.9,1971321,1859349,Al,1,True,False
4,James Averhart,D,35.0,80095,78973,Al,1,False,False
5,Barry Moore,R,65.3,650807,669368,Al,2,True,False


In [37]:
df.dtypes

Name             object
Party            object
Percent_Vote     object
Amount_Raised     int64
Amount_Spent      int64
State            object
District         object
Winner           object
Incumbent        object
dtype: object

In [84]:
#Dataframe to calculate the relationship between winning and fundraising
win_raised = df.groupby(['State', 'District'])['Winner','Name',"Amount_Raised"].max()
win_raised.head()

  win_raised = df.groupby(['State', 'District'])['Winner','Name',"Amount_Raised"].max()


Unnamed: 0_level_0,Unnamed: 1_level_0,Winner,Name,Amount_Raised
State,District,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ak,1,True,Don Young,5178452
Al,1,True,Jerry Carl,1971321
Al,2,True,Phyllis Harvey-Hall,650807
Al,3,True,Mike D Rogers,1193111
Al,4,True,Robert B Aderholt,1255076


In [68]:
print(win_raised['Winner'].value_counts())
print("The candidate who raised the most money in each district had a "+str(round(430/434*100, 2))+'% chance of winning.')

True     430
False      4
Name: Winner, dtype: int64
The candidate who raised the most money in each district had a 99.08% chance of winning.


In [87]:
#Dataframe to calculate the relationship between winning and spending
win_spent = df.groupby(['State', 'District'])['Winner','Name',"Amount_Spent"].max()
win_spent.head()

  win_spent = df.groupby(['State', 'District'])['Winner','Name',"Amount_Spent"].max()


Unnamed: 0_level_0,Unnamed: 1_level_0,Winner,Name,Amount_Spent
State,District,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ak,1,True,Don Young,5088103
Al,1,True,Jerry Carl,1859349
Al,2,True,Phyllis Harvey-Hall,669368
Al,3,True,Mike D Rogers,1218564
Al,4,True,Robert B Aderholt,1323812


In [88]:
print(win_spent['Winner'].value_counts())
print("The candidate who raised the most money in each district had a "+str(round(430/434*100, 2))+'% chance of winning.')

True     430
False      4
Name: Winner, dtype: int64
The candidate who raised the most money in each district had a 99.08% chance of winning.


4. Using your scraped data, investigates different relationships between candidates and the amount of money they raised. Here are some suggestions to get you started, but feel free to pose you own questions or do additional exploration:  
    a. How often does the candidate who raised more money win a race?  
    b. How often does the candidate who spent more money win a race?  
    c. Does the difference between either money raised or money spent seem to influence the likelihood of a candidate winning a race?  
    d. How often does the incumbent candidate win a race?  
    e. Can you detect any relationship between amount of money raised and the incumbent status of a candidate?

### Bonus Questions:
If you complete all of the above, you can attempt these challenging bonus questions.

Open Secrets also gives a detailed breakdown of contributions by source.

Scrape these pages to get information on contributions by source. See if you can find anything interesting in terms of the source of contributions. Some examples to get you started:
* What does the overall distribution of funding sources look like?
* Is there any detectable difference in contribution sources between Democrat and Republican candidates?
* Do the funding sources for either the winning candidate or incumbent candidate differ from the other candidates?