In [64]:
# Import dependencies
import pandas as pd
import psycopg2
import boto3

In [None]:
# Get the AWS (S3) keys and the Postgres password from the config file
from config import aws_access_key, aws_secret_access_key, postgres_key

In [44]:
# Download the FEC 2014 congressional candidate workbook straight from the
# FEC site. header=None keeps every spreadsheet row as data, so the real
# header row can be located and promoted in a later step.
fec_source_url = "https://transition.fec.gov/press/summaries/2014/tables/congressional/ConCand7_2014_18m.xlsx"
fec = pd.read_excel(fec_source_url, header=None)

In [45]:
# Start the working frame from the raw download. pd.read_excel normally hands
# back a DataFrame already, so this only wraps it under the name used by the
# cleaning steps below.
fec_df = pd.DataFrame(data=fec)

In [46]:
# Discard the first four spreadsheet rows (positions 0-3), keeping all columns.
# NOTE(review): presumably these are title/preamble rows above the header.
fec_df = fec_df.iloc[4:, :]

In [47]:
# Promote the first remaining row to column labels, then keep only the rows
# beneath it as data.
header_row = fec_df.iloc[0]
fec_df = fec_df.iloc[1:]
fec_df.columns = header_row

In [48]:
# Bucket the party column: every party that appears fewer than 100 times is
# relabelled "Other", so only the frequent parties keep their own category.
party_counts = fec_df["Party"].value_counts()

replace_parties = list(
    party_counts[party_counts < 100].index)

# One vectorised pass instead of a replace() per party. assign() returns a new
# frame, which avoids writing a column onto the iloc slice produced above (the
# attribute-style assignment raised SettingWithCopyWarning).
is_rare_party = fec_df["Party"].isin(replace_parties)
fec_df = fec_df.assign(Party=fec_df["Party"].mask(is_rare_party, "Other"))

In [49]:
# Strip every comma out of every string cell in the frame (regex substitution
# of "," with the empty string). Non-string cells are left as they are.
fec_df = fec_df.replace(to_replace=",", value="", regex=True)

In [50]:
# Keep only complete records: a row is removed if any of its cells is missing.
fec_df = fec_df.dropna(axis=0, how="any")

In [51]:
# Replace the spreadsheet headers with snake_case names, assigned by position.
# The frame must have exactly 13 columns at this point or pandas raises a
# length-mismatch error.
clean_column_names = [
    "state", "district", "name", "party",
    "incumbent_challenger_open",
    "receipts", "from_individuals", "from_committees", "from_candidate",
    "disbursements", "cash_on_hand", "debts",
    "end_date",
]
fec_df.columns = clean_column_names

In [52]:
# De-duplicate on candidate name: the first row seen for each name is kept and
# any later rows carrying the same name are discarded.
fec_df = fec_df.drop_duplicates(subset="name", keep="first")

In [53]:
# Drop unwanted values from the state column.
# These entries have no abbreviation in the state lookup further down, so their
# rows are removed here. The names are matched exactly as they appear in the
# data, i.e. before spaces are stripped from the column.
excluded_states = [
    "Guam",
    "District Of Columbia",
    "Northern Mariana",
    "American Samoa",
    "Virgin Islands",
    "Puerto Rico",
]

# A single boolean filter replaces six separate in-place drops; .copy() gives
# later cells an independent frame they can safely assign columns on.
fec_df = fec_df[~fec_df["state"].isin(excluded_states)].copy()

In [54]:
# Remove every space inside the state names (e.g. "New York" -> "NewYork");
# the abbreviation lookup that follows is keyed on these space-free spellings.
state_without_spaces = fec_df["state"].str.replace(" ","")
fec_df["state"] = state_without_spaces

In [55]:
# Convert state column values to abbreviations

# Lookup from the space-stripped state name (as stored in fec_df["state"]) to
# its two-letter abbreviation.
STATE_ABBREVIATIONS = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "NewHampshire": "NH",
    "NewJersey": "NJ",
    "NewMexico": "NM",
    "NewYork": "NY",
    "NorthCarolina": "NC",
    "NorthDakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "RhodeIsland": "RI",
    "SouthCarolina": "SC",
    "SouthDakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "WestVirginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

state_list = fec_df["state"]

# Fail loudly on any name missing from the lookup. Skipping it silently would
# leave new_states shorter than the DataFrame, and the column assignment in the
# next cell would then fail with an opaque length-mismatch error.
unknown_states = sorted(
    {str(state) for state in state_list if state not in STATE_ABBREVIATIONS}
)
if unknown_states:
    raise ValueError(
        "No abbreviation defined for state value(s): {}".format(unknown_states)
    )

# One abbreviation per row, in row order, so the list lines up with fec_df.
new_states = [STATE_ABBREVIATIONS[state] for state in state_list]

In [56]:
# Attach the abbreviations as a new trailing column "new_state" and, in the
# same step, discard the original full-name "state" column.
# new_states must hold exactly one entry per row of fec_df.
fec_df = fec_df.assign(new_state=new_states).drop(columns=["state"])

In [57]:
# Build a combined key by joining the state abbreviation and the district
# value element-wise, state first.
# NOTE(review): assumes "district" holds strings - confirm against the source data.
state_part = fec_df["new_state"]
district_part = fec_df["district"]
fec_df["state_district"] = state_part + district_part

In [58]:
# Remove the columns that are no longer needed: district and new_state now
# live on in state_district, and end_date is not kept in the output.
fec_df = fec_df.drop(columns=["district", "new_state", "end_date"])

In [59]:
# Make state_district the leading column: take the column out of the frame,
# then re-insert it at position 0.
col_name = "state_district"
first_col = fec_df[col_name]
fec_df = fec_df.drop(columns=col_name)
fec_df.insert(loc=0, column=col_name, value=first_col)

In [60]:
# Replace 00 values with 01
# NOTE(review): presumably "00" is the district code being normalised to "01" - confirm.
# The substitution is limited to the state_district column. A frame-wide regex
# replace would also rewrite "00" inside any other string cell (for example a
# name, or an amount stored as text), silently corrupting that data.
# regex=False makes this a plain literal substitution.
fec_df = fec_df.assign(
    state_district=fec_df["state_district"].str.replace("00", "01", regex=False)
)

In [61]:
# Tag every row with the election year, placed as the second column (position 1).
# DataFrame.insert modifies the frame in place and raises if an "election"
# column already exists, so this cell cannot be run twice on the same frame.
data = 2014

fec_df.insert(loc=1, column="election", value=data)

In [62]:
# Write the cleaned frame to disk as CSV, leaving out the row index.
# The relative "Data" directory must already exist.
fec_df.to_csv(path_or_buf="Data/fec_2014.csv", index=False)

In [63]:
# Upload fec_df to sql database
# Connection parameters are passed as keyword arguments so that the
# postgres_key variable imported from config is actually used as the password.
# (The earlier DSN string was unterminated and held the literal text
# "postgres_key" rather than the variable's value.)
conn = psycopg2.connect(
    host="projectdatabase.c1ibogbs35nd.us-east-2.rds.amazonaws.com",
    dbname="election_results",
    user="postgres",
    password=postgres_key,
)
try:
    cur = conn.cursor()
    try:
        with open("Data/fec_2014.csv", "r") as f:
            next(f) # Skip the header row.
            # Bulk-load the remaining lines into the fec_2014 table, splitting
            # fields on commas.
            cur.copy_from(f, "fec_2014", sep=",")
    finally:
        cur.close()

    # Commit only after the whole file has loaded without error.
    conn.commit()
finally:
    # Always release the database connection, whether the load succeeded or failed.
    conn.close()