# Python and R
Notebook Setup

In [None]:
# Load the rpy2 extension, which provides the %%R cell magic used later in this notebook.
%load_ext rpy2.ipython
# Load autoreload and set mode 2 so edits to imported modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the cell output.
%matplotlib inline  
import os

import pythonimports
import psycopg2
import pandas as pd

In [None]:
%%javascript
// Turn off output auto-scrolling: the scroll check answers "no" for every
// output area, whatever line count it is asked about.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

# Load FEC Data

The data originates from the FEC bulk-data page linked below; this notebook reads it from a PostgreSQL database:

https://www.fec.gov/data/browse-data/?tab=bulk-data

In [None]:
# Connect to the PostgreSQL database that holds the FEC tables.
# Each setting can be overridden with a FEC_DB_* environment variable so that
# credentials do not have to live in the notebook. The fallbacks are the values
# this notebook has always used, so a plain "Run All" behaves exactly as before.
conn = psycopg2.connect(
    host=os.environ.get("FEC_DB_HOST", "postgresfec.cnvthm1pgcw1.us-east-2.rds.amazonaws.com"),
    database=os.environ.get("FEC_DB_NAME", "fec"),
    user=os.environ.get("FEC_DB_USER", "postgresfec"),
    password=os.environ.get("FEC_DB_PASSWORD", "postgresfec"),
)

# Cursor used by the raw-SQL cell below; it is closed in the notebook's final cell.
cursor = conn.cursor()
print("Database connected successfully")

In [None]:
# Point the reader at the FEC documentation for the tables listed in the next cell.
print(
    "List of tables in the database; see documentation below:",
    "https://www.fec.gov/data/browse-data/?tab=bulk-data",
    sep="\n",
)


In [None]:
# Ask the information schema for every table in the "public" schema.
tables_query = """SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'"""
cursor.execute(tables_query)

# Print each fetched row as returned by the driver (one row per table).
rows = cursor.fetchall()
for row in rows:
    print(row)


In [None]:
# Planned Parenthood operating expenditures, file year 2022:
# transaction_amt summed over every committee whose name matches the pattern.
sql = '''SELECT sum(oe.transaction_amt) 
FROM committee_master cm 
LEFT JOIN operating_expenditures oe 
	ON oe.file_year=cm.file_year AND oe.cmte_id=cm.cmte_id
WHERE cm.cmte_nm LIKE '%PLANNED%PARENT%' AND cm.file_year=2022'''

# The query is an aggregate without GROUP BY, so a single row comes back.
planned_parenthood_operating_expenditures = pd.read_sql(sql=sql, con=conn)
planned_parenthood_operating_expenditures.head(50)

In [None]:
# Planned Parenthood committee transactions, even file years 2010-2022.
# Sums transaction_amt per committee / ct.name / ct.other_id / file year and
# lists the largest totals first (rows with a NULL total go last).
#
# The name pattern is stated once and the years are an explicit IN list, so the
# filter does not depend on AND binding tighter than OR.
sql = '''SELECT cm.cmte_id, cm.cmte_nm, ct."name", cm.file_year, ct.other_id, sum(ct.transaction_amt)
FROM committee_master cm
LEFT JOIN committee_transactions ct
    ON ct.cmte_id = cm.cmte_id AND ct.file_year = cm.file_year
WHERE cm.cmte_nm LIKE '%PLANNED%PARENT%'
  AND cm.file_year IN (2010, 2012, 2014, 2016, 2018, 2020, 2022)
GROUP BY cm.cmte_id, cm.cmte_nm, ct."name", ct.other_id, cm.file_year
ORDER BY sum DESC NULLS LAST;'''
planned_parenthood_committee_transactions = pd.read_sql(sql, con=conn)
planned_parenthood_committee_transactions.head(5)

In [None]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
planned_parenthood_committee_transactions.to_csv('data/planned_parenthood_committee_transactions_2010_2022.csv')

In [None]:
# Emily's List / Women Vote committee transactions, even file years 2010-2022.
# Sums transaction_amt per committee / ct.name / ct.other_id / file year and
# lists the largest totals first (rows with a NULL total go last).
#
# Both name patterns are grouped in parentheses and the years are an explicit
# IN list, so the filter does not depend on AND binding tighter than OR.
sql = '''SELECT cm.cmte_id, cm.cmte_nm, ct."name", cm.file_year, ct.other_id, sum(ct.transaction_amt)
FROM committee_master cm
LEFT JOIN committee_transactions ct
    ON ct.cmte_id = cm.cmte_id AND ct.file_year = cm.file_year
WHERE (cm.cmte_nm LIKE 'EMILY%LIST%' OR cm.cmte_nm LIKE 'WOMEN%VOTE%')
  AND cm.file_year IN (2010, 2012, 2014, 2016, 2018, 2020, 2022)
GROUP BY cm.cmte_id, cm.cmte_nm, ct."name", ct.other_id, cm.file_year
ORDER BY sum DESC NULLS LAST;'''
emilys_list_committee_transactions = pd.read_sql(sql, con=conn)
emilys_list_committee_transactions.head(5)

In [None]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
emilys_list_committee_transactions.to_csv('data/emilys_list_committee_transactions_2010_2022.csv')

In [None]:
# NARAL Pro-Choice committee transactions, even file years 2010-2022.
# Sums transaction_amt per committee / ct.name / ct.other_id / file year and
# lists the largest totals first (rows with a NULL total go last).
#
# The name pattern is stated once and the years are an explicit IN list, so the
# filter does not depend on AND binding tighter than OR.
sql = '''SELECT cm.cmte_id, cm.cmte_nm, ct."name", cm.file_year, ct.other_id, sum(ct.transaction_amt)
FROM committee_master cm
LEFT JOIN committee_transactions ct
    ON ct.cmte_id = cm.cmte_id AND ct.file_year = cm.file_year
WHERE cm.cmte_nm LIKE 'NARAL%PRO%'
  AND cm.file_year IN (2010, 2012, 2014, 2016, 2018, 2020, 2022)
GROUP BY cm.cmte_id, cm.cmte_nm, ct."name", ct.other_id, cm.file_year
ORDER BY sum DESC NULLS LAST;'''
naral_committee_transactions = pd.read_sql(sql, con=conn)
naral_committee_transactions.head(5)

In [None]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
naral_committee_transactions.to_csv('data/naral_committee_transactions_2010_2022.csv')

### Looking at the PAC summaries

In [None]:
# Emily's List / Women Vote PAC summary rows, even file years 2010-2022.
#
# Both name patterns are grouped in parentheses and the years are an explicit
# IN list, so the filter does not depend on AND binding tighter than OR.
sql = '''SELECT *
FROM pac_summary ps
WHERE (ps.cmte_nm LIKE 'EMILY%LIST%' OR ps.cmte_nm LIKE 'WOMEN%VOTE%')
  AND ps.file_year IN (2010, 2012, 2014, 2016, 2018, 2020, 2022)'''
emilys_list_pac_summary = pd.read_sql(sql, con=conn)
emilys_list_pac_summary.head(5)

In [None]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
emilys_list_pac_summary.to_csv('data/emilys_list_pac_summary_2010_2022.csv')

In [None]:
# Planned Parenthood PAC summary rows, even file years 2010-2022.
#
# The name pattern is stated once and the years are an explicit IN list, so the
# filter does not depend on AND binding tighter than OR.
sql = '''SELECT *
FROM pac_summary ps
WHERE ps.cmte_nm LIKE '%PLANNED%PARENT%'
  AND ps.file_year IN (2010, 2012, 2014, 2016, 2018, 2020, 2022)'''
planned_parenthood_pac_summary = pd.read_sql(sql, con=conn)
planned_parenthood_pac_summary.head(5)

In [None]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
planned_parenthood_pac_summary.to_csv('data/planned_parenthood_pac_summary_2010_2022.csv')

In [None]:
# NARAL Pro-Choice PAC summary rows, even file years 2010-2022.
#
# The name pattern is stated once and the years are an explicit IN list, so the
# filter does not depend on AND binding tighter than OR.
sql = '''SELECT *
FROM pac_summary ps
WHERE ps.cmte_nm LIKE 'NARAL%PRO%'
  AND ps.file_year IN (2010, 2012, 2014, 2016, 2018, 2020, 2022)'''
naral_pac_summary = pd.read_sql(sql, con=conn)
naral_pac_summary.head(5)

In [None]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
naral_pac_summary.to_csv('data/naral_pac_summary_2010_2022.csv')

# ggplot example

In [None]:
%%R

# Attach the plotting packages. library() stops with an error right here when a
# package is not installed, whereas require() only warns and returns FALSE,
# which would postpone the failure to the first plotting call.
library(tidyverse)
library(ggbeeswarm)
library(ggrepel)

In [None]:
%%R -i house_senate_current_campaigns -w 1000

# `house_senate_current_campaigns` is handed over from Python by this cell's -i flag.
# NOTE(review): no cell visible here creates that data frame; confirm it is
# loaded in the Python namespace before this cell runs.
df <- house_senate_current_campaigns

# Only rows with ttl_disb above 1e+07 receive a text label.
big_spenders <- filter(df, ttl_disb > 1e+07)

# NOTE(review): alpha=.005 sits inside aes(), so ggplot2 treats it as a mapped
# aesthetic rather than a fixed transparency; confirm that is intended.
ggplot(df, aes(x=ttl_disb, y=NA, alpha=.005, label=cand_name)) +
    geom_quasirandom(size=4) +
    geom_label_repel(data=big_spenders)

In [None]:
# Show the campaigns frame ordered from highest to lowest ttl_disb.
# NOTE(review): no cell visible here defines house_senate_current_campaigns; confirm where it is loaded.
house_senate_current_campaigns.sort_values('ttl_disb', ascending=False)

# Close database connections

In [None]:
# Release the database resources in order: the cursor first, then its connection.
for resource in (cursor, conn):
    resource.close()