In [1]:
import pandas as pd
import numpy as np

In [2]:
# Dataset description:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Cmtes.htm
columns_cmtes527 = ['cycle', 'rpt', 'ein', 'crp527name', 'affiliate', 'ultorg', 
                    'recipcode', 'cmteid', 'cid', 'eccmteid', 'party', 
                    'primcode', 'source', 'ffreq', 'ctype', 'csource', 'viewpt',
                    'comments', 'state']
columns_cmtes527 = dict(enumerate(columns_cmtes527))

In [3]:
df = pd.read_csv('../../data/open_secrets/527/cmtes527.csv', on_bad_lines='skip', sep=',', header=None, na_values=['N/A', 'NA'])
df = df.rename(columns=columns_cmtes527)

In [4]:
# Get rid of the pipes on both sides of the data.
df = df.applymap(lambda x: x.replace('|', '') if type(x) == str else x)

# Other cleanup.
df['cycle'] = df['cycle'].apply(lambda x: int(x))
df['ein'] = df['ein'].apply(lambda x: int(x))

In [5]:
df_recent = df[df['cycle'] >= 2014]
df_recent = df_recent.reset_index(drop=True)

In [6]:
pd.options.display.max_columns = None
df_recent

Unnamed: 0,cycle,rpt,ein,crp527name,affiliate,ultorg,recipcode,cmteid,cid,eccmteid,party,primcode,source,ffreq,ctype,csource,viewpt,comments,state
0,2014,Q114,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
1,2014,Q213,462000473,Advocacy for Action Fund,,Advocacy for Action Fund,PI,,,,,J7500,webMS,Q,S,webMS,L,,GA
2,2014,Q214,463575406,Seeking Transparency in Government,,Seeking Transparency in Government,PI,,,,,Y0000,,Q,S,Rept,X,,FL
3,2014,Q414,522257109,International Brotherhood of Electrical Workers,,International Brotherhood of Electrical Workers,PL,C00027342,,,,LC150,PAC,Q,F,Name,L,,DC
4,2014,Q413,273202261,AFT Solidarity,,American Federation of Teachers,PL,,,,,L1300,Name,Q,F,Name,L,,DC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2295,2014,Q213,453599950,Wright McLeod for Congress,,Wright McLeod for Congress,RC,,N00033605,,R,Z1100,Rept,Q,F,Rept,C,,GA
2296,2014,Q414,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
2297,2014,Q214,274900573,Americans for Liberty & Prosperity,,Americans for Liberty & Prosperity,PI,,,,,J1100,Rept,Q,F,Rept,C,,FL
2298,2014,Q314,454308347,Shuttleworth for Congress,Bruce Shuttleworth (D-VA),Shuttleworth for Congress,DL,,N00034161,,D,Z1200,Rept,Q,F,Rept,L,,VA


In [7]:
# Brian D Goldberg's non-mutable candidate id.
# These are his quarterly reports.
df_cand = df[df['cid'] == 'N00036203']
df_cand

Unnamed: 0,cycle,rpt,ein,crp527name,affiliate,ultorg,recipcode,cmteid,cid,eccmteid,party,primcode,source,ffreq,ctype,csource,viewpt,comments,state
13,2014,Q114,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
3465,2014,Q214,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
14609,2014,Q314,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
15068,2014,Q414,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ


In [8]:
# Brian D Goldberg's election committee id.
# Same results as above.
df[df['cmteid'] == 'C00558874']

Unnamed: 0,cycle,rpt,ein,crp527name,affiliate,ultorg,recipcode,cmteid,cid,eccmteid,party,primcode,source,ffreq,ctype,csource,viewpt,comments,state
13,2014,Q114,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
3465,2014,Q214,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
14609,2014,Q314,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ
15068,2014,Q414,464747846,Brian D Goldberg for US Senate,,Brian D Goldberg for US Senate,RL,C00558874,N00036203,,R,Z1100,Rept,Q,F,Rept,C,,NJ


In [9]:
# Andre Carson's election committee id.
# No results
df[df['cmteid'] == 'C00442921']

Unnamed: 0,cycle,rpt,ein,crp527name,affiliate,ultorg,recipcode,cmteid,cid,eccmteid,party,primcode,source,ffreq,ctype,csource,viewpt,comments,state


In [10]:
df[df['cmteid'] == 'C00401224'] # Act Blue PAC

Unnamed: 0,cycle,rpt,ein,crp527name,affiliate,ultorg,recipcode,cmteid,cid,eccmteid,party,primcode,source,ffreq,ctype,csource,viewpt,comments,state
31,2010,Q210,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,Q,F,WebDA,L,,MA
646,2012,Q212,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,M,F,WebDA,L,,MA
1310,2010,Q410,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,M,F,WebDA,L,,MA
2779,2006,Q406,202517748,ActBlue,,ActBlue,PI,C00401224,,,,J1200,WebDA,Q,F,WebDA,L,,MA
2781,2012,Q112,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,M,F,WebDA,L,,MA
2840,2008,Q208,202517748,ActBlue,,ActBlue,PI,C00401224,,,,J1200,WebDA,Q,F,WebDA,L,,MA
2879,2014,Q413,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,M,F,WebDA,L,,MA
3349,2012,Q412,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,M,F,WebDA,L,,MA
4012,2014,Q213,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,M,F,WebDA,L,,MA
4522,2010,Q409,202517748,ActBlue Non-Federal,,ActBlue,PI,C00401224,,,,J1200,WebDA,Q,F,WebDA,L,,MA
