In [1]:
import pandas as pd
import numpy as np

In [2]:
# Positional index -> column name for the header-less pacs22 file.
# Dataset description (data dictionary for the PAC-to-candidates table):
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20PAC%20to%20Cands%20Data.htm
columns_campfin22_pacs22 = {
    0: 'cycle',
    1: 'fecrecno',
    2: 'pacid',
    3: 'cid',
    4: 'amount',
    5: 'date',
    6: 'realcode',
    7: 'type',
    8: 'di',
    9: 'feccandid',
}

In [3]:
# Load the PAC-to-candidate records. The file has no header row, so pandas
# numbers the columns 0..N-1 and they are renamed from the positional mapping
# in `columns_campfin22_pacs22`.
#
# Text fields in this export are wrapped in pipes (|...|). Declaring the pipe
# as the quote character makes the parser remove the wrapping itself and, more
# importantly, stops a comma inside a pipe-quoted field from being treated as
# a column separator. Without it such a row has too many fields and is
# silently discarded by on_bad_lines='skip'.
df = pd.read_csv(
    '../../data/open_secrets/CampaignFin22/pacs22.csv',
    sep=',',
    quotechar='|',
    header=None,
    na_values=['N/A', 'NA'],
    # NOTE(review): rows that are still malformed are dropped without any
    # message; switch to on_bad_lines='warn' to see what is being lost.
    on_bad_lines='skip',
)
df = df.rename(columns=columns_campfin22_pacs22)

In [4]:
def _strip_pipes(value):
    """Return ``value`` with every '|' removed; non-string values pass through unchanged."""
    return value.replace('|', '') if isinstance(value, str) else value


# Get rid of the pipes on both sides of the data.
# A column-wise Series.map is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases; the element-wise result is the same.
df = df.apply(lambda column: column.map(_strip_pipes))

# Other cleanup: the numeric identifier/amount columns become 64-bit integers
# in a single cast (raises if a value cannot be converted, as int(x) did).
df = df.astype({'cycle': 'int64', 'fecrecno': 'int64', 'amount': 'int64'})

# Dates are expected as month/day/year. Anything that does not match becomes
# NaT (errors='coerce') and those rows are then removed.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y', errors='coerce')
df = df.dropna(subset=['date'])

In [5]:
# Remove the cap on displayed columns so wide frames render in full,
# then show the cleaned frame as the cell's output.
pd.set_option('display.max_columns', None)
df

Unnamed: 0,cycle,fecrecno,pacid,cid,amount,date,realcode,type,di,feccandid
0,2022,1011020220264924462,C00406124,N00029513,500,2021-10-18,B4000,24K,D,H8IN07184
1,2022,1011320230265253020,C00032979,N00041569,-2500,2022-11-21,Z9600,24K,D,H8IL14174
2,2022,1011320230265254779,C00032979,N00041511,-5000,2022-11-18,Z9600,24K,D,H8MN08043
3,2022,1011320230265254798,C00032979,N00035575,5000,2022-11-03,Z9600,24K,D,H4FL15155
4,2022,1011320230265254809,C00032979,N00041511,4000,2022-11-03,Z9600,24K,D,H8MN08043
...,...,...,...,...,...,...,...,...,...,...
758120,2022,4123120221645576757,C00575126,N00026914,2000,2022-10-11,Z1200,24K,D,H4WI04183
758121,2022,4123120221645576758,C00575126,N00045905,2000,2022-10-06,Z1200,24K,D,H0IN01150
758122,2022,4123120221645576760,C00575126,N00049757,1000,2022-10-14,Z1200,24K,D,H2PA18200
758123,2022,4123120221645576761,C00575126,N00049464,1000,2022-10-19,Z1200,24K,D,H2CA42205


In [6]:
# Filter on Andre Carlson's FEC candidate id to see
# his receipts from PACs.
# Note: pacid C00401224 is ActBlue!
df_cand = df.loc[df['feccandid'].eq('H8IN07184')]
df_cand

Unnamed: 0,cycle,fecrecno,pacid,cid,amount,date,realcode,type,di,feccandid
0,2022,1011020220264924462,C00406124,N00029513,500,2021-10-18,B4000,24K,D,H8IN07184
610,2022,2022820231732873121,C00406124,N00029513,500,2022-09-02,B4000,24K,D,H8IN07184
4578,2022,4010420231665458816,C00035451,N00029513,5000,2022-08-10,LT100,24K,D,H8IN07184
5274,2022,4010420231665461494,C00068692,N00029513,2000,2022-10-27,T7100,24K,D,H8IN07184
5782,2022,4010520231666136188,C00004036,N00029513,5000,2022-09-21,LG300,24K,D,H8IN07184
...,...,...,...,...,...,...,...,...,...,...
752826,2022,4122120211386317018,C00097568,N00029513,1000,2021-11-22,D3000,24K,D,H8IN07184
753319,2022,4122120221644737448,C00166348,N00029513,-1000,2022-11-01,A1200,24K,D,H8IN07184
753891,2022,4122220211386487663,C00030718,N00029513,2000,2021-11-18,F4200,24K,D,H8IN07184
754518,2022,4122320211386555510,C00325092,N00029513,1000,2021-11-03,D5000,24K,D,H8IN07184


In [7]:
# Same lookup for Andre Carlson, this time keyed on his non-mutable
# candidate id (cid) in lieu of feccandid.
df_cand2 = df.loc[df['cid'].eq('N00029513')]
df_cand2

Unnamed: 0,cycle,fecrecno,pacid,cid,amount,date,realcode,type,di,feccandid
0,2022,1011020220264924462,C00406124,N00029513,500,2021-10-18,B4000,24K,D,H8IN07184
610,2022,2022820231732873121,C00406124,N00029513,500,2022-09-02,B4000,24K,D,H8IN07184
4578,2022,4010420231665458816,C00035451,N00029513,5000,2022-08-10,LT100,24K,D,H8IN07184
5274,2022,4010420231665461494,C00068692,N00029513,2000,2022-10-27,T7100,24K,D,H8IN07184
5782,2022,4010520231666136188,C00004036,N00029513,5000,2022-09-21,LG300,24K,D,H8IN07184
...,...,...,...,...,...,...,...,...,...,...
752826,2022,4122120211386317018,C00097568,N00029513,1000,2021-11-22,D3000,24K,D,H8IN07184
753319,2022,4122120221644737448,C00166348,N00029513,-1000,2022-11-01,A1200,24K,D,H8IN07184
753891,2022,4122220211386487663,C00030718,N00029513,2000,2021-11-18,F4200,24K,D,H8IN07184
754518,2022,4122320211386555510,C00325092,N00029513,1000,2021-11-03,D5000,24K,D,H8IN07184


In [8]:
# Brian Goldberg's non-mutable candidate id.
# Open question: did he never take money from a PAC? This lookup was meant
# to list the PACs he took money from, but it comes back empty (no record!).
df_cand3 = df.loc[df['cid'].eq('N00036203')]
df_cand3

Unnamed: 0,cycle,fecrecno,pacid,cid,amount,date,realcode,type,di,feccandid
