# SI 608 Project – Workspace #
#### General scratchpad workspace that preloads all the dataframes. ####
See <code>./modules</code> to review how libraries are installed and imported, as well as where the data is loaded, cleaned, and formatted. This is only here as a helpful tool, make a copy and do whatever you'd like. Or don't use this at all if that's preferable.

[OpenSecrets Data Dictionary Index](../../docs/open_source_data_dictionary.md)
<br><small><em>(View the index with markdown preview)</em></small>

### Initialize
Init file contains helper functions used throughout the project.

In [3]:
%run modules/init.ipynb

Initializing project...
pandas is already installed.
matplotlib is already installed.
networkx is already installed.
numpy is already installed.
...initialization complete.


#### Load datasets ####

This module provides a single function for all of the *contribution* data from OpenSecrets.

In [5]:
%run modules/data.ipynb

Loading data module...
...data module loaded.


In [6]:
# OpenSecrets Data Definition: 527 Committees
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Cmtes.htm
columns_cmtes527 = ['cycle', 'rpt', 'ein', 'crp527name', 'affiliate', 'ultorg', 
                    'recipcode', 'cmteid', 'cid', 'eccmteid', 'party', 
                    'primcode', 'source', 'ffreq', 'ctype', 'csource', 'viewpt',
                    'comments', 'state']

df_cmtes527 = create_dataframe('../../data/open_secrets/527/cmtes527.csv', columns_cmtes527)

Creating cmtes527 dataframe...
cmtes527 dataframe created...


In [7]:
df_cmtes527.head()

Unnamed: 0,cycle_cmtes527,rpt_cmtes527,ein_cmtes527,crp527name_cmtes527,affiliate_cmtes527,ultorg_cmtes527,recipcode_cmtes527,cmteid_cmtes527,cid_cmtes527,eccmteid_cmtes527,party_cmtes527,primcode_cmtes527,source_cmtes527,ffreq_cmtes527,ctype_cmtes527,csource_cmtes527,viewpt_cmtes527,comments_cmtes527,state_cmtes527
0,2002,Q302,861006189,American Electronics Assn,,American Electronics Assn,PB,,,,,C5000,WebPN,Q,F,,N,,AZ
1,2008,Q308,262108560,California 2008 GOP Delegation Corporate,,California 2008 GOP Delegation,RP,,,,R,Z5100,Name,Q,F,,C,,CA
2,2000,Q400,912101097,Alabama League of Environmental Action,,Alabama League of Environmental Action,PI,,,,,JE300,Name,Q,S,Name,L,,AL
3,2012,Q412,522257109,International Brotherhood of Electrical Workers,,International Brotherhood of Electrical Workers,PL,C00027342,,,,LC150,PAC,Q,F,Name,L,,DC
4,2008,Q407,900049259,Pacific Northwest Regional Council of Carpenters,,Carpenters & Joiners Union,PL,C00001016,,C70004205,,LB100,PAC,Q,F,Expen,L,,WA


In [8]:
# OpenSecrets Data Dictionary 527 Expenditure Data - from IRS Form 8872B
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Expenditures.htm
columns_expends527 = ['rpt', 'formid', 'schbid', 'orgname', 'ein', 'recipient', 
                    'recipientcrp', 'amount', 'date', 'expcode', 'source', 
                    'purpose', 'addr1', 'addr2', 'city', 'state', 'zip',
                    'employer', 'occupation']

df_expends527 = create_dataframe('../../data/open_secrets/527/expends527.csv', columns_expends527)

Creating expends527 dataframe...
expends527 dataframe created...


In [9]:
df_expends527.head()

Unnamed: 0,rpt_expends527,formid_expends527,schbid_expends527,orgname_expends527,ein_expends527,recipient_expends527,recipientcrp_expends527,amount_expends527,date_expends527,expcode_expends527,source_expends527,purpose_expends527,addr1_expends527,addr2_expends527,city_expends527,state_expends527,zip_expends527,employer_expends527,occupation_expends527
0,Q210,9595787,2016057,Republican State Leadership Cmte,50532524,VERIZON,Verizon Communications,125,2010-04-16,A70,@new,TELEPHONE,PO BOX 660720,,DALLAS,TX,75266,,
1,Q210,9595787,2016059,Republican State Leadership Cmte,50532524,VERIZON WIRELESS,Verizon Wireless,141,2010-04-09,A70,@new,CELL PHONE,PO BOX 25505,,LEHIGH VALLEY,PA,18002,,
2,Q210,9595791,2016223,GOPAC,521237780,ADP,Automatic Data Processing Inc,414,2010-04-09,W10,@new,PAYROLL TAXES,8094 SAND PIPER CIRCLE,,WHITE MARSH,MD,21236,,
3,Q210,9595791,2016225,GOPAC,521237780,ADP,Automatic Data Processing Inc,78,2010-04-23,W10,@new,PAYROLL SERVICES,8094 SAND PIPER CIRCLE,,WHITE MARSH,MD,21236,,
4,Q210,9595791,2016228,GOPAC,521237780,AMERICAN EXPRESS,American Express,1,2010-04-26,U00,tempU,AMEX PYMT $6634.00 DETAIL FOLLOWS,P.O. BOX 1270,,NEWARK,NJ,7101,,


In [10]:
# OpenSecrets Data Dictionary 527 Contribution Data - from IRS Form 8872A
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Receipts.htm
columns_rcpts527 = ['id', 'rpt', 'formid', 'schaid', 'contribid', 'contrib', 
                    'amount', 'date', 'orgname', 'ultorg', 'realcode', 
                    'recipid', 'recipcode', 'party', 'recipient', 'city', 'state',
                    'zip', 'zip4', 'pmsa', 'employer', 'occupation', 'ytd', 'gender', 'source']

df_rcpts527 = create_dataframe('../../data/open_secrets/527/rcpts527.csv', columns_rcpts527)

Creating rcpts527 dataframe...
rcpts527 dataframe created...


In [11]:
df_rcpts527.head()

Unnamed: 0,id_rcpts527,rpt_rcpts527,formid_rcpts527,schaid_rcpts527,contribid_rcpts527,contrib_rcpts527,amount_rcpts527,date_rcpts527,orgname_rcpts527,ultorg_rcpts527,realcode_rcpts527,recipid_rcpts527,recipcode_rcpts527,party_rcpts527,recipient_rcpts527,city_rcpts527,state_rcpts527,zip_rcpts527,zip4_rcpts527,pmsa_rcpts527,employer_rcpts527,occupation_rcpts527,ytd_rcpts527,gender_rcpts527,source_rcpts527
0,981,Q210,9595837,2017490,,WEST LA DEMOCRATIC CLUB,1,2010-04-18,West La Democratic Club,,Z9600,270160261,PI,,ActBlue Technical Services,BURBANK,CA,91502,,4480,,,473,,Rept
1,982,Q210,9595837,2017492,,WINOGRAD FOR CONGRESS 2010,259,2010-04-18,Winograd For Congress 2010,,Z9600,270160261,PI,,ActBlue Technical Services,BURBANK,CA,91502,,4480,,,1049,,Rept
2,983,Q210,9595837,2017387,,FDL ACTION PAC,4,2010-04-18,Fdl Action Pac,,Z9600,270160261,PI,,ActBlue Technical Services,WASHINGTON,DC,20016,,8840,,,1524,,Rept
3,984,Q210,9595837,2017390,,FRANKEN MVPS,190,2010-04-18,Franken Mvps,,Z9600,270160261,PI,,ActBlue Technical Services,MINNEAPOLIS,MN,55458,,5120,,,662,,Rept
4,985,Q210,9595837,2017393,,FRIENDS OF BRENT BARTON,49,2010-04-18,Friends Of Brent Barton,,Z9600,270160261,PI,,ActBlue Technical Services,PORTLAND,OR,97214,,6440,,,241,,Rept


In [216]:
# OpenSecrets Data Definition: Candidates
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20Candidates%20Data.htm
columns_cands22 = ['cycle', 'feccandid', 'cid', 'firstlastp', 'party', 'distidrunfor', 
                    'distidcurr', 'currcand', 'cyclecand', 'crpico', 'recipcode', 
                    'nopacs']

df_cands22 = create_dataframe('../../data/open_secrets/CampaignFin22/cands22.csv', columns_cands22)

# Remove party labels from names: '3', 'R', 'D', 'I', 'L', 'U', 'i'
df_cands22['firstlast_cands22'] = df_cands22['firstlastp_cands22'].apply(
    lambda x: x.replace(" (3)", "").replace(" (R)", "").replace(" (D)", "").replace(" (I)", "").replace(" (L)", "").replace(" (U)", "").replace(" (i)", "") if isinstance(x, str) else x
)

Creating cands22 dataframe...
cands22 dataframe created...


In [218]:
df_cands22.head()

Unnamed: 0,cycle_cands22,feccandid_cands22,cid_cands22,firstlastp_cands22,party_cands22,distidrunfor_cands22,distidcurr_cands22,currcand_cands22,cyclecand_cands22,crpico_cands22,recipcode_cands22,nopacs_cands22,firstlast_cands22
0,2022,H0AK00105,N00039029,Thomas Lamb (3),3,AK01,,,,,3N,,Thomas Lamb
1,2022,H0AL01055,N00044245,Jerry Carl (R),R,AL01,AL01,Y,Y,I,RW,,Jerry Carl
2,2022,H0AL01063,N00044288,Wes Lambert (R),R,AL01,,,,,RN,,Wes Lambert
3,2022,H0AL01097,N00044750,James Averhart (D),D,AL01,,,,,DN,,James Averhart
4,2022,H0AL02087,N00030768,Martha Roby (R),R,AL02,,,,,RN,,Martha Roby


In [358]:
# OpenSecrets Table Definition: Committee table
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20Cmtes.htm
columns_cmtes22 = ['cycle', 'cmteid', 'pacshort', 'affiliate', 'ultorg', 'recipid', 
                    'recipcode', 'feccandid', 'party', 'primcode', 'source', 'sensitive',
                    'foreign', 'active']

df_cmtes22 = create_dataframe('../../data/open_secrets/CampaignFin22/cmtes22.csv', columns_cmtes22)

Creating cmtes22 dataframe...
cmtes22 dataframe created...


In [360]:
df_cmtes22.head()

Unnamed: 0,cycle_cmtes22,cmteid_cmtes22,pacshort_cmtes22,affiliate_cmtes22,ultorg_cmtes22,recipid_cmtes22,recipcode_cmtes22,feccandid_cmtes22,party_cmtes22,primcode_cmtes22,source_cmtes22,sensitive_cmtes22,foreign_cmtes22,active_cmtes22
0,2022,C00000059,Hallmark Cards,,Hallmark Cards,C00000059,PB,,,C1400,WAA21,Y,0,1
1,2022,C00000422,American Medical Assn,American Medical Assn,American Medical Assn,C00000422,PB,,,H1100,WAA21,n,0,1
2,2022,C00000489,Teamsters Local 886,Teamsters Union,Teamsters Union,C00000489,PL,,,LT300,WAA21,n,0,1
3,2022,C00000547,KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMIT...,,,C00000547,,,U,,,,0,0
4,2022,C00000729,American Dental Assn,American Dental Assn,American Dental Assn,C00000729,PB,,,H1400,WAA21,n,0,1


In [16]:
# OpenSecrets Data Definition for PAC to PAC Data (Pac_other table)
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20PAC%20to%20PAC%20Data.htm
columns_pac_other22 = ['cycle', 'fecrecno', 'filerid', 'donorcmte', 'contriblendtrans', 'city', 'state', 
                            'zip', 'fecoccemp', 'primcode', 'date', 'amount', 'recipid', 'party', 'otherid',
                            'recipcode', 'recipprimcode', 'amend', 'report', 'pg', 'microfilm', 'type',
                            'realcode', 'source']

df_pac_other22 = create_dataframe('../../data/open_secrets/CampaignFin22/pac_other22.csv', columns_pac_other22)

Creating pac_other22 dataframe...
pac_other22 dataframe created...


In [17]:
df_pac_other22.head()

Unnamed: 0,cycle_pac_other22,fecrecno_pac_other22,filerid_pac_other22,donorcmte_pac_other22,contriblendtrans_pac_other22,city_pac_other22,state_pac_other22,zip_pac_other22,fecoccemp_pac_other22,primcode_pac_other22,date_pac_other22,amount_pac_other22,recipid_pac_other22,party_pac_other22,otherid_pac_other22,recipcode_pac_other22,recipprimcode_pac_other22,amend_pac_other22,report_pac_other22,pg_pac_other22,microfilm_pac_other22,type_pac_other22,realcode_pac_other22,source_pac_other22
0,2022,1011320230265253021,C00032979,Teamsters Union,BOHANNAN VICTORY FUND,IOWA CITY,IA,52245,,LT300,2022-11-15,-1000,C00811059,D,C00811059,DP,Z4200,A,30G,P,202212089550408190,24K,LT300,PAC
1,2022,1011320230265254787,C00032979,Teamsters Union,WARNOCK VICTORY FUND,WASHINGTON,DC,20003,,LT300,2022-11-22,15000,C00740597,D,C00740597,DP,Z4200,A,30G,P,202212089550408210,24K,LT300,PAC
2,2022,1011320230265254797,C00032979,Teamsters Union,BLUE POWER PAC,COLLINSWOOD,NJ,8108,,LT300,2022-11-03,5000,C00575894,D,C00575894,PI,J2100,A,30G,P,202212089550408207,24K,LT300,PAC
3,2022,1011320230265254799,C00032979,Teamsters Union,COMMONWEALTH PAC,MERRIFIELD,VA,22116,,LT300,2022-11-03,5000,C00498931,D,C00498931,PI,J2100,A,30G,P,202212089550408208,24K,LT300,PAC
4,2022,1011320230265254826,C00032979,Teamsters Union,RHODE ISLAND DEMOCRATIC STATE CMTE,PROVIDENCE,RI,92940,,LT300,2022-11-03,5000,C00136200,D,C00136200,DP,Z5200,A,30G,P,202212089550408209,24K,LT300,PAC


In [18]:
# OpenSecrets Data Definition: PAC table (PACs to Candidates)
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20PAC%20to%20Cands%20Data.htm
columns_pacs22 = ['cycle', 'fecrecno', 'pacid', 'cid', 'amount', 'date', 'realcode', 
                            'type', 'di', 'feccandid']

df_pacs22 = create_dataframe('../../data/open_secrets/CampaignFin22/pacs22.csv', columns_pacs22)

Creating pacs22 dataframe...
pacs22 dataframe created...


In [19]:
df_pacs22.head()

Unnamed: 0,cycle_pacs22,fecrecno_pacs22,pacid_pacs22,cid_pacs22,amount_pacs22,date_pacs22,realcode_pacs22,type_pacs22,di_pacs22,feccandid_pacs22
0,2022,1011020220264924462,C00406124,N00029513,500,2021-10-18,B4000,24K,D,H8IN07184
1,2022,1011320230265253020,C00032979,N00041569,-2500,2022-11-21,Z9600,24K,D,H8IL14174
2,2022,1011320230265254779,C00032979,N00041511,-5000,2022-11-18,Z9600,24K,D,H8MN08043
3,2022,1011320230265254798,C00032979,N00035575,5000,2022-11-03,Z9600,24K,D,H4FL15155
4,2022,1011320230265254809,C00032979,N00041511,4000,2022-11-03,Z9600,24K,D,H8MN08043


In [20]:
# OpenSecrets Data Definition: Individual Contribution Data
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20Individual%20Contribution%20Data.htm
columns_indivs22 = ['cycle', 'fectransid', 'contribid', 'contrib_last', 'contrib_first', 'recipid', 'orgname', 
                    'ultorg', 'realcode', 'date', 'amount', 'street', 'city', 'state',
                    'zip', 'recipcode', 'type', 'cmteid', 'otherid', 'gender', 'microfilm',
                    'occupation', 'employer', 'source']

# This dataset is huge, and crashes my computer. For now, limiting to 5000000 rows
# until a better way is found to trim off this dataset to only the last cycle or two
df_indivs22 = create_dataframe('../../data/open_secrets/CampaignFin22/indivs22.csv', columns_indivs22, nrows=5000000)

Creating indivs22 dataframe...
indivs22 dataframe created...


In [21]:
df_indivs22.head()

Unnamed: 0,cycle_indivs22,fectransid_indivs22,contribid_indivs22,contrib_last_indivs22,contrib_first_indivs22,recipid_indivs22,orgname_indivs22,ultorg_indivs22,realcode_indivs22,date_indivs22,amount_indivs22,street_indivs22,city_indivs22,state_indivs22,zip_indivs22,recipcode_indivs22,type_indivs22,cmteid_indivs22,otherid_indivs22,gender_indivs22,microfilm_indivs22,occupation_indivs22,employer_indivs22,source_indivs22
0,2022,4061520221505841534,r0014256510,DILLARD,DANIEL,C00000935,[24T Contribution],,Z9500,2021-08-29,35,,HIDALGO,TX,78557,DP,24T,C00401224,C00000935,M,202205149504021121,NOT EMPLOYED,NOT EMPLOYED,Rept
1,2022,4061520221505841535,r0015503614,WHITE,SCOTTO,C00633404,[24T Contribution],,Z9500,2021-08-29,15,,YONKERS,NY,10701,PI,24T,C00401224,C00633404,M,202205149507911003,NOT EMPLOYED,NOT EMPLOYED,Rept
2,2022,4061520221505841536,p0003861308,DOMINGUEZ,SALLIE,C00632398,[24T Contribution],,Z9500,2021-08-29,10,,CAMPBELL,CA,95011,PI,24T,C00401224,C00632398,F,202205149504046504,HEALTHCARE,MCDONALD FAMILY DENTAL,Rept
3,2022,4061520221505841543,p0003689535,BRADLEY,JANNETTE,C00678839,[24T Contribution],,Z9500,2021-08-29,26,,OAK LAWN,IL,60453,PI,24T,C00401224,C00678839,F,202205149503347140,LIBRARIAN,MEDICAL CENTER,Rept
4,2022,4061520221505841545,q0002181378,BHATT,TANYA,C00580068,[24T Contribution],,Z9500,2021-08-29,50,,MIAMI BEACH,FL,33141,PI,24T,C00401224,C00580068,F,202205149503225635,MKTG,FRYE FINANCIAL,Rept


In [22]:
# OpenSecrets Data Dictionary for Expenditure Data - from FEC electronic filings
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20Expenditures.htm
columns_expends22 = ['cycle', 'id', 'transid', 'crpfilerid', 
                     'recipcode', 'pacshort', 'crprecipname', 
                     'expcode', 'amount', 'date', 'city', 'state', 
                     'zip', 'cmteid_ef', 'candid', 'type',
                     'descrip', 'pg', 'elecother', 'enttype',
                     'source']

df_expends22 = create_dataframe('../../data/open_secrets/Expend22/expends22.csv', columns_expends22, nrows=1000000)

Creating expends22 dataframe...
expends22 dataframe created...


In [23]:
df_expends22.head()

Unnamed: 0,cycle_expends22,id_expends22,transid_expends22,crpfilerid_expends22,recipcode_expends22,pacshort_expends22,crprecipname_expends22,expcode_expends22,amount_expends22,date_expends22,city_expends22,state_expends22,zip_expends22,cmteid_ef_expends22,candid_expends22,type_expends22,descrip_expends22,pg_expends22,elecother_expends22,enttype_expends22,source_expends22
0,2022,1,500252553,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,1,2021-05-01,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
1,2022,2,500252556,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,10,2021-05-02,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
2,2022,3,500252557,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,4,2021-05-02,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
3,2022,4,500252608,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,6,2021-05-09,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
4,2022,5,500252609,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,1,2021-05-16,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto


In [24]:
# OpenSecrets Data Definition for Lobbying Data: Lobby agencies
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_agency.htm
columns_lob_agency = ['uniqid', 'agencyid', 'agency']

df_lob_agency = create_dataframe('../../data/open_secrets/Lobby/lob_agency.csv', columns_lob_agency)

Creating lob_agency dataframe...
lob_agency dataframe created...


In [25]:
df_lob_agency.head()

Unnamed: 0,uniqid_lob_agency,agencyid_lob_agency,agency_lob_agency
0,BB7367A7-7B60-4DED-AA2D-A94771A9EBE8,1,US Senate
1,BB7367A7-7B60-4DED-AA2D-A94771A9EBE8,2,US House of Representatives
2,04366C6F-B0CE-4C28-87BF-EE1CC8A9BB41,2,US House of Representatives
3,04366C6F-B0CE-4C28-87BF-EE1CC8A9BB41,34,Dept of Health & Human Services
4,04366C6F-B0CE-4C28-87BF-EE1CC8A9BB41,134,Centers for Disease Control & Prevention


In [26]:
# OpenSecrets Data Definition for Lobbying Data: Lobby bills
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_bills.htm
columns_lob_bills = ['b_id', 'si_id', 'congno', 'bill_name']

df_lob_bills = create_dataframe('../../data/open_secrets/Lobby/lob_bills.csv', columns_lob_bills)

Creating lob_bills dataframe...
lob_bills dataframe created...


In [27]:
df_lob_bills.head()

Unnamed: 0,b_id_lob_bills,si_id_lob_bills,congno_lob_bills,bill_name_lob_bills
0,s1461-117,2820018,117,S.1461
1,hr463-117,2820018,117,H.R.463
2,s910-116,2820035,116,S.910
3,hr2405-116,2820035,116,H.R.2405
4,hr3816-117,2820067,117,H.R.3816


In [28]:
# OpenSecrets Data Definition for Lobbying Data: Lobby industries
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_indus.htm
columns_lob_indus = ['client', 'sub', 'total', 'year', 'catcode']

df_lob_indus = create_dataframe('../../data/open_secrets/Lobby/lob_indus.csv', columns_lob_indus)

Creating lob_indus dataframe...
lob_indus dataframe created...


In [29]:
df_lob_indus.head()

Unnamed: 0,client_lob_indus,sub_lob_indus,total_lob_indus,year_lob_indus,catcode_lob_indus
0,National Assn for County Community & Econ Develop,National Assn for County Community & Econ Develop,0,1998,X3000
1,Fox Valley Technical College,Fox Valley Technical College,80000,2015,H5200
2,Employers Cncl on Flexible Compensation,Employers Cncl on Flexible Compensation,580000,2001,J9000
3,American Federation of TV/Radio Artists,American Federation of TV/Radio Artists,30000,2011,LG400
4,Visiting Nurse Assn of America,Visiting Nurse Assn/Manchester & S NH,40000,2005,H3100


In [30]:
# OpenSecrets Data Definition for Lobbying Data: Lobby issues
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_issues.htm
columns_lob_issue = ['si_id', 'uniqid', 'issueid', 'issue', 'specificissue', 5, 6, 7, 8, 9, 10, 'year']

df_lob_issue = create_dataframe('../../data/open_secrets/Lobby/lob_issue.csv', columns_lob_issue)

Creating lob_issue dataframe...
lob_issue dataframe created...


In [31]:
df_lob_issue.head()

Unnamed: 0,si_id_lob_issue,uniqid_lob_issue,issueid_lob_issue,issue_lob_issue,specificissue_lob_issue,5_lob_issue,6_lob_issue,7_lob_issue,8_lob_issue,9_lob_issue,10_lob_issue,year_lob_issue
0,3001624,02e92bd6-0159-495e-9d00-8a490a0be8be,DIS,Disaster & Emergency Planning,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
1,3001625,02e92bd6-0159-495e-9d00-8a490a0be8be,ENV,Environment & Superfund,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
2,3001627,02e92bd6-0159-495e-9d00-8a490a0be8be,ROD,Roads & Highways,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
3,3001628,02e92bd6-0159-495e-9d00-8a490a0be8be,RRR,Railroads,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
4,3001629,02e92bd6-0159-495e-9d00-8a490a0be8be,TAX,Taxes,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022


In [756]:
# OpenSecrets Data Definition for Lobbying Data: Lobby issues (no specific issue)
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_issues.htm
columns_lob_issue_no_specific = ['si_id', 'uniqid', 'issueid', 'issue', 'year']

df_lob_issue_no_specific = create_dataframe('../../data/open_secrets/Lobby/lob_issue_NoSpecficIssue.csv', columns_lob_issue_no_specific)

Creating lob_issue_NoSpecficIssue dataframe...
lob_issue_NoSpecficIssue dataframe created...


In [757]:
df_lob_issue_no_specific.head()

Unnamed: 0,si_id_lob_issue_NoSpecficIssue,uniqid_lob_issue_NoSpecficIssue,issueid_lob_issue_NoSpecficIssue,issue_lob_issue_NoSpecficIssue,year_lob_issue_NoSpecficIssue
0,3001624,02e92bd6-0159-495e-9d00-8a490a0be8be,DIS,Disaster & Emergency Planning,2022
1,3001625,02e92bd6-0159-495e-9d00-8a490a0be8be,ENV,Environment & Superfund,2022
2,3001627,02e92bd6-0159-495e-9d00-8a490a0be8be,ROD,Roads & Highways,2022
3,3001628,02e92bd6-0159-495e-9d00-8a490a0be8be,RRR,Railroads,2022
4,3001629,02e92bd6-0159-495e-9d00-8a490a0be8be,TAX,Taxes,2022


In [32]:
# OpenSecrets Data Definitions for Lobbying Data: Lobbying
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbying.htm
columns_lob_lobbying = ['uniqid','registrant_raw','registrant','isfirm','client_raw','client','ultorg','amount',
                        'catcode','source','self','includensfs','use',
                       'ind', 'year', 'type', 'typelong', 'affiliate']

df_lob_lobbying = create_dataframe('../../data/open_secrets/Lobby/lob_lobbying.csv', columns_lob_lobbying, nrows=1000000)

Creating lob_lobbying dataframe...
lob_lobbying dataframe created...


In [33]:
df_lob_lobbying.head()

Unnamed: 0,uniqid_lob_lobbying,registrant_raw_lob_lobbying,registrant_lob_lobbying,isfirm_lob_lobbying,client_raw_lob_lobbying,client_lob_lobbying,ultorg_lob_lobbying,amount_lob_lobbying,catcode_lob_lobbying,source_lob_lobbying,self_lob_lobbying,includensfs_lob_lobbying,use_lob_lobbying,ind_lob_lobbying,year_lob_lobbying,type_lob_lobbying,typelong_lob_lobbying,affiliate_lob_lobbying
0,82c5f661-a637-45ad-a3a6-b5ba18cf8962,ASTRAZENECA PHARMACEUTICALS LP,AstraZeneca Pharmaceuticals,,ASTRAZENECA PHARMACEUTICALS LP,AstraZeneca Pharmaceuticals,AstraZeneca PLC,1370000,H4300,pac,x,,y,y,2021,q4a,FOURTH QUARTER AMENDMENT,
1,85b111b1-5d2e-4107-bc24-0921316e29a5,ECHELON GOVERNMENT AFFAIRS,Echelon Government Affairs,y,THE ALBERS GROUP,Albers Group,Albers Group,10000,Y4000,,n,,y,y,2021,q4,FOURTH QUARTER REPORT,
2,87822a14-12de-478c-a34d-010fa503e539,WTA -- ADVOCATES FOR RURAL BROADBAND,Western Telecommunications Alliance,,WTA -- ADVOCATES FOR RURAL BROADBAND,Western Telecommunications Alliance,Western Telecommunications Alliance,75000,C4000,pac,p,,y,y,2021,q4,FOURTH QUARTER REPORT,
3,87ff989d-9d12-4fef-84ef-ab69cd616894,FINANCIAL EXECUTIVES INTERNATIONAL,Financial Executives International,,FINANCIAL EXECUTIVES INTERNATIONAL,Financial Executives International,Financial Executives International,21650,F5000,Hvr06,p,,y,y,2021,q4,FOURTH QUARTER REPORT,
4,88bac485-00bb-4915-8ca5-8d0ec589ec28,CAPITOL HILL CONSULTING GROUP,Capitol Hill Consulting Group,y,ASSOCIATION FOR ACCESSIBLE MEDICINES (FORMERLY...,Association for Accessible Medicines,Association for Accessible Medicines,40000,H4300,pac,i,,y,,2021,q4,FOURTH QUARTER REPORT,


In [34]:
df_lob_lobbying.head()

Unnamed: 0,uniqid_lob_lobbying,registrant_raw_lob_lobbying,registrant_lob_lobbying,isfirm_lob_lobbying,client_raw_lob_lobbying,client_lob_lobbying,ultorg_lob_lobbying,amount_lob_lobbying,catcode_lob_lobbying,source_lob_lobbying,self_lob_lobbying,includensfs_lob_lobbying,use_lob_lobbying,ind_lob_lobbying,year_lob_lobbying,type_lob_lobbying,typelong_lob_lobbying,affiliate_lob_lobbying
0,82c5f661-a637-45ad-a3a6-b5ba18cf8962,ASTRAZENECA PHARMACEUTICALS LP,AstraZeneca Pharmaceuticals,,ASTRAZENECA PHARMACEUTICALS LP,AstraZeneca Pharmaceuticals,AstraZeneca PLC,1370000,H4300,pac,x,,y,y,2021,q4a,FOURTH QUARTER AMENDMENT,
1,85b111b1-5d2e-4107-bc24-0921316e29a5,ECHELON GOVERNMENT AFFAIRS,Echelon Government Affairs,y,THE ALBERS GROUP,Albers Group,Albers Group,10000,Y4000,,n,,y,y,2021,q4,FOURTH QUARTER REPORT,
2,87822a14-12de-478c-a34d-010fa503e539,WTA -- ADVOCATES FOR RURAL BROADBAND,Western Telecommunications Alliance,,WTA -- ADVOCATES FOR RURAL BROADBAND,Western Telecommunications Alliance,Western Telecommunications Alliance,75000,C4000,pac,p,,y,y,2021,q4,FOURTH QUARTER REPORT,
3,87ff989d-9d12-4fef-84ef-ab69cd616894,FINANCIAL EXECUTIVES INTERNATIONAL,Financial Executives International,,FINANCIAL EXECUTIVES INTERNATIONAL,Financial Executives International,Financial Executives International,21650,F5000,Hvr06,p,,y,y,2021,q4,FOURTH QUARTER REPORT,
4,88bac485-00bb-4915-8ca5-8d0ec589ec28,CAPITOL HILL CONSULTING GROUP,Capitol Hill Consulting Group,y,ASSOCIATION FOR ACCESSIBLE MEDICINES (FORMERLY...,Association for Accessible Medicines,Association for Accessible Medicines,40000,H4300,pac,i,,y,,2021,q4,FOURTH QUARTER REPORT,


In [35]:
# OpenSecrets Data Definition for Lobbyists
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbyists.htm
columns_lob_lobbyist = ['uniqid', 'lobbyist_lastname_std', 'lobbyist_firstname_std', 'lobbyist_lastname_raw', 
                     'lobbyist_firstname_raw', 'lobbyist_id', 'year', 'officialposition', 'cid', 'formercongmem']

df_lob_lobbyist = create_dataframe('../../data/open_secrets/Lobby/lob_lobbyist.csv', columns_lob_lobbyist, nrows=1000000)

Creating lob_lobbyist dataframe...
lob_lobbyist dataframe created...


In [36]:
df_lob_lobbyist.head()

Unnamed: 0,uniqid_lob_lobbyist,lobbyist_lastname_std_lob_lobbyist,lobbyist_firstname_std_lob_lobbyist,lobbyist_lastname_raw_lob_lobbyist,lobbyist_firstname_raw_lob_lobbyist,lobbyist_id_lob_lobbyist,year_lob_lobbyist,officialposition_lob_lobbyist,cid_lob_lobbyist,formercongmem_lob_lobbyist
0,06C29C84-250F-478B-872A-2F647D9DC044,O'BRIEN,LAWRENCE F. III,O'Brien,Lawrence F III,Y0000046486L,2004,,,n
1,3A22C685-EC94-46AA-9C45-4AA4A7044C28,BRAGG,PATRICIA DUNMIRE,Bragg,Patricia Dunmire,Y0000020554L,2001,,,n
2,5CBE61EC-87F1-401E-9D57-620975C9A1F8,COSTELLO,RYAN,Costello,Ryan,Y0000027292L,2002,,N00031064,y
3,8D1DB726-0BE9-46C1-AADE-BE8BCAFF929F,SMITH,G. WAYNE,Smith,G Wayne,Y0000027005L,2004,,,n
4,EF5095CF-92E6-49BE-A40D-35E6464EA394,SCHMITZ,JOHN P.,Schmitz,John P,Y0000019948L,2002,,,n


In [760]:
# OpenSecrets Data Definitions for Lobbying Data: Report types
# No documentation provided on OpenSecrets.com
columns_lob_rpt = ['typelong', 'typeshort']

df_lob_rpt = create_dataframe('../../data/open_secrets/Lobby/lob_rpt.csv', columns_lob_rpt)

Creating lob_rpt dataframe...
lob_rpt dataframe created...


In [762]:
df_lob_rpt.head()

Unnamed: 0,typelong_lob_rpt,typeshort_lob_rpt
0,MID-YEAR REPORT,m
1,MID-YEAR AMENDMENT,ma
2,MID-YEAR (NO ACTIVITY),mn
3,MID-YEAR AMENDMENT (NO ACTIVITY),man
4,MID-YEAR TERMINATION,mt


In [37]:
install_if_needed('xlrd')
import xlrd

xlrd is already installed.


In [38]:
# Candidate ids
# This dataset is very different, so load it independently.
columns_crp_ids = ['blank_excel_column', 'cid', 'crpname', 'party', 'distidrunfor', 'feccandid'] # Blank excel column is necessary.
columns_crp_ids = dict(enumerate(columns_crp_ids))
df_crp_ids = pd.read_excel('../../data/open_secrets/CRP_IDs.xls', header=None, skiprows=15)
df_crp_ids = df_crp_ids.drop(df_crp_ids.columns[0], axis=1)
df_crp_ids = df_crp_ids.rename(columns=columns_crp_ids)

In [39]:
df_crp_ids.head()

Unnamed: 0,cid,crpname,party,distidrunfor,feccandid
0,N00034296,"Aalders, Tim",R,UT03,H2UT03280
1,N00047923,"Aazami, Shervin",D,CA32,H2CA30291
2,N00051397,"Abahsain, Jill",D,MN07,H2MN07162
3,N00051715,"Abbott, Martha",3,VTS2,S2VT00359
4,N00048268,"Abdelhamid, Rana",D,NY12,H2NY12197


## Experiments ##

#### Link lobbies' end ####

In [42]:
# # See top lobbying by total contributions.
# df_lob_indus_sorted = df_lob_indus.sort_values(by='total_lob_indus', ascending=False)
# df_lob_indus_sorted.head(1)

In [43]:
# # US Chamber of Commerce is the top lobby.
# df_lob_lobbying[df_lob_lobbying['ultorg_lob_lobbying'] == 'US Chamber of Commerce'].sample(1)

In [44]:
df_lobbies = pd.merge(df_lob_indus, df_lob_lobbying, left_on='client_lob_indus', right_on='ultorg_lob_lobbying', how='outer')
df_lobbies = df_lobbies[df_lobbies['year_lob_indus'] == df_lobbies['year_lob_lobbying']]

In [45]:
df_lobbies.sample(5)

Unnamed: 0,client_lob_indus,sub_lob_indus,total_lob_indus,year_lob_indus,catcode_lob_indus,uniqid_lob_lobbying,registrant_raw_lob_lobbying,registrant_lob_lobbying,isfirm_lob_lobbying,client_raw_lob_lobbying,client_lob_lobbying,ultorg_lob_lobbying,amount_lob_lobbying,catcode_lob_lobbying,source_lob_lobbying,self_lob_lobbying,includensfs_lob_lobbying,use_lob_lobbying,ind_lob_lobbying,year_lob_lobbying,type_lob_lobbying,typelong_lob_lobbying,affiliate_lob_lobbying
9200062,Florida Sugar Cane League,Rio Grande Valley Sugar Growers,31000.0,2001.0,A1200,9579045A-DCCD-4FEE-A350-400B2A7408D1,PMA GROUP,PMA Group,y,FLORIDA SUGAR CANE LEAGUE,Florida Sugar Cane League,Florida Sugar Cane League,20000.0,A1200,pac,i,,y,,2001.0,m,MID-YEAR REPORT,
13962991,Ninesigma Corp,Ninesigma Corp,40000.0,2016.0,G5000,CBB61435-16E2-4C74-96D9-1073C174ED7C,LNE Group,LNE Group,y,NineSigma,Ninesigma Corp,Ninesigma Corp,20000.0,G5000,webGG,n,,y,y,2016.0,q2,SECOND QUARTER REPORT,
4300511,Blue Cross/Blue Shield,Highmark Inc,530000.0,2010.0,F3200,EF435EA9-51DF-4CC7-97B3-C8A8E06FE8B5,Independence Blue Cross Family of Companies,Independence Blue Cross,,INDEPENDENCE BLUE CROSS FAMILY OF COMPANIES,Independence Blue Cross,Blue Cross/Blue Shield,0.0,F3200,pac,s,,n,,2010.0,q4,FOURTH QUARTER REPORT,
18662145,Washington Intern Student Housing,Washington Intern Student Housing,10000.0,2019.0,F4500,9CE314D6-0D8D-4172-8248-F004136B7193,Broydrick & Associates,Broydrick & Assoc,y,WISH,Washington Intern Student Housing,Washington Intern Student Housing,0.0,F4500,WAA22,n,,y,y,2019.0,q2,SECOND QUARTER REPORT,
12166079,Massachusetts Educational Finance Authority,Massachusetts Educational Finance Authority,80000.0,2020.0,F1410,E689CD1E-66F8-4754-9419-6DCB5A46C67A,CHARTWELL STRATEGY GROUP LLC,Chartwell Strategy Group,y,Massachusetts Educational Financing Authority,Massachusetts Educational Finance Authority,Massachusetts Educational Finance Authority,20000.0,F1410,websb,n,,y,y,2020.0,q2,SECOND QUARTER REPORT,


---

#### Link candidate's end ####

In [48]:
df_cands22_clean = df_cands22.copy()
df_cands22_clean = df_cands22_clean.drop(['feccandid_cands22'], axis=1)

In [49]:
# df_cands22_clean.sample(3)

In [50]:
df_cmtes22_clean = df_cmtes22.copy()
df_cmtes22_clean = df_cmtes22_clean.drop(['feccandid_cmtes22'], axis=1)

In [51]:
# df_cmtes22_clean.sample(3)

In [52]:
df_leadpacs = df_cmtes22_clean[df_cmtes22_clean['recipid_cmtes22'].str.startswith('N')] # Get lead pacs only

In [53]:
# df_leadpacs.sample(3)

In [54]:
df_pacs22_clean = df_pacs22.copy()
df_pacs22_clean = df_pacs22_clean.drop(['fecrecno_pacs22', 'feccandid_pacs22'], axis=1)

In [55]:
# df_pacs22_clean.sample(3)

In [56]:
df_cands = pd.merge(df_cands22_clean, df_leadpacs, left_on='cid_cands22', right_on='recipid_cmtes22', how='outer')
df_cands = pd.merge(df_cands, df_pacs22_clean, left_on='cid_cands22', right_on='cid_pacs22', how='outer')

In [57]:
df_cands['cycle_cands22'] = df_cands['cycle_cands22'].astype('Int64')

In [58]:
# df_cands.sample(3)

In [59]:
df_cands = df_cands.drop(['cycle_cmtes22', 'recipid_cmtes22', 'recipcode_cmtes22',
                                      'party_cmtes22', 'cycle_pacs22', 'cid_pacs22'], axis=1)

In [60]:
df_cands.sample(5)

Unnamed: 0,cycle_cands22,cid_cands22,firstlastp_cands22,party_cands22,distidrunfor_cands22,distidcurr_cands22,currcand_cands22,cyclecand_cands22,crpico_cands22,recipcode_cands22,nopacs_cands22,cmteid_cmtes22,pacshort_cmtes22,affiliate_cmtes22,ultorg_cmtes22,primcode_cmtes22,source_cmtes22,sensitive_cmtes22,foreign_cmtes22,active_cmtes22,pacid_pacs22,amount_pacs22,date_pacs22,realcode_pacs22,type_pacs22,di_pacs22
906285,2022,N00046843,Derrick Van Orden (R),R,WI03,,Y,Y,O,RW,,C00775940,Van Orden for WI-03 2022,,Van Orden for WI-03 2022,Z1100,Rept,N,0.0,1.0,C00694323,-1.0,2022-11-28,Z9600,24K,D
930134,2022,N00048315,Jen Kiggans (R),R,VA02,,Y,Y,C,RW,,C00776120,Kiggans for Congress,,Kiggans for Congress,Z1100,Rept,N,0.0,1.0,C00101626,2500.0,2022-08-24,F1100,24K,D
189597,2022,N00030584,Sarah Palin (R),R,AK01,,Y,Y,C,RL,,C00811323,Sarah for Alaska,,Sarah for Alaska,Z1100,Rept,N,0.0,1.0,C00694323,-5.0,2022-11-18,Z9600,24K,D
1110829,2022,N00051055,Jennifer Pace (R),R,IN07,,,Y,C,RL,,C00808873,JENNIFER PACE COMMITTEE TO ELECT ...,,,,,N,0.0,0.0,,,NaT,,,
1015924,2022,N00049229,Herschel Walker (R),R,GAS2,,Y,Y,C,RL,,C00772962,Walker for GA Senate Republican Nominee Fund 2022,,Walker for GA Senate Republican Nominee Fund 2022,Z1100,Name,N,0.0,1.0,C00694323,-34.0,2022-10-11,Z9600,24K,D


---

#### Link both ends ####

In [63]:
# Although this df is used for df_leadpacs,df_cands, we filtered
# non-candidate committees out in those two dataframes, so we'll
# return to the full list of committees to get the pac info.
df_cands_or_cmtes = pd.merge(df_cands, df_cmtes22_clean, left_on='pacid_pacs22', right_on='cmteid_cmtes22', how='outer')

In [64]:
df_cands_or_cmtes = df_cands_or_cmtes.rename(columns=lambda col: col[:-2] + '_cands' if col.endswith('_x') else col)
df_cands_or_cmtes = df_cands_or_cmtes.rename(columns=lambda col: col[:-2] + '_noncands' if col.endswith('_y') else col)

In [65]:
df_cands_or_indus = pd.merge(df_cands_or_cmtes, df_lob_indus, left_on='ultorg_cmtes22_noncands', right_on='client_lob_indus', how='outer')
df_cands_or_indus = df_cands_or_indus[df_cands_or_indus['cycle_cands22'] == df_cands_or_indus['year_lob_indus']]

In [66]:
# Remove party labels from names: '3', 'R', 'D', 'I', 'L', 'U', 'i'
# df_cands22_clean['party_cands22'].unique() # List of parties for cell below.
df_cands_or_indus['firstlast_cands22'] = df_cands_or_indus['firstlastp_cands22'].apply(
    lambda x: x.replace(" (3)", "").replace(" (R)", "").replace(" (D)", "").replace(" (I)", "").replace(" (L)", "").replace(" (U)", "").replace(" (i)", "") if isinstance(x, str) else x
)

In [67]:
# df_cands_or_indus.head()

---

In [69]:
# df_cands_or_indus.head()

In [70]:
df_cands_and_indus = df_cands_or_indus[df_cands_or_indus['cid_cands22'].notna()]

In [71]:
# len(df_cands_and_indus)

In [72]:
# df_cands_and_indus.head()

In [73]:
# df_cands_and_indus.to_csv('df_cands_and_indus.csv')

---

#### One-by-one tests for the U.S. House Ways & Means committee members ####

In [76]:
member_name = 'Jodey Arrington'
member_cid = 'N00038285'

In [77]:
# member_name = 'Don Beyer'
# member_cid = 'N00036018'

In [78]:
# member_name = 'Earl Blumenauer'
# member_cid = 'N00007727'

In [79]:
# member_name = 'Vernon Buchanan'
# member_cid = 'N00027626'

In [80]:
# member_name = 'Mike Kelly'
# member_cid = 'N00031647'

In [81]:
# Funds according to the aggregate table. (Not accurate)
df_cands_or_indus[df_cands_or_indus['cid_cands22'] == member_cid]['amount_pacs22'].sum()

1767918.0

In [82]:
df_cands_or_indus[df_cands_or_indus['firstlast_cands22'] == member_name].to_csv('df_cands_or_indus_' + member_cid + '.csv')

In [83]:
df_member_by_name = df_cands_or_indus[df_cands_or_indus['firstlast_cands22'] == member_name]

In [84]:
df_member_by_name.head()

Unnamed: 0,cycle_cands22,cid_cands22,firstlastp_cands22,party_cands22,distidrunfor_cands22,distidcurr_cands22,currcand_cands22,cyclecand_cands22,crpico_cands22,recipcode_cands22,nopacs_cands22,cmteid_cmtes22_cands,pacshort_cmtes22_cands,affiliate_cmtes22_cands,ultorg_cmtes22_cands,primcode_cmtes22_cands,source_cmtes22_cands,sensitive_cmtes22_cands,foreign_cmtes22_cands,active_cmtes22_cands,pacid_pacs22,amount_pacs22,date_pacs22,realcode_pacs22,type_pacs22,di_pacs22,cycle_cmtes22,cmteid_cmtes22_noncands,pacshort_cmtes22_noncands,affiliate_cmtes22_noncands,ultorg_cmtes22_noncands,recipid_cmtes22,recipcode_cmtes22,party_cmtes22,primcode_cmtes22_noncands,source_cmtes22_noncands,sensitive_cmtes22_noncands,foreign_cmtes22_noncands,active_cmtes22_noncands,client_lob_indus,sub_lob_indus,total_lob_indus,year_lob_indus,catcode_lob_indus,firstlast_cands22
14526,2022,N00038285,Jodey Arrington (R),R,TX19,TX19,Y,Y,I,RW,,C00588657,Texans for Jodey Arrington,,Texans for Jodey Arrington,Z1100,Rept,N,0.0,1.0,C00236414,1500.0,2021-04-26,F3100,24K,D,2022.0,C00236414,TransAmerica,,AEGON NV,C00236414,PB,,F3100,WAA21,n,1.0,1.0,AEGON NV,Transamerica,510000.0,2022.0,F3100,Jodey Arrington
14531,2022,N00038285,Jodey Arrington (R),R,TX19,TX19,Y,Y,I,RW,,C00588657,Texans for Jodey Arrington,,Texans for Jodey Arrington,Z1100,Rept,N,0.0,1.0,C00236414,-1500.0,2021-04-19,F3100,24K,D,2022.0,C00236414,TransAmerica,,AEGON NV,C00236414,PB,,F3100,WAA21,n,1.0,1.0,AEGON NV,Transamerica,510000.0,2022.0,F3100,Jodey Arrington
182188,2022,N00038285,Jodey Arrington (R),R,TX19,TX19,Y,Y,I,RW,,C00588657,Texans for Jodey Arrington,,Texans for Jodey Arrington,Z1100,Rept,N,0.0,1.0,C00034157,2500.0,2022-08-02,F3200,24K,D,2022.0,C00034157,AFLAC Inc,,AFLAC Inc,C00034157,PB,,F3200,WAA21,n,0.0,1.0,AFLAC Inc,AFLAC Inc,6110000.0,2022.0,F3200,Jodey Arrington
182214,2022,N00038285,Jodey Arrington (R),R,TX19,TX19,Y,Y,I,RW,,C00588657,Texans for Jodey Arrington,,Texans for Jodey Arrington,Z1100,Rept,N,0.0,1.0,C00034157,2500.0,2022-08-19,F3200,24K,D,2022.0,C00034157,AFLAC Inc,,AFLAC Inc,C00034157,PB,,F3200,WAA21,n,0.0,1.0,AFLAC Inc,AFLAC Inc,6110000.0,2022.0,F3200,Jodey Arrington
223839,2022,N00038285,Jodey Arrington (R),R,TX19,TX19,Y,Y,I,RW,,C00588657,Texans for Jodey Arrington,,Texans for Jodey Arrington,Z1100,Rept,N,0.0,1.0,C00109017,5000.0,2022-01-31,C4100,24K,D,2022.0,C00109017,AT&T Inc,AT&T Inc,AT&T Inc,C00109017,PB,,C4100,WAA21,n,0.0,1.0,AT&T Inc,AT&T Inc,11410000.0,2022.0,C4100,Jodey Arrington


In [85]:
df_member_by_name['amount_pacs22'].sum()

1767918.0

In [86]:
# Most frequent donors.
df_member_by_name['client_lob_indus'].value_counts().head(50)

client_lob_indus
American Hospital Assn                            140
Blue Cross/Blue Shield                            110
American Bankers Assn                              32
Apollo Global Management                           15
Credit Union National Assn                         13
National Assn of Realtors                          12
American Medical Assn                              12
Lockheed Martin                                    12
Illumina Inc                                       10
Independent Community Bankers of America           10
UnitedHealth Group                                 10
General Motors                                      9
Honeywell International                             8
Abbott Laboratories                                 8
National Federation of Independent Business         8
Carlyle Group                                       7
Wine & Spirits Wholesalers of America               7
AstraZeneca PLC                                     6
Pfizer Inc 

In [87]:
# Biggest donors.
df_donors = df_member_by_name.groupby('client_lob_indus')['amount_pacs22'].sum()
top_10_donors = df_donors.sort_values(ascending=False).head(50)
print(top_10_donors)

client_lob_indus
American Hospital Assn                            280000.0
Blue Cross/Blue Shield                            132000.0
American Bankers Assn                              80000.0
Credit Union National Assn                         45500.0
Apollo Global Management                           37500.0
UnitedHealth Group                                 37500.0
Carlyle Group                                      35000.0
National Assn of Realtors                          30000.0
American Medical Assn                              30000.0
AT&T Inc                                           20000.0
Abbott Laboratories                                20000.0
Valero Energy                                      20000.0
Koch Industries                                    20000.0
National Cotton Council                            17000.0
Illumina Inc                                       15800.0
Chevron Corp                                       15000.0
General Motors                         

In [88]:
# Total donations by a particular donor.
donor_sum = df_member_by_name[df_member_by_name['client_lob_indus'] == 'American Hospital Assn']['amount_pacs22'].sum()
print(donor_sum)

280000.0


In [89]:
df_member_by_cid = df_cands_or_indus[df_cands_or_indus['cid_cands22'] == member_cid]

In [90]:
df_member_by_cid['amount_pacs22'].sum()

1767918.0

---

#### Sanity checks ####

In [93]:
# Sanity check.
df_pacs22[df_pacs22['cid_pacs22'] == member_cid]['amount_pacs22'].sum()

1020548

In [94]:
# Sanity check.
df_pac_other22[df_pac_other22['recipid_pac_other22'] == member_cid]['amount_pac_other22'].sum()

1026562

In [95]:
# Sanity check.
df_indivs22[df_indivs22['recipid_indivs22'] == member_cid]['amount_indivs22'].sum()

33559

In [96]:
# Final check.
df_pacs22[df_pacs22['cid_pacs22'] == member_cid]['amount_pacs22'].sum() \
    + df_pac_other22[df_pac_other22['recipid_pac_other22'] == member_cid]['amount_pac_other22'].sum() \
    + df_indivs22[df_indivs22['recipid_indivs22'] == member_cid]['amount_indivs22'].sum()

2080669

In [97]:
# Save csv
df_pacs22[df_pacs22['cid_pacs22'] == member_cid].sort_values(by='amount_pacs22', ascending=False).to_csv('pacs22_' + member_cid + '.csv')
df_pac_other22[df_pac_other22['recipid_pac_other22'] == member_cid].sort_values(by='amount_pac_other22', ascending=False).to_csv('pac-other22_' + member_cid + '.csv')
df_indivs22[df_indivs22['recipid_indivs22'] == member_cid].sort_values(by='amount_indivs22', ascending=False).to_csv('indivs22_' + member_cid + '.csv')

In [98]:
# # Preview
# display(df_pacs22[df_pacs22['cid_pacs22'] == member_cid].head())
# display(df_pac_other22[df_pac_other22['recipid_pac_other22'] == member_cid].head())
# display(df_indivs22[df_indivs22['recipid_indivs22'] == member_cid].head())

---

#### Group tests for the U.S. House Ways & Means committee members ####

In [101]:
# # SAVE THIS.
# ways_and_means_members22 = ['Jodey Arrington', 'Don Beyer', 'Earl Blumenauer', 'Vernon Buchanan', 'Mike Carey', 
#                             'Judy Chu', 'Danny K Davis', 'Suzan DelBene', 'Lloyd Doggett', 'Ron Estes', 
#                             'Dwight Evans', 'Randy Feenstra', 'Drew Ferguson', 'Michelle Fischbach', 'Brian Fitzpatrick', 
#                             'Kevin Hern', 'Brian Higgins', 'Mike Kelly', 'Dan Kildee', 'David Kustoff', 'Darin LaHood', 
#                             'John Larson', 'Nicole Malliotakis', 'Carol Miller', 'Blake Moore', 'Gwen Moore', 'Greg Murphy', 
#                             'Richard E Neal', 'Jimmy Panetta', 'Linda Sanchez', 'Brad Schneider', 'David Schweikert', 
#                             'Terri Sewell', 'Adrian Smith', 'Jason Smith', 'Lloyd Smucker', 'Michelle Steel', 
#                             'Greg Steube', 'Claudia Tenney', 'Mike Thompson', 'Beth Van Duyne']

---

#### Misc experiments ####

In [104]:
# df_test_same_year = df_member[df_member['year_lob_indus'] == df_member['cycle_cands22']]

In [105]:
# df_test_same_year.head()

In [106]:
# df_test_same_year['amount_pacs22'].sum()

In [107]:
# df_lob_indus[df_lob_indus['client_lob_indus'] == 'AstraZeneca PLC']

In [108]:
# df_cands_or_indus[df_cands_or_indus['firstlast_cands22'] == 'Jodey Arrington'].to_csv('jodey_arrington.csv')

A few cells down I'm using catcodes to link industries, so double-check if that's the better way.

In [110]:
# df_exp1 = pd.merge(df_pacs22, df_cands22, left_on='cid_pacs22', right_on='cid_cands22', how='inner')
# df_exp1 = pd.merge(df_exp1, df_cmtes22, left_on='cid_pacs22', right_on='recipid_cmtes22', how='inner')

In [111]:
# df_exp2 = pd.merge(df_exp1, df_lob_indus[:len(df_exp1)], left_on='realcode_pacs22', right_on='catcode_lob_indus', how='inner')

In [112]:
# df_exp2.sample(3)

---

### Ways and Means ###

#### Member Details ####

In [694]:
wm_dems = ["Richard Neal",
"Brad Schneider",
"Jimmy Panetta",
"Bill Pascrell",
"Linda Sánchez",
"Terri Sewell",
"Gwen Moore",
"Tom Suozzi",
"Mike Thompson",
"John Larson",
"Stephanie Murphy",
"Donald Beyer",
"Earl Blumenauer",
"Brendan Boyle",
"Judy Chu",
"Danny Davis",
"Suzan DelBene",
"Lloyd Doggett",
"Dwight Evans",
"Jimmy Gomez",
"Brian Higgins",
"Steven Horsford",
"Dan Kildee",
"Ronald Kind"]

df_wm_dem_members = pd.DataFrame(wm_dems, columns=["firstlastp"])
df_wm_dem = pd.merge(df_cands22, df_wm_dem_members, left_on="firstlast_cands22", right_on="firstlastp", how="outer")

In [696]:
wm_rep = ["Jason Smith", 
"Vern Buchanan",
"Kevin Hern",
"Lloyd Smucker",
"Mike Kelly",
"Nicole Malliotakis",
"Randy Feenstra",
"Jodey Arrington",
"Greg Steube",
"Ron Estes",
"Adrian Smith",
"Beth Van Duyne",
"Carol Miller",
"Michelle Fischbach",
"Michelle Steel",
"Mike Carey",
"Blake Moore",
"Brad Wenstrup",
"Brian Fitzpatrick",
"Claudia Tenney",
"Darin LaHood",
"David Kustoff",
"David Schweikert",
"Drew Ferguson",
"Gregory Murphy"]

df_wm_rep_members = pd.DataFrame(wm_rep, columns=["firstlastp"])
df_wm_members = pd.merge(df_wm_dem, df_wm_rep_members, left_on="firstlast_cands22", right_on="firstlastp", how="inner")

In [698]:
df_wm_members.head()

Unnamed: 0,cycle_cands22,feccandid_cands22,cid_cands22,firstlastp_cands22,party_cands22,distidrunfor_cands22,distidcurr_cands22,currcand_cands22,cyclecand_cands22,crpico_cands22,recipcode_cands22,nopacs_cands22,firstlast_cands22,firstlastp_x,firstlastp_y
0,2022.0,H6NE03115,N00027623,Adrian Smith (R),R,NE03,NE03,Y,Y,I,RW,,Adrian Smith,,Adrian Smith
1,2022.0,H0TX24209,N00045167,Beth Van Duyne (R),R,TX24,TX24,Y,Y,I,RW,,Beth Van Duyne,,Beth Van Duyne
2,2022.0,H0UT01205,N00046598,Blake Moore (R),R,UT01,UT01,Y,Y,I,RW,,Blake Moore,,Blake Moore
3,2022.0,H2OH02085,N00033310,Brad Wenstrup (R),R,OH02,OH02,Y,Y,I,RW,,Brad Wenstrup,,Brad Wenstrup
4,2022.0,H6PA08277,N00038779,Brian Fitzpatrick (R),R,PA01,PA01,Y,Y,I,RW,,Brian Fitzpatrick,,Brian Fitzpatrick


In [700]:
df_wm_members = df_wm_members.rename(columns={'feccandid_cands22': 'feccandid_member', 'cid_cands22': 'cid_member',
                              'firstlast_cands22': 'firstlast_member', 'party_cands22': 'party_member',
                              'currcand_cands22': 'currcand_member', 'cyclecand_cands22': 'cyclecand_member',
                              'crpico_cands22': 'crpico_member', 'recipcode_cands22': 'recipcode_member'})
df_wm_members = df_wm_members.drop(['cycle_cands22', 'firstlastp_cands22', 'distidrunfor_cands22', 'distidcurr_cands22', 'nopacs_cands22', 'firstlastp_x', 'firstlastp_y'], axis=1)

In [702]:
df_wm_members.head()

Unnamed: 0,feccandid_member,cid_member,party_member,currcand_member,cyclecand_member,crpico_member,recipcode_member,firstlast_member
0,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith
1,H0TX24209,N00045167,R,Y,Y,I,RW,Beth Van Duyne
2,H0UT01205,N00046598,R,Y,Y,I,RW,Blake Moore
3,H2OH02085,N00033310,R,Y,Y,I,RW,Brad Wenstrup
4,H6PA08277,N00038779,R,Y,Y,I,RW,Brian Fitzpatrick


#### Members' Leadpac Details ####

In [712]:
# Note: a candidate may have multiple committees for continuity between
# cycles where accounts need to be reconciled over time.
df_wm_leadpacs = pd.merge(df_wm_members, df_cmtes22, left_on='cid_member', right_on='recipid_cmtes22', how='inner')
df_wm_leadpacs = df_wm_leadpacs.rename(columns={'cmteid_cmtes22': 'cmteid_leadpac', 'pacshort_cmtes22': 'pacshort_leadpac',
                                                'affiliate_cmtes22': 'affiliate_leadpac', 'ultorg_cmtes22': 'ultorg_leadpac',
                                                'recipid_cmtes22': 'recipid_leadpac', 'primcode_cmtes22': 'primcode_leadpac',
                                                'active_cmtes22': 'active_leadpac'})

df_wm_leadpacs = df_wm_leadpacs.drop(['cycle_cmtes22', 'recipcode_cmtes22', 'feccandid_cmtes22', 'party_cmtes22',
                                      'source_cmtes22', 'sensitive_cmtes22', 'foreign_cmtes22'], axis=1)

In [777]:
df_wm_leadpacs.head()

Unnamed: 0,feccandid_member,cid_member,party_member,currcand_member,cyclecand_member,crpico_member,recipcode_member,firstlast_member,cmteid_leadpac,pacshort_leadpac,affiliate_leadpac,ultorg_leadpac,recipid_leadpac,primcode_leadpac,active_leadpac
0,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith,C00412890,Adrian Smith for Congress,,Adrian Smith for Congress,N00027623,Z1100,1
1,H0TX24209,N00045167,R,Y,Y,I,RW,Beth Van Duyne,C00714865,Beth Van Duyne for Congress,,Beth Van Duyne for Congress,N00045167,Z1100,1
2,H0TX24209,N00045167,R,Y,Y,I,RW,Beth Van Duyne,C00718957,Van Duyne for TX-24,,Van Duyne for TX-24,N00045167,Z1100,1
3,H0UT01205,N00046598,R,Y,Y,I,RW,Blake Moore,C00738872,Blake Moore for Congress,,Blake Moore for Congress,N00046598,Z1100,1
4,H2OH02085,N00033310,R,Y,Y,I,RW,Brad Wenstrup,C00497818,Wenstrup for Congress,,Wenstrup for Congress,N00033310,Z1100,1


#### Members' leadpacs' revenues ####
(Lacks identifying details)

In [634]:
# Link all transactions to candidate pacs.
# Get revenue only, filter out expenditures
# THIS MAY DISTORT THINGS! LOOK INTO THIS LATER.
df_pacs22_revenues = df_pacs22[df_pacs22['amount_pacs22'] > 0]

In [724]:
# Keep the "di" field, for now track both direct and indirect income:
# https://raw.githubusercontent.com/nketchum/si608exploration/refs/heads/main/docs/direct_vs_indirect_contrib.txt
df_wm_revenues = pd.merge(df_wm_leadpacs, df_pacs22_revenues, left_on='cid_member', right_on='cid_pacs22')
df_wm_revenues = df_wm_revenues.rename(columns={'fecrecno_pacs22': 'fecrecno_revenue', 'pacid_pacs22': 'pacid_revenue',
                                                'amount_pacs22': 'amount_revenue', 'date_pacs22': 'date_revenue',
                                                'realcode_pacs22': 'realcode_revenue', 'type_pacs22': 'type_revenue',
                                                'di_pacs22': 'di_revenue', 'feccandid_pacs22': 'feccandid_revenue'})
df_wm_revenues = df_wm_revenues.drop(['cycle_pacs22', 'cid_pacs22'], axis=1)

In [726]:
df_wm_revenues.sample(5)

Unnamed: 0,feccandid_member,cid_member,party_member,currcand_member,cyclecand_member,crpico_member,recipcode_member,firstlast_member,cmteid_leadpac,pacshort_leadpac,affiliate_leadpac,ultorg_leadpac,recipid_leadpac,primcode_leadpac,active_leadpac,fecrecno_revenue,pacid_revenue,amount_revenue,date_revenue,realcode_revenue,type_revenue,di_revenue,feccandid_revenue
12244,H8OK01157,N00040829,R,Y,Y,I,RW,Kevin Hern,C00636092,Hern for Congress,,Hern for Congress,N00040829,Z1100,1,4012620231720640011,C00586859,1000,2022-09-13,F4600,24K,D,H8OK01157
4819,H4NY22051,N00036351,R,Y,Y,I,RW,Claudia Tenney,C00561183,Tenney for Congress,,Tenney for Congress,N00036351,Z1100,1,4012620221394370281,C00694323,5,2021-01-02,Z9600,24K,D,H4NY22051
9903,H6GA03113,N00039090,R,Y,Y,I,RW,Drew Ferguson,C00607838,Drew Ferguson for Congress,,Drew Ferguson for Congress,N00039090,Z1100,1,4020920221406086895,C00375360,1500,2021-12-14,H1130,24K,D,H6GA03113
11705,H6TX19099,N00038285,R,Y,Y,I,RW,Jodey Arrington,C00588657,Texans for Jodey Arrington,,Texans for Jodey Arrington,N00038285,Z1100,1,4010420231665460606,C00211318,2500,2022-11-04,F5100,24K,D,H6TX19099
3058,H6PA08277,N00038779,R,Y,Y,I,RW,Brian Fitzpatrick,C00607416,Brian Fitzpatrick for Congress,,Brian Fitzpatrick for Congress,N00038779,Z1100,1,4012620221390079592,C00694323,1,2021-01-07,Z9600,24K,D,H6PA08277


#### Members' leadpacs' transactions ####

(Money flows + details of money flow)

In [732]:
df_wm_transactions = pd.merge(df_wm_revenues, df_cmtes22, left_on='pacid_revenue', right_on='cmteid_cmtes22', how='inner')
df_wm_transactions = df_wm_transactions.rename(columns={'cmteid_cmtes22': 'cmteid_transaction', 'pacshort_cmtes22': 'pacshort_transaction', 
                                                        'affiliate_cmtes22': 'affiliate_transaction', 'ultorg_cmtes22': 'ultorg_transaction', 
                                                        'recipid_cmtes22': 'recipid_transaction', 'recipcode_cmtes22': 'recipcode_transaction', 
                                                        'feccandid_cmtes22': 'feccandid_transaction', 'party_cmtes22': 'party_transaction', 
                                                        'source_cmtes22': 'source_transaction'})
df_wm_transactions = df_wm_transactions.drop(['cycle_cmtes22', 'primcode_cmtes22', 'sensitive_cmtes22', 'foreign_cmtes22', 'active_cmtes22'], axis=1)

In [734]:
df_wm_transactions

Unnamed: 0,feccandid_member,cid_member,party_member,currcand_member,cyclecand_member,crpico_member,recipcode_member,firstlast_member,cmteid_leadpac,pacshort_leadpac,affiliate_leadpac,ultorg_leadpac,recipid_leadpac,primcode_leadpac,active_leadpac,fecrecno_revenue,pacid_revenue,amount_revenue,date_revenue,realcode_revenue,type_revenue,di_revenue,feccandid_revenue,cmteid_transaction,pacshort_transaction,affiliate_transaction,ultorg_transaction,recipid_transaction,recipcode_transaction,feccandid_transaction,party_transaction,primcode_transaction,source_transaction
0,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith,C00412890,Adrian Smith for Congress,,Adrian Smith for Congress,N00027623,Z1100,1,2022720231732787170,C00456335,2500,2022-10-06,F3100,24K,D,H6NE03115,C00456335,Physicians Mutual Insurance,,Physicians Mutual Insurance,C00456335,PB,,,F3100,WAA22
1,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith,C00412890,Adrian Smith for Congress,,Adrian Smith for Congress,N00027623,Z1100,1,2031520231734695012,C00224725,2500,2022-12-23,T8100,24K,D,H6NE03115,C00224725,Harley-Davidson Inc,,Harley-Davidson Inc,C00224725,PB,,,T8100,WAA19
2,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith,C00412890,Adrian Smith for Congress,,Adrian Smith for Congress,N00027623,Z1100,1,4010420231665460439,C00000422,1500,2022-10-31,H1100,24K,D,H6NE03115,C00000422,American Medical Assn,American Medical Assn,American Medical Assn,C00000422,PB,,,H1100,WAA21
3,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith,C00412890,Adrian Smith for Congress,,Adrian Smith for Congress,N00027623,Z1100,1,4010820221388306236,C00411900,2000,2021-12-09,H4100,24K,D,H6NE03115,C00411900,Edwards Lifesciences,,Edwards Lifesciences,C00411900,PB,,,H4100,WAA21
4,H6NE03115,N00027623,R,Y,Y,I,RW,Adrian Smith,C00412890,Adrian Smith for Congress,,Adrian Smith for Congress,N00027623,Z1100,1,4011120231682686527,C00118943,5000,2022-10-31,F3300,24K,D,H6NE03115,C00118943,Massachusetts Mutual Life Insurance,Massachusetts Mutual Life Insurance,Massachusetts Mutual Life Insurance,C00118943,PB,,,F3300,WAA21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19057,H8KS04112,N00040712,R,Y,Y,I,RW,Ron Estes,C00632067,Ron Estes for Congress,,Ron Estes for Congress,N00040712,Z1100,1,4122820211386740562,C00107235,2000,2021-11-12,F5100,24K,D,H8KS04112,C00107235,PricewaterhouseCoopers,,PricewaterhouseCoopers,C00107235,PB,,,F5100,WAA21
19058,H8KS04112,N00040712,R,Y,Y,I,RW,Ron Estes,C00632067,Ron Estes for Congress,,Ron Estes for Congress,N00040712,Z1100,1,4122920221645454695,C00797670,2900,2022-04-14,J5100,24K,D,H8KS04112,C00797670,American Israel Public Affairs Cmte,American Israel Public Affairs Cmte,American Israel Public Affairs Cmte,C00797670,PI,,,J5100,WAA22
19059,H8KS04112,N00040712,R,Y,Y,I,RW,Ron Estes,C00632067,Ron Estes for Congress,,Ron Estes for Congress,N00040712,Z1100,1,4122920221645458512,C00797670,5000,2022-07-13,J5100,24K,D,H8KS04112,C00797670,American Israel Public Affairs Cmte,American Israel Public Affairs Cmte,American Israel Public Affairs Cmte,C00797670,PI,,,J5100,WAA22
19060,H8KS04112,N00040712,R,Y,Y,I,RW,Ron Estes,C00632067,Ron Estes for Congress,,Ron Estes for Congress,N00040712,Z1100,1,4122920221645458513,C00797670,2100,2022-07-13,J5100,24K,D,H8KS04112,C00797670,American Israel Public Affairs Cmte,American Israel Public Affairs Cmte,American Israel Public Affairs Cmte,C00797670,PI,,,J5100,WAA22


In [604]:
# Donations by United Postmasters & Managers of America, who gave to Claudia Tenney, N00036351, linked from "pacid_pacs22" in df_wm_transactions
df_pacs22[df_pacs22['pacid_pacs22'] == 'C00100404']

Unnamed: 0,cycle_pacs22,fecrecno_pacs22,pacid_pacs22,cid_pacs22,amount_pacs22,date_pacs22,realcode_pacs22,type_pacs22,di_pacs22,feccandid_pacs22
1469,2022,4010320231645675381,C00100404,N00035215,100,2022-06-07,L1500,24K,D,H2IL02172
1470,2022,4010320231645675382,C00100404,N00029891,500,2022-06-13,L1500,24K,D,H8VA11062
1471,2022,4010320231645675383,C00100404,N00025766,2000,2022-06-14,L1500,24K,D,H2LA02149
1472,2022,4010320231645675384,C00100404,N00036351,2000,2022-06-21,L1500,24K,D,H4NY22051
1473,2022,4010320231645675385,C00100404,N00030670,2000,2022-06-21,L1500,24K,D,S6IN00191
...,...,...,...,...,...,...,...,...,...,...
747943,2022,4121720221637693851,C00100404,N00044223,1000,2022-09-27,L1500,24K,D,S0AZ00350
747944,2022,4121720221637693852,C00100404,N00046030,5000,2022-09-28,L1500,24K,D,H0NY02234
747945,2022,4121720221637693853,C00100404,N00029139,1000,2022-09-28,L1500,24K,D,H8IL14067
747946,2022,4121720221637693854,C00100404,N00045905,1000,2022-09-29,L1500,24K,D,H0IN01150


In [564]:
# Using the committee in 
df_cmtes22[df_cmtes22['cmteid_cmtes22'] == 'C00100404']

Unnamed: 0,cycle_cmtes22,cmteid_cmtes22,pacshort_cmtes22,affiliate_cmtes22,ultorg_cmtes22,recipid_cmtes22,recipcode_cmtes22,feccandid_cmtes22,party_cmtes22,primcode_cmtes22,source_cmtes22,sensitive_cmtes22,foreign_cmtes22,active_cmtes22
485,2022,C00100404,United Postmasters & Managers of America,United Postmasters & Managers of America,United Postmasters & Managers of America,C00100404,PL,,,L1500,WAA21,n,0,1


In [740]:
df_cmtes22[df_cmtes22['cmteid_cmtes22'] == 'C00456335']

Unnamed: 0,cycle_cmtes22,cmteid_cmtes22,pacshort_cmtes22,affiliate_cmtes22,ultorg_cmtes22,recipid_cmtes22,recipcode_cmtes22,feccandid_cmtes22,party_cmtes22,primcode_cmtes22,source_cmtes22,sensitive_cmtes22,foreign_cmtes22,active_cmtes22
3184,2022,C00456335,Physicians Mutual Insurance,,Physicians Mutual Insurance,C00456335,PB,,,F3100,WAA22,N,0,1
