# SI 608 Project – Workspace #
#### General scratchpad workspace that preloads all the dataframes. ####
See <code>./modules</code> to review how libraries are installed and imported, as well as where the data is loaded, cleaned, and formatted. This notebook is only here as a helpful tool: make a copy and do whatever you'd like, or don't use it at all if that's preferable.

### Initialize
Init file contains helper functions used throughout the project.

In [3]:
# Run the shared init notebook. Per its printed output it checks that pandas,
# matplotlib, networkx and numpy are installed. Presumably it also defines the
# helpers used later (e.g. install_if_needed) -- TODO confirm in modules/init.ipynb.
%run modules/init.ipynb

Initializing project...
pandas is already installed.
matplotlib is already installed.
networkx is already installed.
numpy is already installed.
...initialization complete.


#### Load datasets ####

This module provides a single function for loading all of the *contribution* data from OpenSecrets.

In [5]:
# Run the data module notebook. Presumably this is where create_dataframe
# (called by the cells below) is defined -- TODO confirm in modules/data.ipynb.
%run modules/data.ipynb

Loading data module...
...data module loaded.


In [6]:
# Individual contributions (CampaignFin22/indivs22.csv).
# Column names for the file; see the OpenSecrets data dictionary for
# Individual Contribution Data:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20Individual%20Contribution%20Data.htm
columns_indivs22 = [
    'cycle', 'fectransid', 'contribid', 'contrib_last', 'contrib_first', 'recipid',
    'orgname', 'ultorg', 'realcode', 'date', 'amount', 'street',
    'city', 'state', 'zip', 'recipcode', 'type', 'cmteid',
    'otherid', 'gender', 'microfilm', 'occupation', 'employer', 'source',
]

# Loading the whole file crashed the original author's computer, so only the
# first 1,000,000 rows are read for now.
# TODO: find a better way to trim this dataset, e.g. to only the last cycle or two.
df_indivs22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/indivs22.csv',
    columns_indivs22,
    nrows=1_000_000,
)

Creating dataframe...
dataframe created...


In [7]:
# Preview the first rows to sanity-check that the column names line up with the data.
df_indivs22.head()

Unnamed: 0,cycle,fectransid,contribid,contrib_last,contrib_first,recipid,orgname,ultorg,realcode,date,amount,street,city,state,zip,recipcode,type,cmteid,otherid,gender,microfilm,occupation,employer,source
0,2022,4061520221505841534,r0014256510,DILLARD,DANIEL,C00000935,[24T Contribution],,Z9500,2021-08-29,35,,HIDALGO,TX,78557,DP,24T,C00401224,C00000935,M,202205149504021121,NOT EMPLOYED,NOT EMPLOYED,Rept
1,2022,4061520221505841535,r0015503614,WHITE,SCOTTO,C00633404,[24T Contribution],,Z9500,2021-08-29,15,,YONKERS,NY,10701,PI,24T,C00401224,C00633404,M,202205149507911003,NOT EMPLOYED,NOT EMPLOYED,Rept
2,2022,4061520221505841536,p0003861308,DOMINGUEZ,SALLIE,C00632398,[24T Contribution],,Z9500,2021-08-29,10,,CAMPBELL,CA,95011,PI,24T,C00401224,C00632398,F,202205149504046504,HEALTHCARE,MCDONALD FAMILY DENTAL,Rept
3,2022,4061520221505841543,p0003689535,BRADLEY,JANNETTE,C00678839,[24T Contribution],,Z9500,2021-08-29,26,,OAK LAWN,IL,60453,PI,24T,C00401224,C00678839,F,202205149503347140,LIBRARIAN,MEDICAL CENTER,Rept
4,2022,4061520221505841545,q0002181378,BHATT,TANYA,C00580068,[24T Contribution],,Z9500,2021-08-29,50,,MIAMI BEACH,FL,33141,PI,24T,C00401224,C00580068,F,202205149503225635,MKTG,FRYE FINANCIAL,Rept


In [8]:
# Committees (CampaignFin22/cmtes22.csv).
# Column names for the file; see the OpenSecrets data dictionary for the Committee table:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20Cmtes.htm
columns_cmtes22 = [
    'cycle', 'cmteid', 'pacshort', 'affiliate', 'ultorg',
    'recipid', 'recipcode', 'feccandid', 'party', 'primcode',
    'source', 'sensitive', 'foreign', 'active',
]

df_cmtes22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/cmtes22.csv',
    columns_cmtes22,
)

Creating dataframe...
dataframe created...


In [9]:
# Preview the first rows of the committee table.
df_cmtes22.head()

Unnamed: 0,cycle,cmteid,pacshort,affiliate,ultorg,recipid,recipcode,feccandid,party,primcode,source,sensitive,foreign,active
0,2022,C00000059,Hallmark Cards,,Hallmark Cards,C00000059,PB,,,C1400,WAA21,Y,0,1
1,2022,C00000422,American Medical Assn,American Medical Assn,American Medical Assn,C00000422,PB,,,H1100,WAA21,n,0,1
2,2022,C00000489,Teamsters Local 886,Teamsters Union,Teamsters Union,C00000489,PL,,,LT300,WAA21,n,0,1
3,2022,C00000547,KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMIT...,,,C00000547,,,U,,,,0,0
4,2022,C00000729,American Dental Assn,American Dental Assn,American Dental Assn,C00000729,PB,,,H1400,WAA21,n,0,1


In [10]:
# 527 committees (527/cmtes527.csv).
# Column names for the file; see the OpenSecrets data dictionary for 527 Committees:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Cmtes.htm
columns_cmtes527 = [
    'cycle', 'rpt', 'ein', 'crp527name', 'affiliate',
    'ultorg', 'recipcode', 'cmteid', 'cid', 'eccmteid',
    'party', 'primcode', 'source', 'ffreq', 'ctype',
    'csource', 'viewpt', 'comments', 'state',
]

df_cmtes527 = create_dataframe(
    '../../data/open_secrets/527/cmtes527.csv',
    columns_cmtes527,
)

Creating dataframe...
dataframe created...


In [11]:
# Preview the first rows of the 527 committee table.
df_cmtes527.head()

Unnamed: 0,cycle,rpt,ein,crp527name,affiliate,ultorg,recipcode,cmteid,cid,eccmteid,party,primcode,source,ffreq,ctype,csource,viewpt,comments,state
0,2002,Q302,861006189,American Electronics Assn,,American Electronics Assn,PB,,,,,C5000,WebPN,Q,F,,N,,AZ
1,2008,Q308,262108560,California 2008 GOP Delegation Corporate,,California 2008 GOP Delegation,RP,,,,R,Z5100,Name,Q,F,,C,,CA
2,2000,Q400,912101097,Alabama League of Environmental Action,,Alabama League of Environmental Action,PI,,,,,JE300,Name,Q,S,Name,L,,AL
3,2012,Q412,522257109,International Brotherhood of Electrical Workers,,International Brotherhood of Electrical Workers,PL,C00027342,,,,LC150,PAC,Q,F,Name,L,,DC
4,2008,Q407,900049259,Pacific Northwest Regional Council of Carpenters,,Carpenters & Joiners Union,PL,C00001016,,C70004205,,LB100,PAC,Q,F,Expen,L,,WA


In [12]:
# Candidates (CampaignFin22/cands22.csv).
# Column names for the file; see the OpenSecrets data dictionary for Candidates:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20Candidates%20Data.htm
columns_cands22 = [
    'cycle', 'feccandid', 'cid', 'firstlastp',
    'party', 'distidrunfor', 'distidcurr', 'currcand',
    'cyclecand', 'crpico', 'recipcode', 'nopacs',
]

df_cands22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/cands22.csv',
    columns_cands22,
)

Creating dataframe...
dataframe created...


In [13]:
# Preview the first rows of the candidate table.
df_cands22.head()

Unnamed: 0,cycle,feccandid,cid,firstlastp,party,distidrunfor,distidcurr,currcand,cyclecand,crpico,recipcode,nopacs
0,2022,H0AK00105,N00039029,Thomas Lamb (3),3,AK01,,,,,3N,
1,2022,H0AL01055,N00044245,Jerry Carl (R),R,AL01,AL01,Y,Y,I,RW,
2,2022,H0AL01063,N00044288,Wes Lambert (R),R,AL01,,,,,RN,
3,2022,H0AL01097,N00044750,James Averhart (D),D,AL01,,,,,DN,
4,2022,H0AL02087,N00030768,Martha Roby (R),R,AL02,,,,,RN,


In [14]:
# PAC-to-PAC transactions (CampaignFin22/pac_other22.csv, the "Pac_other" table).
# Column names for the file; see the OpenSecrets data dictionary for PAC to PAC Data:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20PAC%20to%20PAC%20Data.htm
# NOTE(review): in the preview of this frame a New Jersey zip is shown as 8108,
# so leading zeros appear to be lost -- presumably 'zip' is parsed as a number.
# Confirm the dtype handling in create_dataframe before joining on zip.
columns_pac_other22 = [
    'cycle', 'fecrecno', 'filerid', 'donorcmte', 'contriblendtrans', 'city',
    'state', 'zip', 'fecoccemp', 'primcode', 'date', 'amount',
    'recipid', 'party', 'otherid', 'recipcode', 'recipprimcode', 'amend',
    'report', 'pg', 'microfilm', 'type', 'realcode', 'source',
]

df_pac_other22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/pac_other22.csv',
    columns_pac_other22,
)

Creating dataframe...
dataframe created...


In [15]:
# Preview the first rows of the PAC-to-PAC table.
df_pac_other22.head()

Unnamed: 0,cycle,fecrecno,filerid,donorcmte,contriblendtrans,city,state,zip,fecoccemp,primcode,date,amount,recipid,party,otherid,recipcode,recipprimcode,amend,report,pg,microfilm,type,realcode,source
0,2022,1011320230265253021,C00032979,Teamsters Union,BOHANNAN VICTORY FUND,IOWA CITY,IA,52245,,LT300,2022-11-15,-1000,C00811059,D,C00811059,DP,Z4200,A,30G,P,202212089550408190,24K,LT300,PAC
1,2022,1011320230265254787,C00032979,Teamsters Union,WARNOCK VICTORY FUND,WASHINGTON,DC,20003,,LT300,2022-11-22,15000,C00740597,D,C00740597,DP,Z4200,A,30G,P,202212089550408210,24K,LT300,PAC
2,2022,1011320230265254797,C00032979,Teamsters Union,BLUE POWER PAC,COLLINSWOOD,NJ,8108,,LT300,2022-11-03,5000,C00575894,D,C00575894,PI,J2100,A,30G,P,202212089550408207,24K,LT300,PAC
3,2022,1011320230265254799,C00032979,Teamsters Union,COMMONWEALTH PAC,MERRIFIELD,VA,22116,,LT300,2022-11-03,5000,C00498931,D,C00498931,PI,J2100,A,30G,P,202212089550408208,24K,LT300,PAC
4,2022,1011320230265254826,C00032979,Teamsters Union,RHODE ISLAND DEMOCRATIC STATE CMTE,PROVIDENCE,RI,92940,,LT300,2022-11-03,5000,C00136200,D,C00136200,DP,Z5200,A,30G,P,202212089550408209,24K,LT300,PAC


In [16]:
# Lobbyists (Lobby/lob_lobbyist.csv).
# Column names for the file; see the OpenSecrets data dictionary for Lobbyists:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbyists.htm
columns_lob_lobbyist = [
    'uniqid',
    'lobbyist_lastname_std', 'lobbyist_firstname_std',
    'lobbyist_lastname_raw', 'lobbyist_firstname_raw',
    'lobbyist_id', 'year', 'officialposition', 'cid', 'formercongmem',
]

df_lob_lobbyist = create_dataframe(
    '../../data/open_secrets/Lobby/lob_lobbyist.csv',
    columns_lob_lobbyist,
)

Creating dataframe...
dataframe created...


In [17]:
# Preview the first rows of the lobbyist table.
df_lob_lobbyist.head()

Unnamed: 0,uniqid,lobbyist_lastname_std,lobbyist_firstname_std,lobbyist_lastname_raw,lobbyist_firstname_raw,lobbyist_id,year,officialposition,cid,formercongmem
0,06C29C84-250F-478B-872A-2F647D9DC044,O'BRIEN,LAWRENCE F. III,O'Brien,Lawrence F III,Y0000046486L,2004,,,n
1,3A22C685-EC94-46AA-9C45-4AA4A7044C28,BRAGG,PATRICIA DUNMIRE,Bragg,Patricia Dunmire,Y0000020554L,2001,,,n
2,5CBE61EC-87F1-401E-9D57-620975C9A1F8,COSTELLO,RYAN,Costello,Ryan,Y0000027292L,2002,,N00031064,y
3,8D1DB726-0BE9-46C1-AADE-BE8BCAFF929F,SMITH,G. WAYNE,Smith,G Wayne,Y0000027005L,2004,,,n
4,EF5095CF-92E6-49BE-A40D-35E6464EA394,SCHMITZ,JOHN P.,Schmitz,John P,Y0000019948L,2002,,,n


In [18]:
# Lobbying reports (Lobby/lob_lobbying.csv).
# Column names for the file; see the OpenSecrets data dictionary for Lobbying:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbying.htm
columns_lob_lobbying = [
    'uniqid', 'registrant_raw', 'registrant', 'isfirm', 'client_raw', 'client',
    'ultorg', 'amount', 'catcode', 'source', 'self', 'includensfs',
    'use', 'ind', 'year', 'type', 'typelong', 'affiliate',
]

df_lob_lobbying = create_dataframe(
    '../../data/open_secrets/Lobby/lob_lobbying.csv',
    columns_lob_lobbying,
)

Creating dataframe...
dataframe created...


In [19]:
# Preview the first rows of the lobbying table.
df_lob_lobbying.head()

Unnamed: 0,uniqid,registrant_raw,registrant,isfirm,client_raw,client,ultorg,amount,catcode,source,self,includensfs,use,ind,year,type,typelong,affiliate
0,82c5f661-a637-45ad-a3a6-b5ba18cf8962,ASTRAZENECA PHARMACEUTICALS LP,AstraZeneca Pharmaceuticals,,ASTRAZENECA PHARMACEUTICALS LP,AstraZeneca Pharmaceuticals,AstraZeneca PLC,1370000,H4300,pac,x,,y,y,2021,q4a,FOURTH QUARTER AMENDMENT,
1,85b111b1-5d2e-4107-bc24-0921316e29a5,ECHELON GOVERNMENT AFFAIRS,Echelon Government Affairs,y,THE ALBERS GROUP,Albers Group,Albers Group,10000,Y4000,,n,,y,y,2021,q4,FOURTH QUARTER REPORT,
2,87822a14-12de-478c-a34d-010fa503e539,WTA -- ADVOCATES FOR RURAL BROADBAND,Western Telecommunications Alliance,,WTA -- ADVOCATES FOR RURAL BROADBAND,Western Telecommunications Alliance,Western Telecommunications Alliance,75000,C4000,pac,p,,y,y,2021,q4,FOURTH QUARTER REPORT,
3,87ff989d-9d12-4fef-84ef-ab69cd616894,FINANCIAL EXECUTIVES INTERNATIONAL,Financial Executives International,,FINANCIAL EXECUTIVES INTERNATIONAL,Financial Executives International,Financial Executives International,21650,F5000,Hvr06,p,,y,y,2021,q4,FOURTH QUARTER REPORT,
4,88bac485-00bb-4915-8ca5-8d0ec589ec28,CAPITOL HILL CONSULTING GROUP,Capitol Hill Consulting Group,y,ASSOCIATION FOR ACCESSIBLE MEDICINES (FORMERLY...,Association for Accessible Medicines,Association for Accessible Medicines,40000,H4300,pac,i,,y,,2021,q4,FOURTH QUARTER REPORT,


In [20]:
# Lobbying issues (Lobby/lob_issue.csv).
# Column names for the file; see the OpenSecrets data dictionary for Lobby issues:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_issues.htm
# NOTE(review): the integer names 5..10 are placeholders for extra columns. In the
# preview of this frame they hold comma-separated fragments of the free-text issue
# description ("environmental", "transportation", ...), which suggests the
# 'specificissue' text is being split on embedded commas. Verify the quoting used
# by create_dataframe / the CSV before relying on these columns or on 'year'.
columns_lob_issue = [
    'si_id', 'uniqid', 'issueid', 'issue', 'specificissue',
    5, 6, 7, 8, 9, 10,
    'year',
]

df_lob_issue = create_dataframe(
    '../../data/open_secrets/Lobby/lob_issue.csv',
    columns_lob_issue,
)

Creating dataframe...
dataframe created...


In [21]:
# Preview the first rows of the lobbying-issue table; check in particular what
# ends up in the placeholder columns 5..10.
df_lob_issue.head()

Unnamed: 0,si_id,uniqid,issueid,issue,specificissue,5,6,7,8,9,10,year
0,3001624,02e92bd6-0159-495e-9d00-8a490a0be8be,DIS,Disaster & Emergency Planning,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
1,3001625,02e92bd6-0159-495e-9d00-8a490a0be8be,ENV,Environment & Superfund,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
2,3001627,02e92bd6-0159-495e-9d00-8a490a0be8be,ROD,Roads & Highways,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
3,3001628,02e92bd6-0159-495e-9d00-8a490a0be8be,RRR,Railroads,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022
4,3001629,02e92bd6-0159-495e-9d00-8a490a0be8be,TAX,Taxes,Issues affecting manufacturer of railroad and ...,environmental,transportation,trade,labor,shipping,disaster planning and tax issues.,2022


In [22]:
# Lobbying industries (Lobby/lob_indus.csv).
# Column names for the file; see the OpenSecrets data dictionary for Lobby industries:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_indus.htm
columns_lob_indus = [
    'client',
    'sub',
    'total',
    'year',
    'catcode',
]

df_lob_indus = create_dataframe(
    '../../data/open_secrets/Lobby/lob_indus.csv',
    columns_lob_indus,
)

Creating dataframe...
dataframe created...


In [23]:
# Preview the first rows of the lobbying-industry table.
df_lob_indus.head()

Unnamed: 0,client,sub,total,year,catcode
0,National Assn for County Community & Econ Develop,National Assn for County Community & Econ Develop,0,1998,X3000
1,Fox Valley Technical College,Fox Valley Technical College,80000,2015,H5200
2,Employers Cncl on Flexible Compensation,Employers Cncl on Flexible Compensation,580000,2001,J9000
3,American Federation of TV/Radio Artists,American Federation of TV/Radio Artists,30000,2011,LG400
4,Visiting Nurse Assn of America,Visiting Nurse Assn/Manchester & S NH,40000,2005,H3100


In [24]:
# Lobbying bills (Lobby/lob_bills.csv).
# Column names for the file; see the OpenSecrets data dictionary for Lobby bills:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_bills.htm
columns_lob_bills = [
    'b_id',
    'si_id',
    'congno',
    'bill_name',
]

df_lob_bills = create_dataframe(
    '../../data/open_secrets/Lobby/lob_bills.csv',
    columns_lob_bills,
)

Creating dataframe...
dataframe created...


In [25]:
# Preview the first rows of the lobbying-bill table.
df_lob_bills.head()

Unnamed: 0,b_id,si_id,congno,bill_name
0,s1461-117,2820018,117,S.1461
1,hr463-117,2820018,117,H.R.463
2,s910-116,2820035,116,S.910
3,hr2405-116,2820035,116,H.R.2405
4,hr3816-117,2820067,117,H.R.3816


In [26]:
# Lobbying agencies (Lobby/lob_agency.csv).
# Column names for the file; see the OpenSecrets data dictionary for Lobby agencies:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_agency.htm
columns_lob_agency = [
    'uniqid',
    'agencyid',
    'agency',
]

df_lob_agency = create_dataframe(
    '../../data/open_secrets/Lobby/lob_agency.csv',
    columns_lob_agency,
)

Creating dataframe...
dataframe created...


In [27]:
# Preview the first rows of the lobbying-agency table.
df_lob_agency.head()

Unnamed: 0,uniqid,agencyid,agency
0,BB7367A7-7B60-4DED-AA2D-A94771A9EBE8,1,US Senate
1,BB7367A7-7B60-4DED-AA2D-A94771A9EBE8,2,US House of Representatives
2,04366C6F-B0CE-4C28-87BF-EE1CC8A9BB41,2,US House of Representatives
3,04366C6F-B0CE-4C28-87BF-EE1CC8A9BB41,34,Dept of Health & Human Services
4,04366C6F-B0CE-4C28-87BF-EE1CC8A9BB41,134,Centers for Disease Control & Prevention


In [28]:
# PAC-to-candidate contributions (CampaignFin22/pacs22.csv).
# Column names for the file; see the OpenSecrets data dictionary for the
# PAC table (PACs to Candidates):
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20PAC%20to%20Cands%20Data.htm
columns_campfin22_pacs22 = [
    'cycle', 'fecrecno', 'pacid', 'cid', 'amount',
    'date', 'realcode', 'type', 'di', 'feccandid',
]

df_campfin22_pacs22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/pacs22.csv',
    columns_campfin22_pacs22,
)

Creating dataframe...
dataframe created...


In [29]:
# Preview the first rows of the PAC-to-candidate table.
df_campfin22_pacs22.head()

Unnamed: 0,cycle,fecrecno,pacid,cid,amount,date,realcode,type,di,feccandid
0,2022,1011020220264924462,C00406124,N00029513,500,2021-10-18,B4000,24K,D,H8IN07184
1,2022,1011320230265253020,C00032979,N00041569,-2500,2022-11-21,Z9600,24K,D,H8IL14174
2,2022,1011320230265254779,C00032979,N00041511,-5000,2022-11-18,Z9600,24K,D,H8MN08043
3,2022,1011320230265254798,C00032979,N00035575,5000,2022-11-03,Z9600,24K,D,H4FL15155
4,2022,1011320230265254809,C00032979,N00041511,4000,2022-11-03,Z9600,24K,D,H8MN08043


In [30]:
# 527 receipts (527/rcpts527.csv), from IRS Form 8872A.
# Column names for the file; see the OpenSecrets data dictionary for 527 Contribution Data:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Receipts.htm
columns_rcpts527 = [
    'id', 'rpt', 'formid', 'schaid', 'contribid',
    'contrib', 'amount', 'date', 'orgname', 'ultorg',
    'realcode', 'recipid', 'recipcode', 'party', 'recipient',
    'city', 'state', 'zip', 'zip4', 'pmsa',
    'employer', 'occupation', 'ytd', 'gender', 'source',
]

df_rcpts527 = create_dataframe(
    '../../data/open_secrets/527/rcpts527.csv',
    columns_rcpts527,
)

Creating dataframe...
dataframe created...


In [31]:
# Preview the first rows of the 527 receipts table.
df_rcpts527.head()

Unnamed: 0,id,rpt,formid,schaid,contribid,contrib,amount,date,orgname,ultorg,realcode,recipid,recipcode,party,recipient,city,state,zip,zip4,pmsa,employer,occupation,ytd,gender,source
0,981,Q210,9595837,2017490,,WEST LA DEMOCRATIC CLUB,1,2010-04-18,West La Democratic Club,,Z9600,270160261,PI,,ActBlue Technical Services,BURBANK,CA,91502,,4480,,,473,,Rept
1,982,Q210,9595837,2017492,,WINOGRAD FOR CONGRESS 2010,259,2010-04-18,Winograd For Congress 2010,,Z9600,270160261,PI,,ActBlue Technical Services,BURBANK,CA,91502,,4480,,,1049,,Rept
2,983,Q210,9595837,2017387,,FDL ACTION PAC,4,2010-04-18,Fdl Action Pac,,Z9600,270160261,PI,,ActBlue Technical Services,WASHINGTON,DC,20016,,8840,,,1524,,Rept
3,984,Q210,9595837,2017390,,FRANKEN MVPS,190,2010-04-18,Franken Mvps,,Z9600,270160261,PI,,ActBlue Technical Services,MINNEAPOLIS,MN,55458,,5120,,,662,,Rept
4,985,Q210,9595837,2017393,,FRIENDS OF BRENT BARTON,49,2010-04-18,Friends Of Brent Barton,,Z9600,270160261,PI,,ActBlue Technical Services,PORTLAND,OR,97214,,6440,,,241,,Rept


In [32]:
# 527 expenditures (527/expends527.csv), from IRS Form 8872B.
# Column names for the file; see the OpenSecrets data dictionary for 527 Expenditure Data:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Expenditures.htm
# NOTE(review): in the preview of this frame a Newark, NJ zip is shown as 7101,
# so leading zeros appear to be lost -- presumably 'zip' is parsed as a number.
# Confirm the dtype handling in create_dataframe before joining on zip.
columns_expends527 = [
    'rpt', 'formid', 'schbid', 'orgname', 'ein',
    'recipient', 'recipientcrp', 'amount', 'date', 'expcode',
    'source', 'purpose', 'addr1', 'addr2', 'city',
    'state', 'zip', 'employer', 'occupation',
]

df_expends527 = create_dataframe(
    '../../data/open_secrets/527/expends527.csv',
    columns_expends527,
)

Creating dataframe...
dataframe created...


In [33]:
# Preview the first rows of the 527 expenditures table.
df_expends527.head()

Unnamed: 0,rpt,formid,schbid,orgname,ein,recipient,recipientcrp,amount,date,expcode,source,purpose,addr1,addr2,city,state,zip,employer,occupation
0,Q210,9595787,2016057,Republican State Leadership Cmte,50532524,VERIZON,Verizon Communications,125,2010-04-16,A70,@new,TELEPHONE,PO BOX 660720,,DALLAS,TX,75266,,
1,Q210,9595787,2016059,Republican State Leadership Cmte,50532524,VERIZON WIRELESS,Verizon Wireless,141,2010-04-09,A70,@new,CELL PHONE,PO BOX 25505,,LEHIGH VALLEY,PA,18002,,
2,Q210,9595791,2016223,GOPAC,521237780,ADP,Automatic Data Processing Inc,414,2010-04-09,W10,@new,PAYROLL TAXES,8094 SAND PIPER CIRCLE,,WHITE MARSH,MD,21236,,
3,Q210,9595791,2016225,GOPAC,521237780,ADP,Automatic Data Processing Inc,78,2010-04-23,W10,@new,PAYROLL SERVICES,8094 SAND PIPER CIRCLE,,WHITE MARSH,MD,21236,,
4,Q210,9595791,2016228,GOPAC,521237780,AMERICAN EXPRESS,American Express,1,2010-04-26,U00,tempU,AMEX PYMT $6634.00 DETAIL FOLLOWS,P.O. BOX 1270,,NEWARK,NJ,7101,,


In [34]:
# Expenditures (Expend22/expends22.csv), from FEC electronic filings.
# Column names for the file; see the OpenSecrets data dictionary for Expenditure Data:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20Expenditures.htm
columns_expends22 = [
    'cycle', 'id', 'transid', 'crpfilerid', 'recipcode', 'pacshort', 'crprecipname',
    'expcode', 'amount', 'date', 'city', 'state', 'zip', 'cmteid_ef',
    'candid', 'type', 'descrip', 'pg', 'elecother', 'enttype', 'source',
]

# Only the first 1,000,000 rows are read.
df_expends22 = create_dataframe(
    '../../data/open_secrets/Expend22/expends22.csv',
    columns_expends22,
    nrows=1_000_000,
)

Creating dataframe...
dataframe created...


In [35]:
# Preview the first rows of the expenditures table.
df_expends22.head()

Unnamed: 0,cycle,id,transid,crpfilerid,recipcode,pacshort,crprecipname,expcode,amount,date,city,state,zip,cmteid_ef,candid,type,descrip,pg,elecother,enttype,source
0,2022,1,500252553,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,1,2021-05-01,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
1,2022,2,500252556,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,10,2021-05-02,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
2,2022,3,500252557,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,4,2021-05-02,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
3,2022,4,500252608,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,6,2021-05-09,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto
4,2022,5,500252609,C00143230,DP,New York State Democratic Cmte,ActBlue,F40,1,2021-05-16,Somerville,MA,21443132,,,,Credit Card Processing Fee,,,ORG,@auto


In [36]:
# The next cell reads an .xls workbook with pd.read_excel; pandas uses the xlrd
# engine for that format, so make sure the package is available first.
# NOTE(review): xlrd is not referenced directly in the cells shown here; the
# import appears to act only as an availability check.
install_if_needed('xlrd')
import xlrd

xlrd is already installed.


In [37]:
# Candidate IDs (CRP_IDs.xls).
# This workbook is laid out very differently from the CSV files, so it is read
# directly with pandas instead of going through create_dataframe.
#
# Positional index -> column name. The leading 'blank_excel_column' entry is
# necessary: it keeps the remaining names aligned with positions 1-5, which are
# the labels left after the first column of the sheet is dropped.
columns_crp_ids = dict(enumerate(
    ['blank_excel_column', 'cid', 'crpname', 'party', 'distidrunfor', 'feccandid']
))

df_crp_ids = (
    # Skip the first 15 rows (presumably title/notes above the table -- TODO confirm),
    # treat no row as a header, and read at most 10,000 rows.
    pd.read_excel('../../data/open_secrets/CRP_IDs.xls', nrows=10000, header=None, skiprows=15)
    # Discard the first column of the sheet.
    .pipe(lambda sheet: sheet.drop(sheet.columns[0], axis=1))
    # Name the surviving integer-labelled columns.
    .rename(columns=columns_crp_ids)
)

In [38]:
# Preview the first rows of the candidate-ID table.
df_crp_ids.head()

Unnamed: 0,cid,crpname,party,distidrunfor,feccandid
0,N00034296,"Aalders, Tim",R,UT03,H2UT03280
1,N00047923,"Aazami, Shervin",D,CA32,H2CA30291
2,N00051397,"Abahsain, Jill",D,MN07,H2MN07162
3,N00051715,"Abbott, Martha",3,VTS2,S2VT00359
4,N00048268,"Abdelhamid, Rana",D,NY12,H2NY12197
