# SI 608 Project – Workspace #
#### General scratchpad workspace that preloads all the dataframes. ####
See <code>./modules</code> to review how libraries are installed and imported, as well as where the data is loaded, cleaned, and formatted. This notebook is only here as a helpful tool: make a copy and do whatever you'd like, or don't use it at all if that's preferable.

[OpenSecrets Data Dictionary Index](../../docs/open_source_data_dictionary.md)
<br><small><em>(View the index with markdown preview)</em></small>

### Initialize
Init file contains helper functions used throughout the project.

In [None]:
%run modules/init.ipynb

#### Load datasets ####

This module provides a single function for all of the *contribution* data from OpenSecrets.

In [None]:
%run modules/data.ipynb

In [None]:
# 527 committees -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Cmtes.htm
columns_cmtes527 = [
    'cycle', 'rpt', 'ein', 'crp527name', 'affiliate',
    'ultorg', 'recipcode', 'cmteid', 'cid', 'eccmteid',
    'party', 'primcode', 'source', 'ffreq', 'ctype',
    'csource', 'viewpt', 'comments', 'state',
]

df_cmtes527 = create_dataframe(
    '../../data/open_secrets/527/cmtes527.csv',
    columns_cmtes527,
)

In [None]:
df_cmtes527.head(3)

In [None]:
# 527 expenditures (IRS Form 8872B) -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Expenditures.htm
columns_expends527 = [
    'rpt', 'formid', 'schbid', 'orgname', 'ein',
    'recipient', 'recipientcrp', 'amount', 'date', 'expcode',
    'source', 'purpose', 'addr1', 'addr2', 'city',
    'state', 'zip', 'employer', 'occupation',
]

df_expends527 = create_dataframe(
    '../../data/open_secrets/527/expends527.csv',
    columns_expends527,
)

In [None]:
df_expends527.head(3)

In [None]:
# 527 contributions (IRS Form 8872A) -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20527%20Receipts.htm
columns_rcpts527 = [
    'id', 'rpt', 'formid', 'schaid', 'contribid',
    'contrib', 'amount', 'date', 'orgname', 'ultorg',
    'realcode', 'recipid', 'recipcode', 'party', 'recipient',
    'city', 'state', 'zip', 'zip4', 'pmsa',
    'employer', 'occupation', 'ytd', 'gender', 'source',
]

df_rcpts527 = create_dataframe(
    '../../data/open_secrets/527/rcpts527.csv',
    columns_rcpts527,
)

In [None]:
df_rcpts527.head(3)

In [None]:
# Candidates -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20Candidates%20Data.htm
columns_cands22 = [
    'cycle', 'feccandid', 'cid', 'firstlastp', 'party', 'distidrunfor',
    'distidcurr', 'currcand', 'cyclecand', 'crpico', 'recipcode', 'nopacs',
]

df_cands22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/cands22.csv',
    columns_cands22,
)

# Party markers that are removed from the names, in removal order.
party_labels = (" (3)", " (R)", " (D)", " (I)", " (L)", " (U)", " (i)")


def _strip_party_label(name):
    """Return *name* with every party marker removed; non-strings pass through unchanged."""
    if not isinstance(name, str):
        return name
    for label in party_labels:
        name = name.replace(label, "")
    return name


# Name without the party label, stored next to the original column.
df_cands22['firstlast_cands22'] = df_cands22['firstlastp_cands22'].apply(_strip_party_label)

In [None]:
df_cands22.head(3)

In [None]:
# Committees -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20Cmtes.htm
columns_cmtes22 = [
    'cycle', 'cmteid', 'pacshort', 'affiliate', 'ultorg',
    'recipid', 'recipcode', 'feccandid', 'party', 'primcode',
    'source', 'sensitive', 'foreign', 'active',
]

df_cmtes22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/cmtes22.csv',
    columns_cmtes22,
)

In [None]:
# df_cmtes22.head(3)

In [None]:
# Candidate lead pac committees: the committee id differs from the recipient id
# and the recipient id starts with 'N'.
# na=False makes a missing recipid count as "no match", so the mask stays strictly
# boolean (same convention as the pac_other22 filters in this notebook).
recipid_starts_with_n = df_cmtes22['recipid_cmtes22'].str.startswith('N', na=False)
df_lead_cmtes22 = df_cmtes22[
    (df_cmtes22['cmteid_cmtes22'] != df_cmtes22['recipid_cmtes22']) & recipid_starts_with_n
]

In [None]:
df_lead_cmtes22.head(3)

In [None]:
# Non-lead pac committees: the committee id equals the recipient id
# and the recipient id starts with 'C'.
# na=False makes a missing recipid count as "no match", so the mask stays strictly
# boolean (same convention as the pac_other22 filters in this notebook).
recipid_starts_with_c = df_cmtes22['recipid_cmtes22'].str.startswith('C', na=False)
df_pac_cmtes22 = df_cmtes22[
    (df_cmtes22['cmteid_cmtes22'] == df_cmtes22['recipid_cmtes22']) & recipid_starts_with_c
]

In [None]:
df_pac_cmtes22.head(3)

In [None]:
# PAC to PAC data (Pac_other table) -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20PAC%20to%20PAC%20Data.htm
columns_pac_other22 = [
    'cycle', 'fecrecno', 'filerid', 'donorcmte', 'contriblendtrans', 'city',
    'state', 'zip', 'fecoccemp', 'primcode', 'date', 'amount',
    'recipid', 'party', 'otherid', 'recipcode', 'recipprimcode', 'amend',
    'report', 'pg', 'microfilm', 'type', 'realcode', 'source',
]

df_pac_other22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/pac_other22.csv',
    columns_pac_other22,
)

In [None]:
# Identify the donor pacid.
# The donor is "otherid" when "type" starts with "1" and "filerid" when "type"
# starts with "2"; for any other (or missing) type the donor is left missing.
# NOTE(review): this presumably mirrors FEC transaction codes (1x = receipt reported
# by the recipient, 2x = disbursement reported by the donor) -- confirm against the
# data dictionary.
transaction_type = df_pac_other22['type_pac_other22']

# na=False: a missing type matches neither prefix instead of raising.
donor_is_other = transaction_type.str.startswith('1', na=False)
donor_is_filer = transaction_type.str.startswith('2', na=False)

# Start from filerid where the filer is the donor (NaN elsewhere), then overlay
# otherid for the rows where the other party is the donor.
donor_ids = df_pac_other22['filerid_pac_other22'].where(donor_is_filer)
donor_ids = donor_ids.mask(donor_is_other, df_pac_other22['otherid_pac_other22'])

# Undetermined donors are NaN here; use isna()/notna() to test for them.
df_pac_other22['donorid_pac_other22'] = donor_ids

In [None]:
# df_pac_other22.head(3)

In [None]:
# Notice that candidates are never filers in pac_other22.
df_pac_other22[df_pac_other22['filerid_pac_other22'].str.startswith('N', na=False)]

In [None]:
# Rows whose recipient id does not start with 'N'; a missing recipid counts as
# "does not start with 'N'" and is therefore kept.
recipid_is_n = df_pac_other22['recipid_pac_other22'].str.startswith('N', na=False)
df_pac_to_pac = df_pac_other22[~recipid_is_n]

In [None]:
df_pac_to_pac.head(3)

In [None]:
# Flows from pacs to candidates: rows whose recipient id starts with 'N'
# (a missing recipid never matches).
recipid_is_n = df_pac_other22['recipid_pac_other22'].str.startswith('N', na=False)
df_pac_to_cand = df_pac_other22[recipid_is_n]

In [None]:
df_pac_to_cand.head(3)

In [None]:
# PACs to candidates (PAC table) -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20PAC%20to%20Cands%20Data.htm
# Reading a row: "pacid", which represents "realcode" (industry or ideology),
# contributes "amount" to "cid" either directly or indirectly ("di").
columns_pacs22 = [
    'cycle', 'fecrecno', 'pacid', 'cid', 'amount',
    'date', 'realcode', 'type', 'di', 'feccandid',
]

df_pacs22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/pacs22.csv',
    columns_pacs22,
)

In [None]:
df_pacs22.head(3)

In [None]:
# TODO: from df_pacs22, get all non-lead pacs giving to cids

In [None]:
# Individual contributions -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20for%20Individual%20Contribution%20Data.htm
columns_indivs22 = [
    'cycle', 'fectransid', 'contribid', 'contrib_last', 'contrib_first', 'recipid',
    'orgname', 'ultorg', 'realcode', 'date', 'amount', 'street',
    'city', 'state', 'zip', 'recipcode', 'type', 'cmteid',
    'otherid', 'gender', 'microfilm', 'occupation', 'employer', 'source',
]

# The full dataset is huge and crashes the machine, so for now only 5,000,000 rows
# are loaded, until there is a better way to trim it to the last cycle or two.
df_indivs22 = create_dataframe(
    '../../data/open_secrets/CampaignFin22/indivs22.csv',
    columns_indivs22,
    nrows=5_000_000,
)

In [None]:
df_indivs22.head(3)

In [None]:
# Expenditures (FEC electronic filings) -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20Expenditures.htm
columns_expends22 = [
    'cycle', 'id', 'transid', 'crpfilerid', 'recipcode', 'pacshort',
    'crprecipname', 'expcode', 'amount', 'date', 'city', 'state',
    'zip', 'cmteid_ef', 'candid', 'type', 'descrip', 'pg',
    'elecother', 'enttype', 'source',
]

# Row cap passed through to create_dataframe.
df_expends22 = create_dataframe(
    '../../data/open_secrets/Expend22/expends22.csv',
    columns_expends22,
    nrows=1_000_000,
)

In [None]:
df_expends22.head(3)

In [None]:
# Lobbying: agencies -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_agency.htm
columns_lob_agency = [
    'uniqid',
    'agencyid',
    'agency',
]

df_lob_agency = create_dataframe(
    '../../data/open_secrets/Lobby/lob_agency.csv',
    columns_lob_agency,
)

In [None]:
df_lob_agency.head(3)

In [None]:
# Lobbying: bills -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_bills.htm
columns_lob_bills = [
    'b_id',
    'si_id',
    'congno',
    'bill_name',
]

df_lob_bills = create_dataframe(
    '../../data/open_secrets/Lobby/lob_bills.csv',
    columns_lob_bills,
)

In [None]:
df_lob_bills.head(3)

In [None]:
# Lobbying: industries -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_indus.htm
columns_lob_indus = [
    'client',
    'sub',
    'total',
    'year',
    'catcode',
]

df_lob_indus = create_dataframe(
    '../../data/open_secrets/Lobby/lob_indus.csv',
    columns_lob_indus,
)

In [None]:
df_lob_indus.head(3)

In [None]:
# Lobbying: issues -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_issues.htm
# The integers 5..10 stand in for columns that have no name here.
# NOTE(review): presumably extra fields in the file that the dictionary does not
# describe -- confirm.
columns_lob_issue = [
    'si_id', 'uniqid', 'issueid', 'issue', 'specificissue',
    *range(5, 11),
    'year',
]

df_lob_issue = create_dataframe(
    '../../data/open_secrets/Lobby/lob_issue.csv',
    columns_lob_issue,
)

In [None]:
df_lob_issue.head(3)

In [None]:
# Lobbying: issues without the specific-issue text -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_issues.htm
columns_lob_issue_no_specific = [
    'si_id',
    'uniqid',
    'issueid',
    'issue',
    'year',
]

df_lob_issue_no_specific = create_dataframe(
    '../../data/open_secrets/Lobby/lob_issue_NoSpecficIssue.csv',
    columns_lob_issue_no_specific,
)

In [None]:
df_lob_issue_no_specific.head(3)

In [None]:
# Lobbying: lobbying records -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbying.htm
columns_lob_lobbying = [
    'uniqid', 'registrant_raw', 'registrant', 'isfirm', 'client_raw', 'client',
    'ultorg', 'amount', 'catcode', 'source', 'self', 'includensfs',
    'use', 'ind', 'year', 'type', 'typelong', 'affiliate',
]

# Row cap passed through to create_dataframe.
df_lob_lobbying = create_dataframe(
    '../../data/open_secrets/Lobby/lob_lobbying.csv',
    columns_lob_lobbying,
    nrows=1_000_000,
)

In [None]:
df_lob_lobbying.head(3)

In [None]:
df_lob_lobbying.head(3)

In [None]:
# Lobbying: lobbyists -- OpenSecrets data dictionary:
# https://www.opensecrets.org/resources/datadictionary/Data%20Dictionary%20lob_lobbyists.htm
columns_lob_lobbyist = [
    'uniqid',
    'lobbyist_lastname_std', 'lobbyist_firstname_std',
    'lobbyist_lastname_raw', 'lobbyist_firstname_raw',
    'lobbyist_id', 'year', 'officialposition', 'cid', 'formercongmem',
]

# Row cap passed through to create_dataframe.
df_lob_lobbyist = create_dataframe(
    '../../data/open_secrets/Lobby/lob_lobbyist.csv',
    columns_lob_lobbyist,
    nrows=1_000_000,
)

In [None]:
df_lob_lobbyist.head(3)

In [None]:
# Lobbying: report types.
# No documentation provided on OpenSecrets.com
columns_lob_rpt = [
    'typelong',
    'typeshort',
]

df_lob_rpt = create_dataframe(
    '../../data/open_secrets/Lobby/lob_rpt.csv',
    columns_lob_rpt,
)

In [None]:
df_lob_rpt.head(3)

In [None]:
install_if_needed('xlrd')
import xlrd

In [None]:
# Candidate ids
# This workbook is laid out very differently from the CSVs, so it is read directly
# with pandas instead of going through create_dataframe.
crp_id_names = ['blank_excel_column', 'cid', 'crpname', 'party', 'distidrunfor', 'feccandid']

# Position -> name. The leading placeholder is necessary: the rename happens after
# the first column is dropped, when the remaining column labels still start at 1.
columns_crp_ids = {position: name for position, name in enumerate(crp_id_names)}

crp_ids_raw = pd.read_excel('../../data/open_secrets/CRP_IDs.xls', header=None, skiprows=15)
df_crp_ids = (
    crp_ids_raw
    .drop(columns=crp_ids_raw.columns[0])
    .rename(columns=columns_crp_ids)
)

In [None]:
df_crp_ids.head(3)

In [None]:
from io import StringIO

# The categories file has free-text lines before the tab-separated table; the table
# begins at the first line that starts with 'Catcode'.
crp_filepath = '../../data/open_secrets/CRP_Categories.txt'
with open(crp_filepath, 'r') as file:
    lines = file.readlines()

header_line_index = next(
    (i for i, line in enumerate(lines) if line.startswith('Catcode')),
    None,
)
if header_line_index is None:
    # Fail with a message that names the file instead of a bare StopIteration.
    raise ValueError(f"No header line starting with 'Catcode' found in {crp_filepath}")

table_data = ''.join(lines[header_line_index:])
df_crp_cats = pd.read_csv(StringIO(table_data), sep='\t')

# Normalize the headers: lowercase, spaces replaced by underscores.
df_crp_cats.columns = df_crp_cats.columns.str.lower().str.replace(' ', '_')

In [None]:
df_crp_cats.head(3)

---

#### Sanity checks ####

One-by-one tests for the U.S. House Ways & Means committee members:

In [None]:
other_pacid = 'C00000422' # American Medical Association

In [None]:
member_name = 'Adrian Smith'
member_cid = 'N00027623' # Candidate ID
member_pid = 'C00412890' # Lead PAC ID

In [None]:
member_name2 = 'Jodey Arrington'
member_cid2 = 'N00038285'
member_pid2 = 'C00588657'

In [None]:
# member_name = 'Don Beyer'
# member_cid = 'N00036018'

In [None]:
# member_name = 'Earl Blumenauer'
# member_cid = 'N00007727'

In [None]:
# member_name = 'Vernon Buchanan'
# member_cid = 'N00027626'

In [None]:
# member_name = 'Mike Kelly'
# member_cid = 'N00031647'

In [None]:
# Member info
def _display_one_row(frame):
    """Display one random row of *frame*, or print a note when it has no rows."""
    if frame.empty:
        # DataFrame.sample() raises ValueError on an empty frame.
        print("(no matching rows)")
    else:
        display(frame.sample())


print(f"MEMBER INFO: {member_name}, CID: {member_cid}, PID: {member_pid}")
print("------------------------------------------------------------------------")
print("DATASET: pacs22")
# Rows where the member's lead PAC appears as the pacid.
_display_one_row(df_pacs22[df_pacs22['pacid_pacs22'] == member_pid])
print("------------------------------------------------------------------------")
print("DATASET: cmtes22")
# Committees whose recipient id is the member's candidate id.
_display_one_row(df_cmtes22[df_cmtes22['recipid_cmtes22'] == member_cid])

In [None]:
# Sanity check: total amount_pacs22 over the rows whose cid is the member.
df_pacs22.loc[df_pacs22['cid_pacs22'] == member_cid, 'amount_pacs22'].sum()

In [None]:
# Sanity check: total amount_pac_other22 over the rows whose recipid is the member.
df_pac_other22.loc[df_pac_other22['recipid_pac_other22'] == member_cid, 'amount_pac_other22'].sum()

In [None]:
# Sanity check: total amount_indivs22 over the rows whose recipid is the member.
df_indivs22.loc[df_indivs22['recipid_indivs22'] == member_cid, 'amount_indivs22'].sum()

In [None]:
# Final check: add up the member's totals from the three datasets.
# NOTE: df_indivs22 is loaded with a row cap, so its share is only a partial total.
total_pacs22 = df_pacs22.loc[df_pacs22['cid_pacs22'] == member_cid, 'amount_pacs22'].sum()
total_pac_other22 = df_pac_other22.loc[
    df_pac_other22['recipid_pac_other22'] == member_cid, 'amount_pac_other22'
].sum()
total_indivs22 = df_indivs22.loc[
    df_indivs22['recipid_indivs22'] == member_cid, 'amount_indivs22'
].sum()

total_pacs22 + total_pac_other22 + total_indivs22

In [None]:
# # Save csv
# df_pacs22[df_pacs22['cid_pacs22'] == member_cid].sort_values(by='amount_pacs22', ascending=False).to_csv('pacs22_' + member_cid + '.csv')
# df_pac_other22[df_pac_other22['recipid_pac_other22'] == member_cid].sort_values(by='amount_pac_other22', ascending=False).to_csv('pac-other22_' + member_cid + '.csv')
# df_indivs22[df_indivs22['recipid_indivs22'] == member_cid].sort_values(by='amount_indivs22', ascending=False).to_csv('indivs22_' + member_cid + '.csv')

In [None]:
# # Preview
# display(df_pacs22[df_pacs22['cid_pacs22'] == member_cid].head(3))
# display(df_pac_other22[df_pac_other22['recipid_pac_other22'] == member_cid].head(3))
# display(df_indivs22[df_indivs22['recipid_indivs22'] == member_cid].head(3))

---

### Ways and Means ###

#### Member Details ####

In [None]:
# Democratic members, written the way the names are expected to appear in
# df_cands22['firstlast_cands22'] (the candidate name with the party label removed).
wm_dems = [
    'Richard Neal',
    'Brad Schneider',
    'Jimmy Panetta',
    'Bill Pascrell',
    'Linda Sánchez',
    'Terri Sewell',
    'Gwen Moore',
    'Tom Suozzi',
    'Mike Thompson',
    'John Larson',
    'Stephanie Murphy',
    'Donald Beyer',
    'Earl Blumenauer',
    'Brendan Boyle',
    'Judy Chu',
    'Danny Davis',
    'Suzan DelBene',
    'Lloyd Doggett',
    'Dwight Evans',
    'Jimmy Gomez',
    'Brian Higgins',
    'Steven Horsford',
    'Dan Kildee',
    'Ronald Kind'
]

df_wm_dem_members = pd.DataFrame(wm_dems, columns=['firstlastp'])
df_wm_dem = pd.merge(df_cands22, df_wm_dem_members, left_on='firstlast_cands22', right_on='firstlastp', how='inner')

# The inner merge drops any member whose name has no exact match in df_cands22
# (nickname, accent, or spelling differences), so report them instead of losing
# them silently. A name may also match more than one candidate row.
wm_dems_matched = set(df_wm_dem['firstlast_cands22'])
wm_dems_unmatched = [name for name in wm_dems if name not in wm_dems_matched]
if wm_dems_unmatched:
    print(f"WARNING: no cands22 match for: {wm_dems_unmatched}")

In [None]:
print(f"len: {len(df_wm_dem)}")
df_wm_dem.head(3)

In [None]:
# Republican members, written the way the names are expected to appear in
# df_cands22['firstlast_cands22'] (the candidate name with the party label removed).
wm_rep = [
    'Jason Smith', 
    'Vern Buchanan',
    'Kevin Hern',
    'Lloyd Smucker',
    'Mike Kelly',
    'Nicole Malliotakis',
    'Randy Feenstra',
    'Jodey Arrington',
    'Greg Steube',
    'Ron Estes',
    'Adrian Smith',
    'Beth Van Duyne',
    'Carol Miller',
    'Michelle Fischbach',
    'Michelle Steel',
    'Mike Carey',
    'Blake Moore',
    'Brad Wenstrup',
    'Brian Fitzpatrick',
    'Claudia Tenney',
    'Darin LaHood',
    'David Kustoff',
    'David Schweikert',
    'Drew Ferguson',
    'Gregory Murphy'
]

df_wm_rep_members = pd.DataFrame(wm_rep, columns=['firstlastp'])
df_wm_rep = pd.merge(df_cands22, df_wm_rep_members, left_on='firstlast_cands22', right_on='firstlastp', how='inner')

# The inner merge drops any member whose name has no exact match in df_cands22
# (nickname, accent, or spelling differences), so report them instead of losing
# them silently. A name may also match more than one candidate row.
wm_rep_matched = set(df_wm_rep['firstlast_cands22'])
wm_rep_unmatched = [name for name in wm_rep if name not in wm_rep_matched]
if wm_rep_unmatched:
    print(f"WARNING: no cands22 match for: {wm_rep_unmatched}")

In [None]:
print(f"len: {len(df_wm_rep)}")
df_wm_rep.head(3)

In [None]:
# Stack both parties into one frame with a fresh 0..n-1 index.
df_wm_members = pd.concat([df_wm_dem, df_wm_rep], ignore_index=True)

In [None]:
# df_wm_members = df_wm_members.rename(columns={'feccandid_cands22': 'feccandid_member', 'cid_cands22': 'cid_member',
#                               'firstlast_cands22': 'firstlast_member', 'party_cands22': 'party_member',
#                               'currcand_cands22': 'currcand_member', 'cyclecand_cands22': 'cyclecand_member',
#                               'crpico_cands22': 'crpico_member', 'recipcode_cands22': 'recipcode_member'})

# Remove the cands22 columns that are not carried into the member table.
df_wm_members = df_wm_members.drop(
    columns=[
        'cycle_cands22',
        'firstlastp_cands22',
        'distidrunfor_cands22',
        'distidcurr_cands22',
        'nopacs_cands22',
    ]
)

In [None]:
print(f"len: {len(df_wm_members)}")
df_wm_members.head(3)

#### Members' Leadpac Details ####

In [None]:
# Attach every committee whose recipient id is the member's candidate id.
# Note: candidates may have multiple committees for reconciling accounts across cycles.
df_wm_leadpacs = df_wm_members.merge(
    df_cmtes22,
    how='inner',
    left_on='cid_cands22',
    right_on='recipid_cmtes22',
)

# Re-suffix the committee columns that are kept with "lead".
leadpac_fields = ['cmteid', 'pacshort', 'affiliate', 'ultorg', 'recipid', 'primcode', 'active']
df_wm_leadpacs = df_wm_leadpacs.rename(
    columns={f'{field}_cmtes22': f'{field}_cmtes22lead' for field in leadpac_fields}
)
# df_wm_leadpacs = pd.merge(df_wm_members, df_cmtes22, left_on='cid_member', right_on='recipid_cmtes22', how='inner')
# df_wm_leadpacs = df_wm_leadpacs.rename(columns={'cmteid_cmtes22': 'cmteid_leadpac', 'pacshort_cmtes22': 'pacshort_leadpac',
#                                                 'affiliate_cmtes22': 'affiliate_leadpac', 'ultorg_cmtes22': 'ultorg_leadpac',
#                                                 'recipid_cmtes22': 'recipid_leadpac', 'primcode_cmtes22': 'primcode_leadpac',
#                                                 'active_cmtes22': 'active_leadpac'})

# Remove the committee columns that are not carried forward.
df_wm_leadpacs = df_wm_leadpacs.drop(
    columns=[
        'cycle_cmtes22',
        'recipcode_cmtes22',
        'feccandid_cmtes22',
        'party_cmtes22',
        'source_cmtes22',
        'sensitive_cmtes22',
        'foreign_cmtes22',
    ]
)

In [None]:
print(f"len: {len(df_wm_leadpacs)}")
df_wm_leadpacs.head(3)

#### Members' leadpacs' revenues ####
(Lacks identifying details)

In [None]:
# Link all transactions to candidate pacs.
# Only rows with a strictly positive amount are kept (revenue); zero and negative
# amounts are filtered out as expenditures.
# THIS MAY DISTORT THINGS! LOOK INTO THIS LATER.
is_revenue = df_pacs22['amount_pacs22'] > 0
df_pacs22_revenues = df_pacs22[is_revenue]

In [None]:
df_pacs22_revenues.head(3)

In [None]:
# Join each member/leadpac row to the revenue rows for the same candidate id.
# Keep the "di" field, for now track both direct and indirect income:
# https://raw.githubusercontent.com/nketchum/si608exploration/refs/heads/main/docs/direct_vs_indirect_contrib.txt
df_wm_revenues = df_wm_leadpacs.merge(
    df_pacs22_revenues,
    how='inner',
    left_on='cid_cands22',
    right_on='cid_pacs22',
)
# df_wm_revenues = pd.merge(df_wm_leadpacs, df_pacs22_revenues, left_on='cid_member', right_on='cid_pacs22', how='inner')
# df_wm_revenues = df_wm_revenues.rename(columns={'fecrecno_pacs22': 'fecrecno_revenue', 'pacid_pacs22': 'pacid_revenue',
#                                                 'amount_pacs22': 'amount_revenue', 'date_pacs22': 'date_revenue',
#                                                 'realcode_pacs22': 'realcode_revenue', 'type_pacs22': 'type_revenue',
#                                                 'di_pacs22': 'di_revenue', 'feccandid_pacs22': 'feccandid_revenue'})

# cid_pacs22 duplicates the join key cid_cands22; the cycle column is dropped too.
df_wm_revenues = df_wm_revenues.drop(columns=['cycle_pacs22', 'cid_pacs22'])

In [None]:
df_wm_revenues.head(3)

#### Members' leadpacs' transactions ####

(Money flows + details of money flow)

In [None]:
# Add the committee details of the contributing pac (pacid_pacs22) to each revenue row.
df_wm_transactions = df_wm_revenues.merge(
    df_cmtes22,
    how='inner',
    left_on='pacid_pacs22',
    right_on='cmteid_cmtes22',
)
# df_wm_transactions = pd.merge(df_wm_revenues, df_cmtes22, left_on='pacid_revenue', right_on='cmteid_cmtes22', how='inner')
# df_wm_transactions = df_wm_transactions.rename(columns={'cmteid_cmtes22': 'cmteid_transaction', 'pacshort_cmtes22': 'pacshort_transaction', 
#                                                         'affiliate_cmtes22': 'affiliate_transaction', 'ultorg_cmtes22': 'ultorg_transaction', 
#                                                         'recipid_cmtes22': 'recipid_transaction', 'recipcode_cmtes22': 'recipcode_transaction', 
#                                                         'feccandid_cmtes22': 'feccandid_transaction', 'party_cmtes22': 'party_transaction', 
#                                                         'source_cmtes22': 'source_transaction'})

# Remove the committee columns that are not carried into the transaction table.
df_wm_transactions = df_wm_transactions.drop(
    columns=[
        'cycle_cmtes22',
        'primcode_cmtes22',
        'sensitive_cmtes22',
        'foreign_cmtes22',
        'active_cmtes22',
    ]
)

In [None]:
from pathlib import Path

# to_csv does not create missing folders, so make sure the output folder exists
# before writing.
wm_transactions_csv = Path('outputs/df_wm_transactions.csv')
wm_transactions_csv.parent.mkdir(parents=True, exist_ok=True)

df_wm_transactions.to_csv(wm_transactions_csv)
df_wm_transactions.head(3)

---

### Oh Shit ###

In [None]:
df_pacs22

In [None]:
df_pac_other22[df_pac_other22['recipid_pac_other22'] == 'N00027623']

---

#### Empirical Test ####

In [None]:
df_wm_transactions.sample()

In [None]:
# Non-sandboxed test.
# Confirm that Adrian Smith, in particular, has received four contributions from
# the American Medical Association, in particular.
is_adrian_smith = df_wm_transactions['cid_cands22'] == 'N00027623'
is_from_ama = df_wm_transactions['cmteid_cmtes22'] == 'C00000422'
df_wm_transactions[is_adrian_smith & is_from_ama]

---

### Toy Tests ###
Do not delete anything in this section.

In [None]:
# # Entities that have given to Adrian.
# df_pacs22_revenues[df_pacs22_revenues['cid_pacs22'] == 'N00027623'].head(3)

In [None]:
# # Taking one of the "pacid_pacs22" values from above to see the giver.
# df_cmtes22[df_cmtes22['cmteid_cmtes22'] == 'C00456335'].head(5)

In [None]:
# # SANDBOX TESTING.
# # Test two members on two donors.

# # 1. Select two or three members and their lead pacs.
# df_test_leadpacs = df_wm_leadpacs[(df_wm_leadpacs['cid_cands22'] == member_cid) | (df_wm_leadpacs['cid_cands22'] == member_cid2)]
# df_test_leadpacs.to_csv('outputs/df_test_leadpacs.csv')
# df_test_leadpacs.head(3)

# # 2. Attach each individual revenue-producing transactions to each associated member.
# df_test_revenues = pd.merge(df_test_leadpacs, df_pacs22_revenues, left_on='cid_cands22', right_on='cid_pacs22', how='inner')
# df_test_revenues.to_csv('outputs/df_test_revenues.csv')
# df_test_revenues.sample(5)

# # 3. Select two non-lead-pac committees to identify transactions, which are 
# #    the American Medical Association C00000422 and Physicians Mutual Insurance C00456335
# df_test_cmtes22 = df_cmtes22[(df_cmtes22['cmteid_cmtes22'] == 'C00000422') | (df_cmtes22['cmteid_cmtes22'] == 'C00456335')]
# df_test_cmtes22.to_csv('outputs/df_test_cmtes22.csv')
# df_test_cmtes22.head(3) 

# # 4. Expand transactions by adding committee details to each one.
# df_test_comb = pd.merge(df_test_revenues, df_test_cmtes22, left_on='pacid_pacs22', right_on='cmteid_cmtes22', how='inner')
# df_test_comb.to_csv('outputs/df_test_comb.csv')
# df_test_comb.head(3)

---

### Experiments ###

In [None]:
# 1. Given our member, we follow donations like this one.
# pacid, a member of realcode, gives amount di ([in]directly) to cid
to_member = df_pacs22['cid_pacs22'] == member_cid
from_other_pac = df_pacs22['pacid_pacs22'] == other_pacid
df_pacs22[to_member & from_other_pac]

In [None]:
# 2. We find out who the donor is.
# cmteid, aka affiliate of ultorg.
# Uses other_pacid (the same donor id as step 1) rather than a repeated literal,
# so both steps stay in sync when the donor under inspection changes.
df_cmtes22[df_cmtes22['cmteid_cmtes22'] == other_pacid]

In [None]:
# df_cmtes22[df_cmtes22['cmteid_cmtes22'] != df_cmtes22['recipid_cmtes22']]

In [None]:
# len(df_cmtes22)

In [None]:
# df_cmtes22[(df_cmtes22['cmteid_cmtes22'] == df_cmtes22['recipid_cmtes22']) & (df_cmtes22['recipid_cmtes22'].str.startswith('N'))]

In [None]:
# df_cmtes22[(df_cmtes22['cmteid_cmtes22'] != df_cmtes22['recipid_cmtes22']) & (df_cmtes22['recipid_cmtes22'].str.startswith('N'))]

In [None]:
# df_cmtes22[(df_cmtes22['cmteid_cmtes22'] == df_cmtes22['recipid_cmtes22']) & (df_cmtes22['recipid_cmtes22'].str.startswith('C'))]

In [None]:
# # 3. Now we look up their ideology
# df_crp_cats[df_crp_cats['catcode'] == 'H1100']