[View in Colaboratory](https://colab.research.google.com/github/spongeclipper/korruption/blob/master/PAC2PAC.ipynb)

In [59]:
#@title Download the pac2pac donations list for 2017-2018.
import collections, datetime, os, urllib, zipfile

# Download the file, and unzip it.
urllib.request.urlretrieve('https://www.fec.gov/files/bulk-downloads/2018/oth18.zip', 'oth18.zip')
with zipfile.ZipFile('oth18.zip') as zip_ref:
  zip_ref.extractall('.')

# Populate 'donations' with the list of transactions.  
Donation = collections.namedtuple('Donation',
                                  ['Recipient',
                                   'Donor',
                                   'Amount',
                                   'Election',
                                   'Date',
                                  ])
donations = []
# Populate 'pacs' with the list of PACs.
Pac = collections.namedtuple('Pac', ['Name', 'Zip', 'Employer', 'Occupation'])
pacs = {}  # Indexed by ID

# Parse that shit!
with open('itoth.txt', 'r') as raw:
  for l in raw.readlines():
    parts = l.strip().split('|')
    
    if not parts[15]:
      continue  # This is not a donation?
      
    
    d = Donation(
        Recipient=parts[0],
        Donor=parts[15],
        Amount=int(parts[14]),
        Election=parts[3],
        Date=datetime.datetime.strptime(parts[13], '%m%d%Y'),
    )
    donations.append(d)
    
    pacs[parts[15]] = Pac(
        Name=parts[7],
        Zip=parts[10],
        Employer=parts[11],
        Occupation=parts[12],
    )

print('Parsed %d donations and %d entities' % (len(donations), len(pacs)))
  

ValueError: ignored

In [0]:
def getName(id):
  if id in pacs:
    return pacs[id].Name
  return id

In [35]:
#@title Some basic stats
import operator

# Who's the biggest donor?
d_amts = collections.defaultdict(int)
r_amts = collections.defaultdict(int)
for d in donations:
  d_amts[d.Donor] += d.Amount
  r_amts[d.Recipient] += d.Amount
    
print('Top 10 donors')
sum_d_amts = [(getName(x[0]), x[1]) for x in sorted(d_amts.items(), key=operator.itemgetter(1))]
sum_d_amts.reverse()
for (n,amt) in sum_d_amts[0:10]:
  print(n,amt)
  
print('\nTop 10 recipients')
sum_r_amts = [(getName(x[0]), x[1]) for x in sorted(r_amts.items(), key=operator.itemgetter(1))]
sum_r_amts.reverse()
for (n,amt) in sum_r_amts[0:10]:
  print(n,amt)


Top 10 donors
NRCC-NATIONAL REPUBLICAN CONGRESSIONAL COMMITTEE 50323410
TEAM RYAN 46007785
NATIONAL REPUBLICAN CONGRESSIONAL COMMITTEE 33045433
DEMOCRATIC CONGRESSIONAL CAMPAIGN COMMITTEE 32961812
REPUBLICAN NATIONAL COMMITTEE 32312588
TRUMP MAKE AMERICA GREAT AGAIN COMMITTEE 30975445
DNC STATE PARTY VICTORY FUND 29853560
NATIONAL REPUBLICAN SENATORIAL COMMITTEE 24853451
DONALD J. TRUMP FOR PRESIDENT, INC. 24761794
RYAN FOR CONGRESS 18506281

Top 10 recipients
NRCC-NATIONAL REPUBLICAN CONGRESSIONAL COMMITTEE 99775476
DEMOCRATIC CONGRESSIONAL CAMPAIGN COMMITTEE 49528851
TEAM RYAN 49482693
REPUBLICAN NATIONAL COMMITTEE 34620706
DONALD J. TRUMP FOR PRESIDENT, INC. 31519295
DNC STATE PARTY VICTORY FUND 27915723
NATIONAL REPUBLICAN SENATORIAL COMMITTEE 26964070
TRUMP MAKE AMERICA GREAT AGAIN COMMITTEE 24497927
SENATE MAJORITY PAC 23779417
DEMOCRATIC SENATORIAL CAMPAIGN COMMITTEE 20874831


In [54]:
#@title Can we find "passthrough" PACs by looking for those with mostly-matched negative and positive value transactions?
pacTransactions = collections.defaultdict(list)
for d in donations:
  # Recipient records a balance, and donor a loss
  pacTransactions[d.Recipient].append(d.Amount)
  pacTransactions[d.Donor].append(0-d.Amount)
  
matchedVsUnmatched = collections.defaultdict(tuple)  # let's go through each PAC and count (matched, unmatched)
for pac, transactions in pacTransactions.items():
  matched = 0
  unmatched = 0
  while transactions:
    x = transactions[0]
    if 0-x in transactions[1:]:
      matched+=1
      ts = transactions[1:]
      idx = ts.index(0-x)
      ts = ts[:idx] + ts[idx+1:]
      transactions = ts
    else:
      unmatched+=1
      transactions = transactions[1:]
  matchedVsUnmatched[pac] = (matched, unmatched)

# Sort by matched - unmatched
s = sorted(matchedVsUnmatched.items(), key=lambda x: x[1][0]-x[1][1])
s.reverse()
[(pacs[k].Name, v) for k,v in s[:10]]

[('DNC STATE PARTY VICTORY FUND', (1920, 660)),
 ('NRCC-NATIONAL REPUBLICAN CONGRESSIONAL COMMITTEE', (1800, 584)),
 ('NATIONAL REPUBLICAN SENATORIAL COMMITTEE', (1078, 102)),
 ('DEMOCRATIC SENATORIAL CAMPAIGN COMMITTEE', (1328, 360)),
 ('DEMOCRATIC CONGRESSIONAL CAMPAIGN COMMITTEE', (1898, 1302)),
 ('HEARTLAND VALUES PAC', (435, 67)),
 ('NEW DEMOCRAT COALITION PAC', (531, 196)),
 ('ALAMO PAC', (385, 73)),
 ('TOMORROW IS MEANINGFUL PAC-TIM PAC', (324, 38)),
 ('PROJECT WEST PAC', (292, 23))]