# Donation and expense information for top-10 candidates by cash flow

In [1]:
import pyspark
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pyspark.sql import functions as F

%matplotlib inline

In [2]:
sc = pyspark.sql.SparkSession.Builder()\
        .appName('ron-desantis')\
        .master('yarn')\
        .config('spark.executor.instances', '1')\
        .config('spark.yarn.executor.memoryOverhead', '3g')\
        .config('spark.executor.memory', '9g')\
        .config('spark.executor.cores', '2')\
        .enableHiveSupport()\
        .getOrCreate()

In [3]:
sc

In [4]:
!whoami

matt


In [5]:
!pwd

/home/matt/eda


In [40]:
top10_expends_query = """
SELECT cands18.CID AS cid, cands18.firstlastp AS name, expends18.total_expenditure AS total_expenditure
FROM pq_crp_cands18 cands18
LEFT JOIN (
  SELECT CRPFilerid, SUM(Amount) AS total_expenditure
  FROM pq_crp_expends18
  GROUP BY (CRPFilerid)
) expends18
ON cands18.CID = expends18.CRPFilerid
ORDER BY expends18.total_expenditure DESC
LIMIT 10
"""
top10 = sc.sql(top10_expends_query)

In [41]:
top10.show()

+---------+--------------------+-----------------+
|      cid|                name|total_expenditure|
+---------+--------------------+-----------------+
|N00028836|Lee L Mercer Jr. (D)|            7.2E8|
|N00023864|    Donald Trump (R)|      2.3519454E7|
|N00040675|      Jon Ossoff (D)|      1.6784475E7|
|N00003535|   Sherrod Brown (D)|      1.2163302E7|
|N00024817|      Doug Jones (D)|      1.1181153E7|
|N00004357|       Paul Ryan (R)|        9922739.0|
|N00004367|   Tammy Baldwin (D)|        9875947.0|
|N00004367|   Tammy Baldwin (D)|        9875947.0|
|N00027605|      Jon Tester (D)|        9824317.0|
|N00039122|     David Trone (D)|        9533775.0|
+---------+--------------------+-----------------+



#### `pacs` type codes

In [42]:
typeDf = sc.sql("SELECT `type`, COUNT(*) AS c, SUM(amount) FROM pq_crp_pacs18 GROUP BY `type` ORDER BY c DESC")

In [43]:
typeDf.toPandas()

Unnamed: 0,type,c,sum(amount)
0,24K,176462,320375008.0
1,24E,6287,75545670.0
2,24A,5584,82391379.0
3,24Z,2089,1147853.0
4,24C,739,4768107.0
5,22Z,105,-248115.0
6,24F,43,443713.0
7,24N,6,37632.0
8,24R,2,2000.0
9,24P,1,-1000.0


#### `pac_other` type codes

In [37]:
typeDf = sc.sql("SELECT `type`, COUNT(*) AS c, SUM(amount) FROM pq_crp_pac_other18 GROUP BY `type` ORDER BY c DESC")

In [39]:
typeDf.toPandas()

Unnamed: 0,type,c,sum(amount)
0,24K,38465,245625685.0
1,18K,37001,215905735.0
2,18G,9114,251887171.0
3,24G,8539,276709701.0
4,18J,7190,30766899.0
5,16C,2979,133457268.0
6,20C,879,18120094.0
7,22Z,618,-2392657.0
8,31K,603,12337607.0
9,24Z,377,5198207.0


## Contributions to candidates by PACs and goodness

In [46]:
top10_pacs_query = """
SELECT cands18.CID AS cid, cands18.firstlastp AS name, pg.pac_good AS pac_good, pb.pac_bad AS pac_bad
FROM pq_crp_cands18 cands18
LEFT JOIN (
  SELECT CID, SUM(Amount) AS pac_good
  FROM pq_crp_pacs18
  WHERE `type` IN ('24C', '24E', '24F', '24K', '24Z')
  GROUP BY (CID)
) pg
ON cands18.cid = pg.CID
LEFT JOIN (
  SELECT CID, SUM(Amount) AS pac_bad
  FROM pq_crp_pacs18
  WHERE `type` IN ('24A', '24N')
  GROUP BY (CID)
) pb
ON cands18.CID = pb.CID
ORDER BY pac_good + pac_bad DESC
LIMIT 10
"""
top10_pacs = sc.sql(top10_pacs_query)

In [48]:
top10_pacs.toPandas()

Unnamed: 0,cid,name,pac_good,pac_bad
0,N00040675,Jon Ossoff (D),1886138.0,14508839.0
1,N00035477,Karen Handel (R),5027613.0,6515642.0
2,N00041027,Roy Moore (R),1953920.0,7628394.0
3,N00041870,Conor Lamb (D),1323364.0,7859513.0
4,N00023864,Donald Trump (R),7928749.0,189540.0
5,N00040607,Luther Strange (R),7690549.0,371010.0
6,N00024817,Doug Jones (D),4905033.0,1542913.0
7,N00040729,Robert E. Quist (D),872526.0,5408468.0
8,N00041623,Kevin Nicholson (R),6092596.0,28000.0
9,N00004367,Tammy Baldwin (D),2225950.0,3158411.0


## Contributions to PACs by some PAC and goodness

In [60]:
top10_pacpacs_query = """
SELECT cmtes18.cmteid AS cmteid,
  cmtes18.pacshort AS name,
  IF(ISNULL(r.amount_received), 0, r.amount_received) AS amount_received,
  IF(ISNULL(pg.amount_donated), 0, pg.amount_donated) AS amount_donated,
  IF(ISNULL(n.amount_national), 0, n.amount_national) AS amount_national,
  IF(ISNULL(d.amount_disbursement), 0, d.amount_disbursement) AS amount_disbursement
  
FROM pq_crp_cmtes18 cmtes18
LEFT JOIN (
  SELECT Filerid, SUM(Amount) AS amount_received
  FROM pq_crp_pac_other18
  WHERE `type` LIKE '1%'
  GROUP BY (Filerid)
) r
ON cmtes18.cmteid = r.Filerid
LEFT JOIN (
  SELECT Filerid, SUM(Amount) AS amount_donated
  FROM pq_crp_pac_other18
  WHERE `type` LIKE '2%'
  GROUP BY (Filerid)
) pg
ON cmtes18.cmteid = pg.Filerid
LEFT JOIN (
  SELECT Filerid, SUM(Amount) AS amount_national
  FROM pq_crp_pac_other18
  WHERE `type` LIKE '3%'
  GROUP BY (Filerid)
) n
ON cmtes18.cmteid = n.Filerid
LEFT JOIN (
  SELECT Filerid, SUM(Amount) AS amount_disbursement
  FROM pq_crp_pac_other18
  WHERE `type` LIKE '4%'
  GROUP BY (Filerid)
) d
ON cmtes18.cmteid = d.amount_disbursement
ORDER BY amount_received + amount_donated + amount_national + amount_disbursement DESC
LIMIT 10
"""
top10_pacpacs = sc.sql(top10_pacpacs_query)

In [61]:
top10_pacpacs.toPandas()

Unnamed: 0,cmteid,name,amount_received,amount_donated,amount_national,amount_disbursement
0,C00075820,National Republican Congressional Cmte,58739050.0,6506499.0,21758501.0,0.0
1,C00545947,Team Ryan,7384967.0,42093606.0,0.0,0.0
2,C00000935,Democratic Congressional Campaign Cmte,30700041.0,12266892.0,3749203.0,0.0
3,C00042366,Democratic Senatorial Campaign Cmte,14795782.0,12513082.0,4907125.0,0.0
4,C00580100,Donald J Trump for President,31162291.0,357004.0,0.0,0.0
5,C00003418,Republican National Cmte,9480589.0,14899492.0,6340990.0,0.0
6,C00010603,DNC Services Corp,7023488.0,17369013.0,3726591.0,0.0
7,C00027466,National Republican Senatorial Cmte,17125558.0,4275588.0,4824448.0,0.0
8,C00618371,Trump Make America Great Again Cmte,310553.0,24187374.0,0.0,0.0
9,C00330894,Ryan for Congress,12121787.0,7116393.0,0.0,0.0


## Individual contributions

In [74]:
top10_indivs_query = """
SELECT indivs.contribid AS indivs_contribid,
  indivs.contrib AS donor_name,
  indivs.recipid AS recipid,
  IF(ISNULL(cands.firstlastp), cmtes.pacshort, cands.firstlastp) AS recip_name,
  SUM(indivs.amount) AS total_amt
FROM pq_crp_indivs18 indivs
LEFT JOIN pq_crp_cands18 cands
ON indivs.recipid = cands.CID
LEFT JOIN pq_crp_cmtes18 cmtes
ON indivs.recipid = cmtes.cmteid
GROUP BY indivs_contribid, donor_name, indivs.recipid, recip_name
ORDER BY total_amt DESC
LIMIT 10
"""
top10_indivs = sc.sql(top10_indivs_query)

In [75]:
top10_indivs_pd = top10_indivs.toPandas()
top10_indivs_pd

Unnamed: 0,indivs_contribid,donor_name,recipid,recip_name,total_amt
0,U00000036521,"STEYER, THOMAS F",C00547349,NextGen Climate Action,29213731
1,,AMERICAN ACTION NETWORK,C00504530,Congressional Leadership Fund,15290970
2,,UNITED BROTHERHOOD OF CARPENTERS AND JOINERS,C00490847,Working for Working Americans,14387064
3,q0001186349,"TRONE, DAVID",N00039122,David Trone (D),10000000
4,,DEMOCRATIC GOVERNORS ASSOCIATION (DGA),C00503789,DGA Action,7175000
5,,REPUBLICAN GOVERNORS ASSOCIATION,C00490730,RGA Right Direction,7033827
6,U0000003039A,"WALL, KATHALEEN",N00042222,Kathaleen Wall (R),6169727
7,q0001114562,"TRONE, DAVID",N00039122,David Trone (D),6000000
8,U00000036901,"UIHLEIN, RICHARD",C00487470,Club for Growth Action,5500000
9,,MERRILL LYNCH,C00473918,Women Vote!,5402626


## For a single candidate - who's spending money for/against?

In [64]:
top10_pacs_pd = top10_pacs.toPandas()
top10_pacs_pd

Unnamed: 0,cid,name,pac_good,pac_bad
0,N00040675,Jon Ossoff (D),1886138.0,14508839.0
1,N00035477,Karen Handel (R),5027613.0,6515642.0
2,N00041027,Roy Moore (R),1953920.0,7628394.0
3,N00041870,Conor Lamb (D),1323364.0,7859513.0
4,N00023864,Donald Trump (R),7928749.0,189540.0
5,N00040607,Luther Strange (R),7690549.0,371010.0
6,N00024817,Doug Jones (D),4905033.0,1542913.0
7,N00040729,Robert E. Quist (D),872526.0,5408468.0
8,N00041623,Kevin Nicholson (R),6092596.0,28000.0
9,N00004367,Tammy Baldwin (D),2225950.0,3158411.0


In [65]:
topcid = top10_pacs_pd.iloc[0]['cid']

In [164]:
def get_pac_support_direct(cid, limit=10):
    """
    Get PAC contributions made directly to a candidate
    """
    
    pacs_query = f"""
    SELECT cmtes18.cmteid AS cmteid,
      cmtes18.pacshort AS cmte_name,
      IF(ISNULL(pg.pac_good), 0, pg.pac_good) AS pac_good,
      IF(ISNULL(pb.pac_bad), 0, pac_bad) AS pac_bad
    FROM pq_crp_cmtes18 cmtes18
    LEFT JOIN (
      SELECT pacid, SUM(Amount) AS pac_good
      FROM pq_crp_pacs18
      WHERE `type` IN ('24C', '24E', '24F', '24K', '24Z')
      AND cid = '{cid}'
      GROUP BY (pacid)
    ) pg
    ON cmtes18.cmteid = pg.pacid
    LEFT JOIN (
      SELECT pacid, SUM(Amount) AS pac_bad
      FROM pq_crp_pacs18
      WHERE `type` IN ('24A', '24N')
      AND cid = '{cid}'
      GROUP BY (pacid)
    ) pb
    ON cmtes18.cmteid = pb.pacid
    ORDER BY pac_good + pac_bad DESC
    LIMIT {limit}
    """
    pacs_df = sc.sql(pacs_query)
    return pacs_df.toPandas()

In [97]:
get_pac_support_direct('N00040675')

Unnamed: 0,cmteid,cmte_name,pac_good,pac_bad
0,C00504530,Congressional Leadership Fund,0.0,6196944.0
1,C00075820,National Republican Congressional Cmte,0.0,6015567.0
2,C90017302,America First Policies,0.0,1524990.0
3,C90005471,Planned Parenthood Action Fund,576632.0,0.0
4,C90013145,US Chamber of Commerce,0.0,500100.0
5,C00341396,Moveon.org,251010.0,0.0
6,C00495028,House Majority PAC,229909.0,0.0
7,C00003418,Republican National Cmte,0.0,112356.0
8,C00000935,Democratic Congressional Campaign Cmte,99064.0,0.0
9,C00646653,Engage Georgia,68431.0,0.0


In [163]:
def get_individual_support_direct(cid, limit=10):
    """
    Get individual contributions made to a candidate by CID
    """
    
    indiv_query = f"""
    SELECT indivs.contribid AS indivs_contribid,
      indivs.contrib AS donor_name,
      SUM(indivs.amount) AS total_amt
    FROM pq_crp_indivs18 indivs
    WHERE indivs.recipid = '{cid}'
    GROUP BY indivs_contribid, donor_name
    ORDER BY total_amt DESC
    LIMIT {limit}
    """
    indiv_df = sc.sql(indiv_query)
    return indiv_df.toPandas()

In [103]:
get_individual_support_direct('N00040675')

Unnamed: 0,indivs_contribid,donor_name,total_amt
0,q0000482557,"OSSOFF, T JONATHAN",50000
1,h3001645502,"ROSS, JOHANNA",27700
2,m0001258156,"BARKSDALE, KATHLEEN",16200
3,n0001598560,"SALTZ, SUSAN",15400
4,k0001403916,"LAMB, ALYSE",14575
5,U0000004158,"TAYLOR, DALE",14400
6,p00029358691,"CRYER, JON",13800
7,m00017704691,"MILNER, RENANNE",13738
8,i3003258296@,"FEINBERG, IRIS",13500
9,n0001440275,"BRICE, EDWARD",13400


In [172]:
def get_pacpac_breakdown(pacid, limit=10):
    """
    Get inter-PAC transfers involving this PAC (to/from specified in column headers)
    """
    
    pacpac_query = f"""
    SELECT cmtes18.cmteid AS cmteid,
      cmtes18.pacshort AS name,
      IF(ISNULL(r.amount_received), 0, r.amount_received) AS amount_received_from,
      IF(ISNULL(pg.amount_donated), 0, pg.amount_donated) AS costs_for,
      IF(ISNULL(pb.amount_donated), 0, pb.amount_donated) AS costs_against,
      IF(ISNULL(nd.national_contrib), 0, nd.national_contrib) AS national_contrib_to,
      IF(ISNULL(nf.national_contrib), 0, nf.national_contrib) AS national_contrib_from

    FROM pq_crp_cmtes18 cmtes18
    LEFT JOIN (
      SELECT filerid, otherid AS donor, SUM(amount) AS amount_received
      FROM pq_crp_pac_other18
      WHERE `type` LIKE '1%'
        AND filerid = '{pacid}'
      GROUP BY filerid, otherid
    ) r
    ON cmtes18.cmteid = r.donor
    LEFT JOIN (
      SELECT filerid AS donor, otherid, SUM(Amount) AS amount_donated
      FROM pq_crp_pac_other18
      WHERE `type` LIKE '2%'
        AND `type` NOT IN ('24A', '24N')
        AND otherid = '{pacid}'
      GROUP BY donor, otherid
    ) pg
    ON cmtes18.cmteid = pg.donor
    LEFT JOIN (
      SELECT filerid AS donor, otherid, SUM(Amount) AS amount_donated
      FROM pq_crp_pac_other18
      WHERE `type` IN ('24A', '24N')
        AND otherid = '{pacid}'
      GROUP BY donor, otherid
    ) pb
    ON cmtes18.cmteid = pb.donor
    LEFT JOIN (
      SELECT filerid, otherid AS donor, SUM(Amount) AS national_contrib
      FROM pq_crp_pac_other18
      WHERE `type` LIKE '3%'
        AND type NOT IN ('30K')
        AND otherid = '{pacid}'
      GROUP BY filerid, donor
    ) nd
    ON cmtes18.cmteid = nd.filerid
    LEFT JOIN (
      SELECT filerid, otherid AS donor, SUM(Amount) AS national_contrib
      FROM pq_crp_pac_other18
      WHERE `type` LIKE '3%'
        AND type NOT IN ('30K')
        AND filerid = '{pacid}'
      GROUP BY filerid, donor
    ) nf
    ON cmtes18.cmteid = nf.donor
    
    ORDER BY amount_received_from + costs_for + costs_against + national_contrib_to + national_contrib_from DESC
    LIMIT {limit}
    """
    
    pacpac_df = sc.sql(pacpac_query)
    return pacpac_df.toPandas()

In [156]:
get_pacpac_breakdown('C00504530')

Unnamed: 0,cmteid,name,amount_received_from,costs_for,costs_against,national_contrib_to,national_contrib_from
0,C00255752,American Society of Anesthesiologists,25000.0,75000.0,0.0,0.0,0.0
1,C00415752,Border Health,50000.0,50000.0,0.0,0.0,0.0
2,C00653154,American Economic Renewal,0.0,65000.0,0.0,0.0,0.0
3,C00435321,Nustar Energy,25000.0,25000.0,0.0,0.0,0.0
4,C00576199,Startups for America,0.0,22507.0,0.0,0.0,0.0
5,C00024968,American Optometric Assn,0.0,15000.0,0.0,0.0,0.0
6,C00276311,Blue Cross/Blue Shield of Nebraska,0.0,5000.0,0.0,0.0,0.0
7,C00000018,,0.0,0.0,0.0,0.0,0.0
8,C00000059,Hallmark Cards,0.0,0.0,0.0,0.0,0.0
9,C00000422,American Medical Assn,0.0,0.0,0.0,0.0,0.0


In [169]:
def get_pac_indiv_support(pacid, limit=10):
    indivpac_query = f"""
    SELECT contribid, contrib AS contributor, sum(amount) AS total_amt
    FROM pq_crp_indivs18
    WHERE recipid='{pacid}'
    GROUP BY contribid, contributor
    ORDER BY total_amt DESC
    LIMIT {limit}
    """
    indivpac_df = sc.sql(indivpac_query)
    return indivpac_df.toPandas()

In [170]:
get_pac_indiv_support('C00504530')

Unnamed: 0,contribid,contributor,total_amt
0,,AMERICAN ACTION NETWORK,15290970
1,U00000045761,"ANSARY, HUSHANG",1500000
2,,VALERO SERVICES INC,1500000
3,U00000000731,"FOSTER, PAUL L",1000000
4,U00000038291,"COHEN, STEVEN A",1000000
5,,CHEVRON POLICY GOVERNMENT & PUBLIC AFFAIRS,1000000
6,U00000036551,"GRIFFIN, KENNETH C MR",1000000
7,,HILLWOOD DEVELOPMENT COMPANY LLC,1000000
8,U00000033071,"MARCUS, BERNARD",754600
9,U00000036581,"JOHNSON, CHARLES B",600000


## Data dump: Case study for `'Jon Ossoff'`

In [187]:
candidate = 'Jon Ossoff'
cid = sc.sql(f"SELECT * FROM pq_crp_cands18 WHERE `firstlastp` LIKE '%{candidate}%'").toPandas()['cid'].iloc[0]

pac_support_direct = get_pac_support_direct(cid)
indiv_support_direct = get_individual_support_direct(cid)

In [189]:
indiv_support_direct

Unnamed: 0,indivs_contribid,donor_name,total_amt
0,q0000482557,"OSSOFF, T JONATHAN",50000
1,h3001645502,"ROSS, JOHANNA",27700
2,m0001258156,"BARKSDALE, KATHLEEN",16200
3,n0001598560,"SALTZ, SUSAN",15400
4,k0001403916,"LAMB, ALYSE",14575
5,U0000004158,"TAYLOR, DALE",14400
6,p00029358691,"CRYER, JON",13800
7,m00017704691,"MILNER, RENANNE",13738
8,i3003258296@,"FEINBERG, IRIS",13500
9,n0001440275,"BRICE, EDWARD",13400


In [190]:
pac_support_direct

Unnamed: 0,cmteid,cmte_name,pac_good,pac_bad
0,C00504530,Congressional Leadership Fund,0.0,6196944.0
1,C00075820,National Republican Congressional Cmte,0.0,6015567.0
2,C90017302,America First Policies,0.0,1524990.0
3,C90005471,Planned Parenthood Action Fund,576632.0,0.0
4,C90013145,US Chamber of Commerce,0.0,500100.0
5,C00341396,Moveon.org,251010.0,0.0
6,C00495028,House Majority PAC,229909.0,0.0
7,C00003418,Republican National Cmte,0.0,112356.0
8,C00000935,Democratic Congressional Campaign Cmte,99064.0,0.0
9,C00646653,Engage Georgia,68431.0,0.0


In [193]:
cmte_pacpacs = {}
for idx, cmte in pac_support_direct.iterrows():
    print(f'Getting PAC support for {cmte.cmte_name}...')
    cmte_pacpacs[cmte.cmteid] = get_pacpac_breakdown(cmte.cmteid)

Getting PAC support for Congressional Leadership Fund...
Getting PAC support for National Republican Congressional Cmte...
Getting PAC support for America First Policies...
Getting PAC support for Planned Parenthood Action Fund...
Getting PAC support for US Chamber of Commerce...
Getting PAC support for Moveon.org...
Getting PAC support for House Majority PAC...
Getting PAC support for Republican National Cmte...
Getting PAC support for Democratic Congressional Campaign Cmte...
Getting PAC support for Engage Georgia...


In [194]:
cmte_indivs = {}
for idx, cmte in pac_support_direct.iterrows():
    print(f'Getting individual support for {cmte.cmte_name}...')
    cmte_indivs[cmte.cmteid] = get_pac_indiv_support(cmte.cmteid)

Getting individual support for Congressional Leadership Fund...
Getting individual support for National Republican Congressional Cmte...
Getting individual support for America First Policies...
Getting individual support for Planned Parenthood Action Fund...
Getting individual support for US Chamber of Commerce...
Getting individual support for Moveon.org...
Getting individual support for House Majority PAC...
Getting individual support for Republican National Cmte...
Getting individual support for Democratic Congressional Campaign Cmte...
Getting individual support for Engage Georgia...


In [218]:
ossoff = {
    'cand_name': candidate,
    'cand_strrep': 'Jon Ossoff (D): GA06',
    'indiv_support_direct': indiv_support_direct,
    'pac_support_direct': pac_support_direct,
    'cmte_pacpacs': cmte_pacpacs,
    'cmte_indivs': cmte_indivs
}

In [196]:
import pickle

In [198]:
def dump_cand_data(obj):
    with open(obj['cand_name'] + '.pickle', 'wb') as pickle_file:
        pickle.dump(obj, pickle_file)

In [219]:
dump_cand_data(ossoff)

#### Sanity check

In [220]:
with open('Jon Ossoff.pickle', 'rb') as pickle_file:
    ossoff2 = pickle.load(pickle_file)

In [221]:
ossoff2['cand_name']

'Jon Ossoff'

In [222]:
ossoff2['cand_strrep']

'Jon Ossoff (D): GA06'

In [202]:
ossoff2['indiv_support_direct']

Unnamed: 0,indivs_contribid,donor_name,total_amt
0,q0000482557,"OSSOFF, T JONATHAN",50000
1,h3001645502,"ROSS, JOHANNA",27700
2,m0001258156,"BARKSDALE, KATHLEEN",16200
3,n0001598560,"SALTZ, SUSAN",15400
4,k0001403916,"LAMB, ALYSE",14575
5,U0000004158,"TAYLOR, DALE",14400
6,p00029358691,"CRYER, JON",13800
7,m00017704691,"MILNER, RENANNE",13738
8,i3003258296@,"FEINBERG, IRIS",13500
9,n0001440275,"BRICE, EDWARD",13400


In [203]:
ossoff2['pac_support_direct']

Unnamed: 0,cmteid,cmte_name,pac_good,pac_bad
0,C00504530,Congressional Leadership Fund,0.0,6196944.0
1,C00075820,National Republican Congressional Cmte,0.0,6015567.0
2,C90017302,America First Policies,0.0,1524990.0
3,C90005471,Planned Parenthood Action Fund,576632.0,0.0
4,C90013145,US Chamber of Commerce,0.0,500100.0
5,C00341396,Moveon.org,251010.0,0.0
6,C00495028,House Majority PAC,229909.0,0.0
7,C00003418,Republican National Cmte,0.0,112356.0
8,C00000935,Democratic Congressional Campaign Cmte,99064.0,0.0
9,C00646653,Engage Georgia,68431.0,0.0


In [204]:
ossoff2['cmte_pacpacs']

{'C00504530':       cmteid                                   name  amount_received_from  \
 0  C00255752  American Society of Anesthesiologists               25000.0   
 1  C00415752                          Border Health               50000.0   
 2  C00653154              American Economic Renewal                   0.0   
 3  C00435321                          Nustar Energy               25000.0   
 4  C00576199                   Startups for America                   0.0   
 5  C00024968               American Optometric Assn                   0.0   
 6  C00276311     Blue Cross/Blue Shield of Nebraska                   0.0   
 7  C00000018                                                          0.0   
 8  C00000059                         Hallmark Cards                   0.0   
 9  C00000422                  American Medical Assn                   0.0   
 
    costs_for  costs_against  national_contrib_to  national_contrib_from  
 0    75000.0            0.0                  0.0   

In [205]:
ossoff2['cmte_indivs']

{'C00504530':       contribid                                 contributor  total_amt
 0                                   AMERICAN ACTION NETWORK   15290970
 1  U00000045761                             ANSARY, HUSHANG    1500000
 2                                       VALERO SERVICES INC    1500000
 3  U00000000731                              FOSTER, PAUL L    1000000
 4  U00000038291                             COHEN, STEVEN A    1000000
 5                CHEVRON POLICY GOVERNMENT & PUBLIC AFFAIRS    1000000
 6  U00000036551                       GRIFFIN, KENNETH C MR    1000000
 7                          HILLWOOD DEVELOPMENT COMPANY LLC    1000000
 8  U00000033071                             MARCUS, BERNARD     754600
 9  U00000036581                          JOHNSON, CHARLES B     600000,
 'C00075820':       contribid            contributor  total_amt
 0  U00000036821    MERCER, ROBERT L MR     474600
 1  h10012238391    MONTAG, THOMAS K MR     466400
 2  U00000032651  SINQUEFIEL

# Export list of all 2018 candidates ordered by total PAC money spent

In [213]:
all_2018_cands = sc.sql(f"""
SELECT cands.cid, cands.firstlastp, cands.distidrunfor
FROM pq_crp_pacs18 pacs
LEFT JOIN pq_crp_cands18 cands
ON pacs.cid = cands.cid
GROUP BY cands.cid, cands.firstlastp, cands.distidrunfor
ORDER BY sum(pacs.amount) DESC
""").toPandas()

In [216]:
all_2018_cands['strrep'] = all_2018_cands['firstlastp'] + ': ' + all_2018_cands['distidrunfor']

In [217]:
all_2018_cands.to_pickle('all_2018_cands.pickle')