# Analysis of GOP Donor Movements After Rubio and Bush Drop-outs

In [1]:
import pandas as pd
import datetime as dt

## Load and Find Candidate Committees

In [2]:
# Column names for the candidate master file, stored as a single comma-separated line.
# The context manager closes the file handle as soon as the names are read.
with open("../data/cn_header_file.csv") as cn_header_file:
    cn_headers = cn_header_file.read().strip().split(',')

In [6]:
# Added by Steve
# Show the raw text of the header file (including its trailing newline).
# The `with` block closes the handle once the text has been read.
with open('../data/cn_header_file.csv') as cn_header_file:
    cn_header_text = cn_header_file.read()
cn_header_text

'CAND_ID,CAND_NAME,CAND_PTY_AFFILIATION,CAND_ELECTION_YR,CAND_OFFICE_ST,CAND_OFFICE,CAND_OFFICE_DISTRICT,CAND_ICI,CAND_STATUS,CAND_PCC,CAND_ST1,CAND_ST2,CAND_CITY,CAND_ST,CAND_ZIP\n'

In [7]:
# Candidate master file: pipe-delimited, with column names supplied from
# the separately loaded header list rather than from the file itself.
campaigns_to_commitees = pd.read_csv(
    "../data/cn.txt",
    names=cn_headers,
    sep="|",
)

In [8]:
# Added by Steve
# Same file read without `names=`, for comparison with the load above.
pd.read_csv(
    "../data/cn.txt",
    sep="|",
)

Unnamed: 0,H0AK00097,"COX, JOHN R.",REP,2014,AK,H,00,C,N,C00525261,P.O. BOX 1092,Unnamed: 11,ANCHOR POINT,AK.1,99556
0,H0AL02087,"ROBY, MARTHA",REP,2016,AL,H,2.0,I,C,C00462143,PO BOX 195,,MONTGOMERY,AL,36101.0
1,H0AL02095,"JOHN, ROBERT E JR",IND,2016,AL,H,2.0,C,N,,1465 W OVERBROOK RD,,MILLBROOK,AL,36054.0
2,H0AL05049,"CRAMER, ROBERT E ""BUD"" JR",DEM,2008,AL,H,5.0,C,P,C00239038,PO BOX 2621,,HUNTSVILLE,AL,35804.0
3,H0AL05163,"BROOKS, MO",REP,2016,AL,H,5.0,I,C,C00464149,7610 FOXFIRE DRIVE,,HUNTSVILLE,AL,35802.0
4,H0AL06088,"COOKE, STANLEY KYLE",REP,2010,AL,H,6.0,C,N,C00464222,723 CHERRY BROOK ROAD,,KIMBERLY,AL,35091.0
5,H0AL07086,"SEWELL, TERRYCINA ANDREA",DEM,2016,AL,H,7.0,I,C,C00458976,PO BOX 1964,,BIRMINGHAM,AL,35201.0
6,H0AL07094,"HILLIARD, EARL FREDERICK JR",DEM,2010,AL,H,7.0,O,P,C00460410,PO BOX 12804,,BIRMINGHAM,AL,35202.0
7,H0AL07177,"CHAMBERLAIN, DON",REP,2012,AL,H,7.0,C,P,C00482059,512 LAPSLEY ST,,SELMA,AL,36701.0
8,H0AR01083,"CRAWFORD, ERIC ALAN RICK",REP,2016,AR,H,1.0,I,C,C00462374,34 CR 455,,JONESBORO,AR,72404.0
9,H0AR01091,"GREGORY, JAMES CHRISTOPHER",DEM,2010,AR,H,1.0,O,N,C00472126,510 S LILLY ST,,BLYTHEVILLE,AR,72315.0


In [16]:
# Candidates to analyze; these strings are matched against the CAND_NAME column.
CAND_NAMES = [
    "KASICH, JOHN R",
    "TRUMP, DONALD J",
    "RUBIO, MARCO",
    "BUSH, JEB",
    "CRUZ, RAFAEL EDWARD \"TED\"",
]

In [17]:
# Map each selected candidate's name to the value of their CAND_PCC column,
# keeping only 2016 Republican ("REP") rows whose office code is "P".
COMMITTEE_IDS = (
    campaigns_to_commitees
    .loc[
        lambda cands: (
            (cands["CAND_OFFICE"] == "P")
            & (cands["CAND_PTY_AFFILIATION"] == "REP")
            & (cands["CAND_ELECTION_YR"] == 2016)
            & (cands["CAND_NAME"].isin(CAND_NAMES))
        )
    ]
    .set_index("CAND_NAME")["CAND_PCC"]
    .to_dict()
)
COMMITTEE_IDS

{'KASICH, JOHN R': 'C00581876',
 'CRUZ, RAFAEL EDWARD "TED"': 'C00574624',
 'RUBIO, MARCO': 'C00458844',
 'BUSH, JEB': 'C00579458',
 'TRUMP, DONALD J': 'C00580100'}

## Load and Clean Individual Donations

BuzzFeed News downloaded the "Contributions by Individuals" master file from the [FEC's website](http://www.fec.gov/finance/disclosure/ftpdet.shtml) on April 25, 2016. It contains information about every single itemized individual donation for every single 2016 campaign for contributors who have given more than $200 to a committee during this election cycle. You can download a copy of that data [here](https://archive.org/details/fec-contributions-master-file-2016-04-25).

First we select only the donations to the five Republican campaign committees we're interested in analyzing.

Then, we simplify each donor's name (removing suffixes and middle names, which committees have different approaches to reporting) and ZIP code (to the first five digits).

FEC campaign filings do not assign any unique identifiers to donors. So finally, to be able to distinguish and track individual donors, the code below assigns each contribution a `donor_uid` based on the donor's first name, last name, and ZIP code. This approach could result in an undercount of donors if, for instance, there are two people named John Smith in the same ZIP code — but should result in good approximations for the analyses below.

### Load All Itemized Individual Donations

In [18]:
# Column names for the individual-contributions file, stored as one comma-separated line.
# The context manager closes the file handle as soon as the names are read.
with open("../data/indiv_header_file.csv") as ind_header_file:
    ind_headers = ind_header_file.read().strip().split(',')

In [19]:
# Added by Steve
# Display the parsed list of column names for the individual-contributions file.
ind_headers

['CMTE_ID',
 'AMNDT_IND',
 'RPT_TP',
 'TRANSACTION_PGI',
 'IMAGE_NUM',
 'TRANSACTION_TP',
 'ENTITY_TP',
 'NAME',
 'CITY',
 'STATE',
 'ZIP_CODE',
 'EMPLOYER',
 'OCCUPATION',
 'TRANSACTION_DT',
 'TRANSACTION_AMT',
 'OTHER_ID',
 'TRAN_ID',
 'FILE_NUM',
 'MEMO_CD',
 'MEMO_TEXT',
 'SUB_ID']

In [20]:
# Columns to read as text instead of letting pandas infer a numeric type.
# Keys must match the names in the individual-contributions header list.
dtypes = {
    "NAME": str,
    "ZIP_CODE": str,
    "TRANSACTION_DT": str,
    "FILE_NUM": str,
    "MEMO_CD": str,
    # The header names this column MEMO_TEXT; the former "MEMO_TXT" key matched
    # no column, so the str dtype was never applied to it.
    "MEMO_TEXT": str,
}

In [21]:
# Load the individual-contributions master file: pipe-delimited, with column
# names taken from the header list and the text columns forced to str.
donors = pd.read_csv(
    "../data/itcont.txt",
    names=ind_headers,
    dtype=dtypes,
    sep="|",
)

In [13]:
# Preview the first rows of the loaded contributions table.
donors.head()

Unnamed: 0,CMTE_ID,AMNDT_IND,RPT_TP,TRANSACTION_PGI,IMAGE_NUM,TRANSACTION_TP,ENTITY_TP,NAME,CITY,STATE,...,EMPLOYER,OCCUPATION,TRANSACTION_DT,TRANSACTION_AMT,OTHER_ID,TRAN_ID,FILE_NUM,MEMO_CD,MEMO_TEXT,SUB_ID
0,C00004606,N,M4,P,15951124869,15,IND,"ARNOLD, ROBERT",MCPHERSON,KS,...,SELF,OPTOMETRIST,3102015,1000,,SA11AI.20747,1002259,,,4041320151241796098
1,C00004606,N,M4,P,15951124869,15,IND,"BICKLE, DON",HAYS,KS,...,RETIRED,RETIRED,3302015,1000,,SA11AI.20772,1002259,,,4041320151241796099
2,C00004606,N,M4,P,15951124869,15,IND,"ROSSMAN, RICHARD",OLATHE,KS,...,CRAWFORD SALES COMPANY,BUSINESSMAN,3302015,250,,SA11AI.20759,1002259,,,4041320151241796100
3,C00452383,N,M4,P,15951124897,15,IND,"LLEWELLYN, CHARLES",FREDERICK,MD,...,,,3112015,500,,SA11AI.25088,1002261,,,4041320151241796102
4,C00452383,N,M4,P,15951124897,15,IND,"TYNES, TIMOTHY MR.",VERO BEACH,FL,...,,,3022015,250,,SA11AI.25074,1002261,,,4041320151241796103


In [22]:
# Keep only contributions to the selected committees whose TRANSACTION_PGI
# flag is "P"; copy so later column assignments do not touch `donors`.
gop_primary_donors = donors.loc[
    lambda contribs: (
        contribs["CMTE_ID"].isin(COMMITTEE_IDS.values())
        & (contribs["TRANSACTION_PGI"] == "P")
    )
].copy()

### Clean Up Donation Data

In [24]:
def parse_date(date_string):
    """Convert an ``MMDDYYYY`` date string into a ``datetime``.

    Parameters
    ----------
    date_string : str or null
        Date text in month-day-year order; surrounding whitespace is ignored.
        A 7-digit value is treated as having lost its leading zero.

    Returns
    -------
    datetime.datetime or None
        The parsed date, or ``None`` when the value is null or blank.

    Raises
    ------
    ValueError
        If the text is non-blank but not a valid ``MMDDYYYY`` date.
    """
    if pd.isnull(date_string):
        return None
    digits = date_string.strip()
    if not digits:
        # Whitespace-only cells carry no date; treat them like missing values.
        return None
    # Restore a dropped leading zero: without it, "%m%d%Y" reads "1012015"
    # as Oct 1 instead of Jan 1.
    return dt.datetime.strptime(digits.zfill(8), "%m%d%Y")

In [25]:
# Parse each TRANSACTION_DT string into a datetime, element by element.
gop_primary_donors["date"] = gop_primary_donors["TRANSACTION_DT"].map(parse_date)

In [26]:
def extract_last_first(name):
    """Reduce a donor name to its first two space-separated tokens.

    For a name such as ``"TYNES, TIMOTHY MR."`` this keeps
    ``"TYNES, TIMOTHY"`` and drops anything after the second token.
    Because the split is purely on spaces, a surname containing a space
    (e.g. ``"VAN DYKE, JOHN"``) is reduced to ``"VAN DYKE,"``.

    Parameters
    ----------
    name : str or null
        The donor name as reported.

    Returns
    -------
    str or None
        The shortened name, or ``None`` when ``name`` is not a string
        (e.g. a missing value, which pandas represents as a float NaN).
    """
    if not isinstance(name, str):
        # Missing names would otherwise raise AttributeError on .split().
        return None
    return " ".join(name.split(" ")[:2])

In [27]:
# Shortened donor name (first two space-separated tokens of NAME).
gop_primary_donors["last_first"] = gop_primary_donors["NAME"].map(extract_last_first)
# First five characters of the ZIP code; a missing ZIP becomes "".
gop_primary_donors["zip_first_five"] = gop_primary_donors["ZIP_CODE"].fillna("").str[:5]

In [28]:
# Added by Steve
# Spot-check the truncated ZIP codes against the originals for the first rows.
gop_primary_donors.head()[['ZIP_CODE', 'zip_first_five']]

Unnamed: 0,ZIP_CODE,zip_first_five
18207,760282217,76028
18208,597112904,59711
18209,294927523,29492
18210,772190630,77219
18211,331566023,33156


In [29]:
def make_uid(row):
    """Build a donor identifier of the form ``"<last_first>|<zip_first_five>"``.

    ``row`` is any mapping-like object with ``"last_first"`` and
    ``"zip_first_five"`` entries. Returns ``None`` when the name is null or
    the ZIP is the empty string, since no usable identifier can be formed.
    """
    if pd.isnull(row["last_first"]):
        return None
    if row["zip_first_five"] == "":
        return None
    return "|".join((row["last_first"], row["zip_first_five"]))

In [30]:
# Row-wise: combine each contribution's shortened name and ZIP into a donor id.
gop_primary_donors["donor_uid"] = gop_primary_donors.apply(make_uid, axis=1)

## Analyze the Data

The code below uses `donor_uid` to find the donors who made their first donation to a campaign committee after Jeb Bush and Marco Rubio dropped out of the 2016 Republican presidential primary — on Feb. 20, 2016 and March 15, 2016, respectively. It then counts how many of those donations were made by donors who had previously given to the Bush or Rubio campaigns.

In [43]:
# Aggregate donation totals by donor and committee, split at a cut-off date.
def calculate_movements(since_date):
    """Sum each donor's contributions per committee, before vs. after a date.

    Parameters
    ----------
    since_date
        Cut-off compared against the ``date`` column with a strict ``>``;
        a contribution dated exactly ``since_date`` therefore falls in the
        ``False`` ("not after") group.

    Returns
    -------
    pandas.DataFrame
        One row per ``donor_uid``; columns are (CMTE_ID, after-flag) pairs
        holding summed ``TRANSACTION_AMT``, with 0 where a donor has no
        contributions in that cell.
    """
    is_after_cutoff = gop_primary_donors["date"] > since_date
    grouping_keys = [
        "donor_uid",      # one group per donor ...
        is_after_cutoff,  # ... split into on/before vs. after the cut-off ...
        "CMTE_ID",        # ... and by receiving committee
    ]
    totals = gop_primary_donors.groupby(grouping_keys)["TRANSACTION_AMT"].sum()
    # Pivot the committee level, then the after-flag level, into the columns.
    by_committee = totals.unstack()
    by_committee_and_period = by_committee.unstack()
    return by_committee_and_period.fillna(0)

In [44]:
# Preview per-donor totals split at 2016-02-20: each committee gets a
# False (on or before the date) column and a True (after the date) column.
calculate_movements("2016-02-20").head()

CMTE_ID,C00458844,C00458844,C00574624,C00574624,C00579458,C00579458,C00580100,C00580100,C00581876,C00581876
date,False,True,False,True,False,True,False,True,False,True
donor_uid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
"AANONSEN, PAUL|20009",0.0,0.0,0.0,0.0,0.0,0.0,323.0,0.0,0.0,0.0
"AARNIO, TERRANCE|97267",0.0,0.0,0.0,0.0,2700.0,0.0,0.0,0.0,0.0,0.0
"AARON, CHARLES|91361",0.0,0.0,250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"AARON, DAVID|30125",0.0,0.0,0.0,0.0,0.0,0.0,253.0,303.0,0.0,0.0
"AARON, FRED|34240",200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
# Added by Steve
# Long-format view of the totals: one entry per (donor, after-2016-02-20 flag, committee).
(
    gop_primary_donors
    .groupby(['donor_uid', gop_primary_donors['date'] > '2016-02-20', "CMTE_ID"])
    ['TRANSACTION_AMT']
    .sum()
)

donor_uid                date   CMTE_ID  
AANONSEN, PAUL|20009     False  C00580100     323
AARNIO, TERRANCE|97267   False  C00579458    2700
AARON, CHARLES|91361     False  C00574624     250
AARON, DAVID|30125       False  C00580100     253
                         True   C00580100     303
AARON, FRED|34240        False  C00458844     200
AARON, JOHN|23238        False  C00574624     200
                         True   C00574624     250
AARON, JONATHAN|48304    False  C00458844    2700
AARON, MARCIA|90266      True   C00581876    1000
AARON, MARTIN|77005      False  C00574624     500
AARONSON, JEFFREY|75201  False  C00581876     500
AARSBY, J.|57701         False  C00574624    1400
AARSHEIM, KNUTE|02738    False  C00579458     250
ABADI, MICHAEL|33180     False  C00579458    2500
ABAJIAN, KIM|93103       False  C00579458    2700
ABAJIAN, TED|93103       False  C00458844     620
                                C00579458    2700
ABARE, WILLIAM|32080     False  C00579458    1000
ABATE, J

In [45]:
def select_movements(from_candidate, to_candidate, since_date):
    """Find donors who moved from one candidate to another after a date.

    A donor is selected when, relative to ``since_date``, they:
      * gave a positive total to ``from_candidate`` on or before the date,
      * gave nothing to ``to_candidate`` on or before the date, and
      * gave a positive total to ``to_candidate`` after the date.

    Candidate names are looked up in ``COMMITTEE_IDS``. Returns a Series,
    indexed by donor, of the amounts given to ``to_candidate`` after the date.
    """
    movements = calculate_movements(since_date)

    # Column keys are (committee id, contributed-after-date flag).
    from_before = (COMMITTEE_IDS[from_candidate], False)
    to_before = (COMMITTEE_IDS[to_candidate], False)
    to_after = (COMMITTEE_IDS[to_candidate], True)

    backed_from_candidate = movements[from_before] > 0
    new_to_recipient = movements[to_before] == 0
    gave_after_cutoff = movements[to_after] > 0

    switchers = movements[backed_from_candidate & new_to_recipient & gave_after_cutoff]
    return switchers[to_after]

*Added by Steve* 

The `select_movements` function identifies donors who gave to a new candidate for the first time after a candidate they had previously donated to dropped out of the race. This seems like a very good way of detecting how donors "changed horses" in the middle of the race. For example, looking at who threw their support to Kasich after Rubio dropped out:

**What counts**

| donor | before drop? | candidate |
| ----- | ------------ | --------- |
| a     | True         | Rubio     |
| a     | False        | Kasich    |

and this

| donor | before drop? | candidate |
| ----- | ------------ | --------- |
| b     | True         | Rubio     |
| b     | False        | Kasich    |
| b     | False        | Cruz      |


**What doesn't count** 

Donors can't have donated to the new candidate before the drop date

| donor | before drop? | candidate |
| ----- | ------------ | --------- |
| c     | True         | Rubio     |
| c     | True         | Kasich    |
| c     | False        | Kasich    |

In [51]:
# added by Steve
# Shows every donor with a positive contribution total to Rubio's committee
# on or before 2016-02-20, regardless of whom else they gave to.
# NOTE(review): this cell was labelled "Rubio to Trump" and the variable is
# named `kas_mv`, but no Trump or Kasich column is filtered on. 2016-02-20 is
# also the date DROPOUTS pairs with Bush, not Rubio. Confirm the intended
# comparison.

kas_mv = calculate_movements('2016-02-20')
kas_mv[
    (kas_mv[(COMMITTEE_IDS['RUBIO, MARCO'], False)] > 0) 
]

CMTE_ID,C00458844,C00458844,C00574624,C00574624,C00579458,C00579458,C00580100,C00580100,C00581876,C00581876
date,False,True,False,True,False,True,False,True,False,True
donor_uid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
"AARON, FRED|34240",200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"AARON, JONATHAN|48304",2700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABAJIAN, TED|93103",620.0,0.0,0.0,0.0,2700.0,0.0,0.0,0.0,0.0,0.0
"ABBOTT, CHARLES|32210",800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABBOTT, JOSH|78735",2700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABBOTT, MARYANN|29609",300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABBOTT, MATTHEW|02043",2700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABDALLAH, RANDALL|48226",1000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABDELKADER, ABDELGAWA|33765",1000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"ABDELKADER, ATIF|33765",2700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
# Candidates whose incoming donors are counted.
REMAINING_CANDIDATES = [
    "CRUZ, RAFAEL EDWARD \"TED\"",
    "KASICH, JOHN R",
    "TRUMP, DONALD J",
]

# (candidate, cut-off date) pairs for the candidates who left the race.
DROPOUTS = [
    ("BUSH, JEB", "2016-02-20"),
    ("RUBIO, MARCO", "2016-03-15"),
]

In [47]:
# For every (remaining candidate, dropped-out candidate) pair, report how many
# donors switched and the total they gave to the remaining candidate afterwards.
for remaining_candidate in REMAINING_CANDIDATES:
    for dropped_candidate, dropout_date in DROPOUTS:
        switched = select_movements(dropped_candidate, remaining_candidate, dropout_date)
        report = "{0} from {1}\n{2} donors\n${3:,.0f}\n".format(
            remaining_candidate,
            dropped_candidate,
            len(switched),
            switched.sum(),
        )
        print(report)

CRUZ, RAFAEL EDWARD "TED" from BUSH, JEB
63 donors
$99,710

CRUZ, RAFAEL EDWARD "TED" from RUBIO, MARCO
120 donors
$126,650

KASICH, JOHN R from BUSH, JEB
131 donors
$173,850

KASICH, JOHN R from RUBIO, MARCO
97 donors
$92,168

TRUMP, DONALD J from BUSH, JEB
9 donors
$13,817

TRUMP, DONALD J from RUBIO, MARCO
6 donors
$4,504



---

---

---