In [54]:
import pandas as pd
# Load the endorsement-count CSV into the `dems` DataFrame that the cells below transform.
dems = pd.read_csv('./data/dems_with_endorsement_counts.csv')

In [56]:
# Show every column label of the freshly loaded frame.
dems.columns.tolist()

['Candidate',
 'State',
 'District',
 'Office.Type',
 'Primary.Status',
 'Primary.Runoff.Status',
 'Primary..',
 'Race',
 'Veteran.',
 'LGBTQ.',
 'Elected.Official.',
 'Self.Funder.',
 'STEM.',
 'Obama.Alum.',
 'Party.Support.',
 'Emily.Endorsed.',
 'Guns.Sense.Candidate.',
 'Biden.Endorsed.',
 'Warren.Endorsed.',
 'Sanders.Endorsed.',
 'Our.Revolution.Endorsed.',
 'Justice.Dems.Endorsed.',
 'PCCC.Endorsed.',
 'Indivisible.Endorsed.',
 'WFP.Endorsed.',
 'VoteVets.Endorsed.',
 'No.Labels.Support.',
 'Election.Month',
 'Neutral.Endorsements',
 'Yes.Endorsements',
 'No.Endorsements']

In [57]:
# Collect the twelve endorsement/support columns by label rather than by
# position (previously iloc[:, 15:27]) so the selection still picks the right
# columns if the CSV gains or reorders columns. A label slice with .loc
# includes both endpoints.
cols_to_drop = list(dems.loc[:, 'Emily.Endorsed.':'No.Labels.Support.'])
cols_to_drop

['Emily.Endorsed.',
 'Guns.Sense.Candidate.',
 'Biden.Endorsed.',
 'Warren.Endorsed.',
 'Sanders.Endorsed.',
 'Our.Revolution.Endorsed.',
 'Justice.Dems.Endorsed.',
 'PCCC.Endorsed.',
 'Indivisible.Endorsed.',
 'WFP.Endorsed.',
 'VoteVets.Endorsed.',
 'No.Labels.Support.']

In [58]:
# Queue three more columns for removal. Only names that are not already in the
# list are appended, so re-running this cell does not accumulate duplicates.
cols_to_drop.extend([
    col
    for col in ('District', 'Primary.Runoff.Status', 'Election.Month')
    if col not in cols_to_drop
])

In [59]:
# Print the complete drop list on a single line as a check before dropping.
print(cols_to_drop)

['Emily.Endorsed.', 'Guns.Sense.Candidate.', 'Biden.Endorsed.', 'Warren.Endorsed.', 'Sanders.Endorsed.', 'Our.Revolution.Endorsed.', 'Justice.Dems.Endorsed.', 'PCCC.Endorsed.', 'Indivisible.Endorsed.', 'WFP.Endorsed.', 'VoteVets.Endorsed.', 'No.Labels.Support.', 'District', 'Primary.Runoff.Status', 'Election.Month']


In [60]:
# Drop every column named in `cols_to_drop` (the endorsement/support flags plus
# District, Primary.Runoff.Status and Election.Month). Candidate is kept.
dems = dems.drop(columns=cols_to_drop)

In [61]:
# Show the column labels that remain after the drop.
dems.columns.tolist()

['Candidate',
 'State',
 'Office.Type',
 'Primary.Status',
 'Primary..',
 'Race',
 'Veteran.',
 'LGBTQ.',
 'Elected.Official.',
 'Self.Funder.',
 'STEM.',
 'Obama.Alum.',
 'Party.Support.',
 'Neutral.Endorsements',
 'Yes.Endorsements',
 'No.Endorsements']

In [62]:
# Categorical columns that will be expanded into indicator (dummy) columns.
cols_to_encode = [
    'Office.Type',
    'Primary.Status',
    'Race',
    'Veteran.',
    'LGBTQ.',
    'Elected.Official.',
    'Self.Funder.',
    'STEM.',
    'Obama.Alum.',
    'Party.Support.',
]

In [63]:
# One-hot encode: each column listed in `cols_to_encode` is replaced by one
# indicator column per observed level, named "<column>_<level>".
dems = pd.get_dummies(data=dems, columns=cols_to_encode)

In [64]:
# Show the column labels after one-hot encoding.
dems.columns.tolist()

['Candidate',
 'State',
 'Primary..',
 'Neutral.Endorsements',
 'Yes.Endorsements',
 'No.Endorsements',
 'Office.Type_Governor',
 'Office.Type_Representative',
 'Office.Type_Senator',
 'Primary.Status_Advanced',
 'Primary.Status_Lost',
 'Race_Nonwhite',
 'Race_Unknown',
 'Race_White',
 'Veteran._No',
 'Veteran._Unknown',
 'Veteran._Yes',
 'LGBTQ._No',
 'LGBTQ._Unknown',
 'LGBTQ._Yes',
 'Elected.Official._No',
 'Elected.Official._Yes',
 'Self.Funder._No',
 'Self.Funder._Yes',
 'STEM._No',
 'STEM._Unknown',
 'STEM._Yes',
 'Obama.Alum._No',
 'Obama.Alum._Yes',
 'Party.Support._Neutral',
 'Party.Support._No',
 'Party.Support._Yes']

In [50]:
# For the two-level variables, one indicator fully determines the other, so
# keep a single column per variable and discard its complement.
drop = [
    'Primary.Status_Lost',
    'Elected.Official._Yes',
    'Self.Funder._No',
    'Obama.Alum._No',
]
dems = dems.drop(columns=drop)

In [11]:
# Write the encoded frame to disk.
# NOTE(review): `index=False` is not passed, so the row index is written as an
# unnamed first column and comes back as "Unnamed: 0" when the file is re-read
# without `index_col=0` — confirm this is intended before changing it.
dems.to_csv('./data/dem_with_dummies.csv')

In [51]:
# Preview only the first rows instead of rendering the entire frame, which
# keeps the saved notebook output small and readable.
dems.head(10)

Unnamed: 0,State,Primary..,Neutral.Endorsements,Yes.Endorsements,No.Endorsements,Office.Type_Governor,Office.Type_Representative,Office.Type_Senator,Primary.Status_Advanced,Race_Nonwhite,...,LGBTQ._Yes,Elected.Official._No,Self.Funder._Yes,STEM._No,STEM._Unknown,STEM._Yes,Obama.Alum._Yes,Party.Support._Neutral,Party.Support._No,Party.Support._Yes
0,AL,3.420000,11,0,1,1,0,0,0,1,...,0,1,0,1,0,0,0,1,0,0
1,AL,1.740000,11,0,1,1,0,0,0,0,...,1,1,0,1,0,0,0,1,0,0
2,AL,3.270000,11,0,1,1,0,0,0,0,...,0,1,0,1,0,0,0,1,0,0
3,AL,8.000000,11,0,1,1,0,0,0,1,...,0,0,0,1,0,0,0,1,0,0
4,AL,28.980000,11,0,1,1,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
5,AL,54.599998,11,1,0,1,0,0,1,0,...,0,0,0,1,0,0,0,1,0,0
6,AL,19.230000,12,0,0,0,1,0,0,1,...,0,1,0,0,0,1,0,1,0,0
7,AL,80.769997,12,0,0,0,1,0,1,1,...,0,1,0,1,0,0,0,1,0,0
8,AL,39.560001,12,0,0,0,1,0,0,1,...,0,1,0,1,0,0,0,1,0,0
9,AL,60.439999,12,0,0,0,1,0,1,0,...,0,1,0,1,0,0,0,1,0,0


In [2]:
# Map each row label to that row's candidate name, for relabelling the index.
# Series.to_dict() builds the {index label: value} mapping in one call; it
# replaces a Python loop over chained `dems['Candidate'][i]` lookups and does
# not assume the index is exactly 0..n-1.
candidate_dict = dems['Candidate'].to_dict()

In [3]:
# Relabel the rows using `candidate_dict` (row label -> candidate name).
# The result is reassigned rather than applied with `inplace=True`, so the step
# is an explicit rebinding instead of a hidden mutation of the existing frame.
dems = dems.rename(index=candidate_dict)