In [41]:
import pandas as pd
# Load the dems_with_endorsement_counts CSV into the working DataFrame.
dems_csv_path = '../data/dems_with_endorsement_counts.csv'
dems = pd.read_csv(dems_csv_path)

In [42]:
# Collect the labels of the columns at positions 15..26 (end-exclusive slice 15:27).
# NOTE(review): this is a positional selection, so it silently picks different
# columns if the CSV layout changes -- the listing shown as output is the contract.
cols_to_drop = dems.columns[15:27].tolist()
cols_to_drop

['Emily.Endorsed.',
 'Guns.Sense.Candidate.',
 'Biden.Endorsed.',
 'Warren.Endorsed.',
 'Sanders.Endorsed.',
 'Our.Revolution.Endorsed.',
 'Justice.Dems.Endorsed.',
 'PCCC.Endorsed.',
 'Indivisible.Endorsed.',
 'WFP.Endorsed.',
 'VoteVets.Endorsed.',
 'No.Labels.Support.']

In [43]:
# Append three more columns to the drop list (the list is extended in place).
cols_to_drop += ['District', 'Primary.Runoff.Status', 'Election.Month']

In [44]:
# Print the complete drop list on one line to confirm the three extra names were appended.
print(cols_to_drop)

['Emily.Endorsed.', 'Guns.Sense.Candidate.', 'Biden.Endorsed.', 'Warren.Endorsed.', 'Sanders.Endorsed.', 'Our.Revolution.Endorsed.', 'Justice.Dems.Endorsed.', 'PCCC.Endorsed.', 'Indivisible.Endorsed.', 'WFP.Endorsed.', 'VoteVets.Endorsed.', 'No.Labels.Support.', 'District', 'Primary.Runoff.Status', 'Election.Month']


In [45]:
# Drop every column named in cols_to_drop: the individual endorsement flags plus
# District, Primary.Runoff.Status and Election.Month. The Candidate column is NOT
# in that list and therefore stays in the frame.
dems = dems.drop(columns=cols_to_drop)

In [46]:
# Inspect the column labels that remain after the drop.
dems.columns.tolist()

['Candidate',
 'State',
 'Office.Type',
 'Primary.Status',
 'Primary..',
 'Race',
 'Veteran.',
 'LGBTQ.',
 'Elected.Official.',
 'Self.Funder.',
 'STEM.',
 'Obama.Alum.',
 'Party.Support.',
 'Neutral.Endorsements',
 'Yes.Endorsements',
 'No.Endorsements']

In [47]:
# Categorical columns that will be expanded into one-hot indicator columns.
cols_to_encode = [
    'Office.Type',
    'Primary.Status',
    'Race',
    'Veteran.',
    'LGBTQ.',
    'Elected.Official.',
    'Self.Funder.',
    'STEM.',
    'Obama.Alum.',
    'Party.Support.',
]

In [48]:
# One-hot encode: each column in cols_to_encode is replaced by one indicator
# column per observed level, named "<column>_<level>". Other columns pass through.
dems = pd.get_dummies(data=dems, columns=cols_to_encode)

In [49]:
# Inspect the column labels produced by the one-hot encoding.
dems.columns.tolist()

['Candidate',
 'State',
 'Primary..',
 'Neutral.Endorsements',
 'Yes.Endorsements',
 'No.Endorsements',
 'Office.Type_Governor',
 'Office.Type_Representative',
 'Office.Type_Senator',
 'Primary.Status_Advanced',
 'Primary.Status_Lost',
 'Race_Nonwhite',
 'Race_Unknown',
 'Race_White',
 'Veteran._No',
 'Veteran._Unknown',
 'Veteran._Yes',
 'LGBTQ._No',
 'LGBTQ._Unknown',
 'LGBTQ._Yes',
 'Elected.Official._No',
 'Elected.Official._Yes',
 'Self.Funder._No',
 'Self.Funder._Yes',
 'STEM._No',
 'STEM._Unknown',
 'STEM._Yes',
 'Obama.Alum._No',
 'Obama.Alum._Yes',
 'Party.Support._Neutral',
 'Party.Support._No',
 'Party.Support._Yes']

In [50]:
# For the four two-level variables, keep a single indicator column each: the
# dropped "_Lost" / "_No" column is the complement of the one that remains
# (assuming those variables have no missing values -- TODO confirm).
drop = [
    'Primary.Status_Lost',
    'Elected.Official._No',
    'Self.Funder._No',
    'Obama.Alum._No',
]
dems = dems.drop(columns=drop)

In [52]:
# Inspect the column labels after removing the redundant binary indicators.
dems.columns.tolist()

['Candidate',
 'State',
 'Primary..',
 'Neutral.Endorsements',
 'Yes.Endorsements',
 'No.Endorsements',
 'Office.Type_Governor',
 'Office.Type_Representative',
 'Office.Type_Senator',
 'Primary.Status_Advanced',
 'Race_Nonwhite',
 'Race_Unknown',
 'Race_White',
 'Veteran._No',
 'Veteran._Unknown',
 'Veteran._Yes',
 'LGBTQ._No',
 'LGBTQ._Unknown',
 'LGBTQ._Yes',
 'Elected.Official._Yes',
 'Self.Funder._Yes',
 'STEM._No',
 'STEM._Unknown',
 'STEM._Yes',
 'Obama.Alum._Yes',
 'Party.Support._Neutral',
 'Party.Support._No',
 'Party.Support._Yes']

In [53]:
# Map every current column label to a copy with each '.' replaced by '_'.
col_names = {label: label.replace('.', '_') for label in dems.columns}

In [55]:
# Display the old-label -> new-label mapping for review.
col_names

{'Candidate': 'Candidate',
 'State': 'State',
 'Primary..': 'Primary__',
 'Neutral.Endorsements': 'Neutral_Endorsements',
 'Yes.Endorsements': 'Yes_Endorsements',
 'No.Endorsements': 'No_Endorsements',
 'Office.Type_Governor': 'Office_Type_Governor',
 'Office.Type_Representative': 'Office_Type_Representative',
 'Office.Type_Senator': 'Office_Type_Senator',
 'Primary.Status_Advanced': 'Primary_Status_Advanced',
 'Race_Nonwhite': 'Race_Nonwhite',
 'Race_Unknown': 'Race_Unknown',
 'Race_White': 'Race_White',
 'Veteran._No': 'Veteran__No',
 'Veteran._Unknown': 'Veteran__Unknown',
 'Veteran._Yes': 'Veteran__Yes',
 'LGBTQ._No': 'LGBTQ__No',
 'LGBTQ._Unknown': 'LGBTQ__Unknown',
 'LGBTQ._Yes': 'LGBTQ__Yes',
 'Elected.Official._Yes': 'Elected_Official__Yes',
 'Self.Funder._Yes': 'Self_Funder__Yes',
 'STEM._No': 'STEM__No',
 'STEM._Unknown': 'STEM__Unknown',
 'STEM._Yes': 'STEM__Yes',
 'Obama.Alum._Yes': 'Obama_Alum__Yes',
 'Party.Support._Neutral': 'Party_Support__Neutral',
 'Party.Support._No': 'Party_Support__No',
 'Party.Support._Yes': 'Party_Support__Yes'}

In [56]:
# Replace the auto-generated name for 'Primary..' ('Primary__') with a readable one.
col_names.update({'Primary..': 'Primary_Vote_Percentage'})

In [57]:
# Apply the col_names mapping to the column labels only. The row index is left
# untouched on purpose: passing index=str here would cast every row label to a
# string, so integer-keyed lookups and index renames afterwards would stop matching.
dems = dems.rename(columns=col_names)

In [58]:
# Inspect the column labels after the rename.
dems.columns.tolist()

['Candidate',
 'State',
 'Primary_Vote_Percentage',
 'Neutral_Endorsements',
 'Yes_Endorsements',
 'No_Endorsements',
 'Office_Type_Governor',
 'Office_Type_Representative',
 'Office_Type_Senator',
 'Primary_Status_Advanced',
 'Race_Nonwhite',
 'Race_Unknown',
 'Race_White',
 'Veteran__No',
 'Veteran__Unknown',
 'Veteran__Yes',
 'LGBTQ__No',
 'LGBTQ__Unknown',
 'LGBTQ__Yes',
 'Elected_Official__Yes',
 'Self_Funder__Yes',
 'STEM__No',
 'STEM__Unknown',
 'STEM__Yes',
 'Obama_Alum__Yes',
 'Party_Support__Neutral',
 'Party_Support__No',
 'Party_Support__Yes']

In [80]:
# Write the current frame to CSV (the row index is written too, since no index= argument is given).
# NOTE(review): this path starts with './data/' while the other read/write cells in
# this notebook use '../data/' -- confirm which directory is intended.
dems.to_csv('./data/dem_with_dummies.csv')

In [40]:
# Inspect the current column labels.
# NOTE(review): the recorded output for this cell still lists Primary_Status_Lost,
# Elected_Official__No, Self_Funder__No and Obama_Alum__No, which an earlier cell
# drops -- it looks like it was captured from a different kernel state; re-run to confirm.
dems.columns.tolist()

['Candidate',
 'State',
 'Primary_Vote_Percentage',
 'Neutral_Endorsements',
 'Yes_Endorsements',
 'No_Endorsements',
 'Office_Type_Governor',
 'Office_Type_Representative',
 'Office_Type_Senator',
 'Primary_Status_Advanced',
 'Primary_Status_Lost',
 'Race_Nonwhite',
 'Race_Unknown',
 'Race_White',
 'Veteran__No',
 'Veteran__Unknown',
 'Veteran__Yes',
 'LGBTQ__No',
 'LGBTQ__Unknown',
 'LGBTQ__Yes',
 'Elected_Official__No',
 'Elected_Official__Yes',
 'Self_Funder__No',
 'Self_Funder__Yes',
 'STEM__No',
 'STEM__Unknown',
 'STEM__Yes',
 'Obama_Alum__No',
 'Obama_Alum__Yes',
 'Party_Support__Neutral',
 'Party_Support__No',
 'Party_Support__Yes']

In [59]:
# Persist the encoded, renamed frame next to the input data.
renamed_csv_path = '../data/dem_with_dummies_renamed.csv'
dems.to_csv(renamed_csv_path)

In [2]:
# Build {row label -> candidate name}. Series.to_dict() keys the mapping by the
# frame's actual index labels, so it stays correct whether the index holds
# 0..n-1 integers, strings, or a filtered/non-contiguous set of labels. On a
# default 0..n-1 integer index the result is the same {0: name0, 1: name1, ...}
# mapping that an integer loop over range(len(dems)) would produce.
candidate_dict = dems['Candidate'].to_dict()

In [3]:
# Relabel each row using candidate_dict (row label -> candidate name); labels
# missing from the mapping are left as they are.
dems = dems.rename(index=candidate_dict)