In [1]:
import numpy as np
import pandas as pd

import pickle

from sklearn import naive_bayes
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, GridSearchCV
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neighbors import KNeighborsClassifier

from mlxtend.classifier import StackingClassifier

In [2]:
sf1_numbers = pd.read_csv('sf1_numbers_07.csv')
sf1_percentages = pd.read_csv('sf1_percentages_07.csv')

acs1_numbers = pd.read_csv('acs1_numbers_07.csv')
acs1_percentages = pd.read_csv('acs1_percentages_07.csv')

social_numbers = pd.read_csv('social_numbers_07.csv')
social_percentages = pd.read_csv('social_percentages_07.csv')

economic_numbers = pd.read_csv('economic_numbers_07.csv')
economic_percentages = pd.read_csv('economic_percentages_07.csv')

housing_numbers = pd.read_csv('housing_numbers_07.csv')
housing_percentages = pd.read_csv('housing_percentages_07.csv')

In [3]:
predictions = pd.read_csv('predictions.csv')

In [4]:
full_dfs = [sf1_numbers, sf1_percentages, acs1_numbers, acs1_percentages,
           social_numbers, social_percentages, economic_numbers,
           economic_percentages, housing_numbers, housing_percentages]

dfs = [sf1_numbers, acs1_numbers,social_numbers,economic_numbers,housing_numbers]

dfs_numbers = ['sf1_numbers', 'acs1_numbers', 'social_numbers', 'economic_numbers',
              'housing_numbers']

names = ['sf1_numbers', 'sf1_percentages', 'acs1_numbers',
         'acs1_percentages','social_numbers','social_percentages',
         'economic_numbers','economic_percentages','housing_numbers',
         'housing_percentages']

In [5]:
for df in full_dfs:
    df.drop(['Unnamed: 0'], axis=1, inplace=True)
    
predictions.drop(['Unnamed: 0'], axis=1, inplace=True)

In [6]:
for index, df in enumerate(full_dfs):
    df.name = names[index]

In [7]:
y = predictions.loc[:,'Democrat']

scale_sf1 = StandardScaler()
scale_acs1 = StandardScaler()
scale_social = StandardScaler()
scale_economic = StandardScaler()
scale_housing = StandardScaler()

scalers = {'sf1_numbers': scale_sf1, 'acs1_numbers': scale_acs1, 'social_numbers': scale_social,
          'economic_numbers': scale_economic, 'housing_numbers': scale_housing}

kf = StratifiedKFold(n_splits=5,shuffle=True,random_state=42)

models = {}

In [8]:
nbg = naive_bayes.GaussianNB()
nbb = naive_bayes.BernoulliNB()
nbm = naive_bayes.MultinomialNB()

In [9]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    print(df.name,'\n')
    
    model1 = nbg.fit(X_train, y_train)
    
    print('Gaussian Naive Bayes')
    print("Cross Val Score:", np.mean(cross_val_score(model1,X_train,y_train,scoring='roc_auc',cv=kf)))
    print("AUC Score:", roc_auc_score(y_test, model1.predict(X_test)),'\n')
    
    #if roc_auc_score(y_test, model1.predict(X_test)) >= 0.75:
        #models[("{}".format(df.name), "Gaussian Naive Bayes")] = model1
    
    model2 = nbb.fit(X_train, y_train)
    
    print('Bernoulli Naive Bayes')
    print("Cross Val Score:", np.mean(cross_val_score(model2,X_train,y_train,scoring='roc_auc',cv=kf)))
    print("AUC Score:", roc_auc_score(y_test, model2.predict(X_test)),'\n')
    
    
    #if roc_auc_score(y_test, model1.predict(X_test)) >= 0.75:
        #models[("{}".format(df.name), "Bernoulli Naive Bayes")] = model2
    
    model3 = nbm.fit(X_train, y_train)
    
    print('Multinomial Naive Bayes')
    print("Cross Val Score:", np.mean(cross_val_score(model3,X_train,y_train,scoring='roc_auc',cv=kf)))
    print("AUC Score:", roc_auc_score(y_test, model3.predict(X_test)),'\n')
    
    #if roc_auc_score(y_test, model1.predict(X_test)) >= 0.75:
        #models[("{}".format(df.name), "Multinomial Naive Bayes")] = model3

sf1_numbers 

Gaussian Naive Bayes
Cross Val Score: 0.7539096989270548
AUC Score: 0.7192810457516341 

Bernoulli Naive Bayes
Cross Val Score: 0.5
AUC Score: 0.5 

Multinomial Naive Bayes
Cross Val Score: 0.6845638992608245
AUC Score: 0.6643790849673202 

acs1_numbers 

Gaussian Naive Bayes
Cross Val Score: 0.8538940221632589
AUC Score: 0.7807189542483661 

Bernoulli Naive Bayes
Cross Val Score: 0.5374782037274934
AUC Score: 0.5704248366013072 

Multinomial Naive Bayes
Cross Val Score: 0.7936862390094168
AUC Score: 0.7928104575163398 

social_numbers 

Gaussian Naive Bayes
Cross Val Score: 0.8752700219711613
AUC Score: 0.7612745098039216 

Bernoulli Naive Bayes
Cross Val Score: 0.5
AUC Score: 0.5 

Multinomial Naive Bayes
Cross Val Score: 0.7778745642786535
AUC Score: 0.7189542483660131 

economic_numbers 

Gaussian Naive Bayes
Cross Val Score: 0.8948623620799365
AUC Score: 0.8 

Bernoulli Naive Bayes
Cross Val Score: 0.5177653632045078
AUC Score: 0.5089869281045751 

Multinomial Naive 

In [10]:
rf = RandomForestClassifier(n_estimators=10)

params = {'max_depth': range(1,21)}

grid = GridSearchCV(rf, params, scoring='roc_auc', cv=kf)

In [11]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    model = grid.fit(X_train, y_train)
    
    print(df.name, "Cross Val Score:", np.mean(cross_val_score(model, X_train, y_train, scoring='roc_auc',cv=kf)))
    print(df.name, "AUC Score:", roc_auc_score(y_test, model.predict(X_test)))
    print(df.name, "Best Depth:", model.best_params_, '\n')
    
    #if roc_auc_score(y_test, model.predict(X_test)) >= 0.75:
    models[("{}".format(df.name), "Random Forest")] = model.best_estimator_

sf1_numbers Cross Val Score: 0.956301093755628
sf1_numbers AUC Score: 0.9222222222222222
sf1_numbers Best Depth: {'max_depth': 15} 

acs1_numbers Cross Val Score: 0.9257101143380838
acs1_numbers AUC Score: 0.8124183006535948
acs1_numbers Best Depth: {'max_depth': 18} 

social_numbers Cross Val Score: 0.8784075278241346
social_numbers AUC Score: 0.7795751633986928
social_numbers Best Depth: {'max_depth': 5} 

economic_numbers Cross Val Score: 0.9275551500162083
economic_numbers AUC Score: 0.853267973856209
economic_numbers Best Depth: {'max_depth': 18} 

housing_numbers Cross Val Score: 0.8424390510379272
housing_numbers AUC Score: 0.7655228758169935
housing_numbers Best Depth: {'max_depth': 6} 



In [12]:
dt = DecisionTreeClassifier()

params = {'max_depth': range(1,21)}

grid = GridSearchCV(dt, params, scoring='roc_auc', cv=kf)

In [13]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    model = grid.fit(X_train, y_train)
    
    print(df.name, "Cross Val Score:", np.mean(cross_val_score(model, X_train, y_train, scoring='roc_auc',cv=kf)))
    print(df.name, "AUC Score:", roc_auc_score(y_test, model.predict(X_test)))
    print(df.name, "Best Depth:", model.best_params_, '\n')
    
    #if roc_auc_score(y_test, model.predict(X_test)) >= 0.75:
    models[("{}".format(df.name), "Decision Tree")] = model.best_estimator_

sf1_numbers Cross Val Score: 0.9288350442024548
sf1_numbers AUC Score: 0.930392156862745
sf1_numbers Best Depth: {'max_depth': 19} 

acs1_numbers Cross Val Score: 0.8457910554561717
acs1_numbers AUC Score: 0.7841503267973857
acs1_numbers Best Depth: {'max_depth': 4} 

social_numbers Cross Val Score: 0.8648347800682746
social_numbers AUC Score: 0.788235294117647
social_numbers Best Depth: {'max_depth': 4} 

economic_numbers Cross Val Score: 0.8814548230501096
economic_numbers AUC Score: 0.8084967320261438
economic_numbers Best Depth: {'max_depth': 4} 

housing_numbers Cross Val Score: 0.8072232686204591
housing_numbers AUC Score: 0.7666666666666667
housing_numbers Best Depth: {'max_depth': 4} 



In [14]:
lvm = LinearSVC()

params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}

grid = GridSearchCV(lvm, params, scoring='roc_auc', cv=kf)

In [15]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    if df.name in dfs_numbers:
        X_train = scalers[df.name].fit_transform(X_train)
        X_test = scalers[df.name].transform(X_test)
    
    model = grid.fit(X_train, y_train)
    
    print(df.name, "Cross Val Score:", np.mean(cross_val_score(model, X_train, y_train, scoring='roc_auc',cv=kf)))
    print(df.name, "AUC Score:", roc_auc_score(y_test, model.predict(X_test)))
    print(df.name, "Best C:", model.best_params_, '\n')
    
    #if roc_auc_score(y_test, model.predict(X_test)) >= 0.75:
    models[("{}".format(df.name), "Linear SVM")] = model.best_estimator_



sf1_numbers Cross Val Score: 0.7873153105777735
sf1_numbers AUC Score: 0.6936274509803921
sf1_numbers Best C: {'C': 1} 





acs1_numbers Cross Val Score: 0.9125262489344588
acs1_numbers AUC Score: 0.8124183006535948
acs1_numbers Best C: {'C': 10} 





social_numbers Cross Val Score: 0.8876411433007968
social_numbers AUC Score: 0.7614379084967321
social_numbers Best C: {'C': 100} 





economic_numbers Cross Val Score: 0.9257613211511447
economic_numbers AUC Score: 0.8354575163398693
economic_numbers Best C: {'C': 1} 





housing_numbers Cross Val Score: 0.8232961104240955
housing_numbers AUC Score: 0.7805555555555554
housing_numbers Best C: {'C': 10} 





In [16]:
svm = SVC(kernel = 'rbf', gamma = 'scale')

params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}

grid = GridSearchCV(svm, params, scoring='roc_auc', cv=kf)

In [17]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    if df.name in dfs_numbers:
        X_train = scalers[df.name].fit_transform(X_train)
        X_test = scalers[df.name].transform(X_test)
    
    model = grid.fit(X_train, y_train)
    
    print(df.name, "Cross Val Score:", np.mean(cross_val_score(model, X_train, y_train, scoring='roc_auc',cv=kf)))
    print(df.name, "AUC Score:", roc_auc_score(y_test, model.predict(X_test)))
    print(df.name, "Best C:", model.best_params_, '\n')
    
    #if roc_auc_score(y_test, model.predict(X_test)) >= 0.75:
    models[("{}".format(df.name), "RBF SVM")] = model.best_estimator_

sf1_numbers Cross Val Score: 0.9192401059738348
sf1_numbers AUC Score: 0.8753267973856209
sf1_numbers Best C: {'C': 10000} 

acs1_numbers Cross Val Score: 0.9402129649862131
acs1_numbers AUC Score: 0.8787581699346406
acs1_numbers Best C: {'C': 10} 

social_numbers Cross Val Score: 0.8807972826195709
social_numbers AUC Score: 0.7704248366013072
social_numbers Best C: {'C': 1} 

economic_numbers Cross Val Score: 0.9381764152189312
economic_numbers AUC Score: 0.8906862745098039
economic_numbers Best C: {'C': 10} 

housing_numbers Cross Val Score: 0.8296170814773905
housing_numbers AUC Score: 0.7738562091503267
housing_numbers Best C: {'C': 1} 



In [18]:
lr = LogisticRegression(solver='liblinear')

params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000], 'penalty': ['l1','l2']}

grid = GridSearchCV(lr, params, scoring='roc_auc', cv=kf)

In [19]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    if df.name in dfs_numbers:
        X_train = scalers[df.name].fit_transform(X_train)
        X_test = scalers[df.name].transform(X_test)
    
    model = grid.fit(X_train, y_train)
    
    print(df.name, "Cross Val Score:", np.mean(cross_val_score(model, X_train, y_train, scoring='roc_auc',cv=kf)))
    print(df.name, "AUC Score:", roc_auc_score(y_test, model.predict(X_test)))
    print(df.name, "Best C And Penalty:", model.best_params_, '\n')
    
    #if roc_auc_score(y_test, model.predict(X_test)) >= 0.75:
    models[("{}".format(df.name), "Logistic Regression")] = model.best_estimator_

sf1_numbers Cross Val Score: 0.7878354115890872
sf1_numbers AUC Score: 0.7055555555555556
sf1_numbers Best C And Penalty: {'C': 1, 'penalty': 'l2'} 

acs1_numbers Cross Val Score: 0.9169657778151301
acs1_numbers AUC Score: 0.8122549019607843
acs1_numbers Best C And Penalty: {'C': 0.1, 'penalty': 'l1'} 

social_numbers Cross Val Score: 0.8873963309361157
social_numbers AUC Score: 0.7686274509803921
social_numbers Best C And Penalty: {'C': 0.1, 'penalty': 'l2'} 

economic_numbers Cross Val Score: 0.9268663801211015
economic_numbers AUC Score: 0.8444444444444443
economic_numbers Best C And Penalty: {'C': 1000, 'penalty': 'l2'} 

housing_numbers Cross Val Score: 0.8241853757708917
housing_numbers AUC Score: 0.7622549019607844
housing_numbers Best C And Penalty: {'C': 0.01, 'penalty': 'l1'} 



In [20]:
knn = KNeighborsClassifier()

params = {'n_neighbors': range(1,11)}

grid = GridSearchCV(knn, params, scoring='roc_auc', cv=kf)

In [21]:
for df in dfs:
    X = df.loc[:]
    
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)
    
    if df.name in dfs_numbers:
        X_train = scalers[df.name].fit_transform(X_train)
        X_test = scalers[df.name].transform(X_test)
    
    model = grid.fit(X_train, y_train)
    
    print(df.name, "Cross Val Score:", np.mean(cross_val_score(model, X_train, y_train, scoring='roc_auc',cv=kf)))
    print(df.name, "AUC Score:", roc_auc_score(y_test, model.predict(X_test)))
    print(df.name, "Best K:", model.best_params_, '\n')
    
    #if roc_auc_score(y_test, model.predict(X_test)) >= 0.75:
    models[("{}".format(df.name), "K Nearest Neighbor")] = model.best_estimator_

sf1_numbers Cross Val Score: 0.9251990002921484
sf1_numbers AUC Score: 0.9238562091503268
sf1_numbers Best K: {'n_neighbors': 2} 

acs1_numbers Cross Val Score: 0.9170514189208119
acs1_numbers AUC Score: 0.8383986928104575
acs1_numbers Best K: {'n_neighbors': 5} 

social_numbers Cross Val Score: 0.8821711565475262
social_numbers AUC Score: 0.7573529411764705
social_numbers Best K: {'n_neighbors': 9} 

economic_numbers Cross Val Score: 0.9275496176057436
economic_numbers AUC Score: 0.851797385620915
economic_numbers Best K: {'n_neighbors': 8} 

housing_numbers Cross Val Score: 0.8114394216261861
housing_numbers AUC Score: 0.7436274509803922
housing_numbers Best K: {'n_neighbors': 10} 



In [22]:
predictions_sf1 = pd.DataFrame()
predictions_acs1 = pd.DataFrame()
predictions_social = pd.DataFrame()
predictions_economic = pd.DataFrame()
predictions_housing = pd.DataFrame()

prediction_dfs = [predictions_sf1, predictions_acs1, predictions_social, predictions_economic, predictions_housing]

for df in prediction_dfs:
    df['target'] = predictions['Democrat']

In [23]:
models

{('sf1_numbers',
  'Random Forest'): RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
             max_depth=15, max_features='auto', max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
             oob_score=False, random_state=None, verbose=0,
             warm_start=False),
 ('acs1_numbers',
  'Random Forest'): RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
             max_depth=18, max_features='auto', max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
             oob_score=False, random_state=None, verbose=0,
             warm_start=False),
 ('social_numbers',
  'Random Forest'): RandomForestClassifier(boot

In [24]:
with open('models07.pickle', 'wb') as to_write:
    pickle.dump(models, to_write)

In [25]:
for key, model in models.items():
    
    if key[0] == 'sf1_numbers':
        if key[1] in ['Linear SVM','RBF SVM','Logistic Regression','K Nearest Neighbor']:
            scaled = scalers['sf1_numbers'].fit_transform(sf1_numbers)
            predictions_sf1[key[1]] = model.predict(scaled)
        else:
            predictions_sf1[key[1]] = model.predict(sf1_numbers)
            
    elif key[0] == 'acs1_numbers':
        if key[1] in ['Linear SVM','RBF SVM','Logistic Regression','K Nearest Neighbor']:
            scaled = scalers['acs1_numbers'].fit_transform(acs1_numbers)
            predictions_acs1[key[1]] = model.predict(scaled)
        else:
            predictions_acs1[key[1]] = model.predict(acs1_numbers)
            
    elif key[0] == 'social_numbers':
        if key[1] in ['Linear SVM','RBF SVM','Logistic Regression','K Nearest Neighbor']:
            scaled = scalers['social_numbers'].fit_transform(social_numbers)
            predictions_social[key[1]] = model.predict(scaled)
        else:
            predictions_social[key[1]] = model.predict(social_numbers)
            
    elif key[0] == 'economic_numbers':
        if key[1] in ['Linear SVM','RBF SVM','Logistic Regression','K Nearest Neighbor']:
            scaled = scalers['economic_numbers'].fit_transform(economic_numbers)
            predictions_economic[key[1]] = model.predict(scaled)
        else:
            predictions_economic[key[1]] = model.predict(economic_numbers)
            
    elif key[0] == 'housing_numbers':
        if key[1] in ['Linear SVM','RBF SVM','Logistic Regression','K Nearest Neighbor']:
            scaled = scalers['housing_numbers'].fit_transform(housing_numbers)
            predictions_housing[key[1]] = model.predict(scaled)
        else:
            predictions_housing[key[1]] = model.predict(housing_numbers)
        

In [26]:
predictions_sf1

Unnamed: 0,target,Random Forest,Decision Tree,Linear SVM,RBF SVM,Logistic Regression,K Nearest Neighbor
0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0
7,1,1,1,0,1,0,1
8,0,0,0,0,0,0,0
9,1,1,1,0,1,0,1


In [27]:
predictions_acs1.groupby(['target']).mean()

Unnamed: 0_level_0,Random Forest,Decision Tree,Linear SVM,RBF SVM,Logistic Regression,K Nearest Neighbor
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.025862,0.105603,0.109914,0.037716,0.102371,0.060345
1,0.935287,0.777778,0.769231,0.901099,0.764347,0.855922


In [28]:
predictions_social.groupby(['target']).mean()

Unnamed: 0_level_0,Random Forest,Decision Tree,Linear SVM,RBF SVM,Logistic Regression,K Nearest Neighbor
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.09375,0.105603,0.174569,0.099138,0.127155,0.109914
1,0.73138,0.722833,0.765568,0.695971,0.71917,0.766789


In [29]:
predictions_economic.groupby(['target']).mean()

Unnamed: 0_level_0,Random Forest,Decision Tree,Linear SVM,RBF SVM,Logistic Regression,K Nearest Neighbor
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.03125,0.141164,0.092672,0.038793,0.100216,0.047414
1,0.954823,0.852259,0.798535,0.923077,0.805861,0.791209


In [30]:
predictions_housing.groupby(['target']).mean()

Unnamed: 0_level_0,Random Forest,Decision Tree,Linear SVM,RBF SVM,Logistic Regression,K Nearest Neighbor
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.162716,0.258621,0.232759,0.181034,0.296336,0.158405
1,0.807082,0.824176,0.772894,0.749695,0.814408,0.700855


In [31]:
sf1_models = []
acs1_models = []
social_models = []
economic_models = []
housing_models = []

In [32]:
for key, model in models.items():
    if key[0] == 'sf1_numbers': #and key[1] in ['Random Forest','RBF SVM','K Nearest Neighbor']:
        sf1_models.append((key[1],model))
    if key[0] == 'acs1_numbers': #and key[1] in ['Random Forest','RBF SVM','K Nearest Neighbor']:
        acs1_models.append((key[1],model))
    if key[0] == 'social_numbers': #and key[1] in ['Random Forest','RBF SVM','K Nearest Neighbor']:
        social_models.append((key[1],model))
    if key[0] == 'economic_numbers': #and key[1] in ['Random Forest','RBF SVM','K Nearest Neighbor']:
        economic_models.append((key[1],model))
    if key[0] == 'housing_numbers': #and key[1] in ['Random Forest','RBF SVM','K Nearest Neighbor']:
        housing_models.append((key[1],model))

In [33]:
voting_sf1 = VotingClassifier(estimators=sf1_models, voting='hard',n_jobs=-1)
voting_acs1 = VotingClassifier(estimators=acs1_models, voting='hard',n_jobs=-1)
voting_social = VotingClassifier(estimators=social_models, voting='hard',n_jobs=-1)
voting_economic = VotingClassifier(estimators=economic_models, voting='hard',n_jobs=-1)
voting_housing = VotingClassifier(estimators=housing_models, voting='hard',n_jobs=-1)

In [34]:
y = predictions['Democrat']
X = sf1_numbers

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)

voting_sf1.fit(X_train, y_train)
roc_auc_score(y_test, voting_sf1.predict(X_test))

0.9183006535947713

In [35]:
y = predictions['Democrat']
X = acs1_numbers

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)

voting_acs1.fit(X_train, y_train)
roc_auc_score(y_test, voting_acs1.predict(X_test))

0.8254901960784314

In [36]:
y = predictions['Democrat']
X = social_numbers

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)

voting_social.fit(X_train, y_train)
roc_auc_score(y_test, voting_social.predict(X_test))

0.7761437908496731

In [37]:
y = predictions['Democrat']
X = economic_numbers

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)

voting_economic.fit(X_train, y_train)
roc_auc_score(y_test, voting_economic.predict(X_test))

0.840359477124183

In [38]:
y = predictions['Democrat']
X = economic_numbers

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 42)

voting_housing.fit(X_train, y_train)
roc_auc_score(y_test, voting_housing.predict(X_test))

0.8068627450980391

In [39]:
voters = [('voting_sf1', voting_sf1), ('voting_acs1', voting_acs1)]

voting = VotingClassifier(estimators = voters, voting='hard', n_jobs=-1)

