# Project #3
## Predicting Voting Results

In [44]:
import geopandas as gpd
import mapclassify
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn import tree
from mpl_toolkits.axes_grid1 import make_axes_locatable

from sklearn.ensemble import RandomForestRegressor

In [32]:
gdf = gpd.read_file('./va_admin_bndry/VirginiaCounty_ClippedToShoreline.shx')
vdf = pd.read_csv('./data/voting_VA.csv')
cdf = pd.read_csv('./data/county_adjacencies.csv')


In [33]:
gdf['FIPS_left'] = pd.to_numeric(gdf['STCOFIPS']) 
df = gdf.merge(cdf,left_on='FIPS_left',right_on='FIPS')

In [34]:
for y in vdf['year'].unique():
    df20 = vdf.loc[ vdf['year']==y,:]

    Dvotes = df20.loc[(df20['party']=='DEMOCRAT'),:].groupby('county_fips')['candidatevotes'].sum()
    Dvotes = Dvotes.rename('dem_votes_' + str(y))
    df = df.merge(Dvotes,left_on='FIPS_left',right_on='county_fips')

    Rvotes = df20.loc[(df20['party']=='REPUBLICAN'),:].groupby('county_fips')['candidatevotes'].sum()
    Rvotes = Rvotes.rename('rep_votes_' + str(y))
    df = df.merge(Rvotes,left_on='FIPS_left',right_on='county_fips')

In [41]:
#cleans data by extracting votes by year for each county
def extract_df(index):
    row = df.iloc[index]
    years = []
    dem_votes = []
    rep_votes = []

    for column in df.columns:
        if column.startswith('dem_votes'):
            year = column.split('_')[-1]
            years.append(year)
            dem_votes.append(row[column])
            rep_votes.append(row['rep_votes_' + year])

    result_df = pd.DataFrame({'dem_votes': dem_votes, 'rep_votes': rep_votes})
    result_df['vote_diff'] = result_df['rep_votes']-result_df['dem_votes']
    return result_df


In [50]:

def ensemble(df):
    # Split data into training rows and testing rows:
    N = df.shape[0]
    df = df.sample(frac=1, random_state=100) # randomize the order in which data appears
    train_size = int(.8*N)
    df_train = df[0:train_size]
    N_train = df_train.shape[0]
    df_test = df[train_size:]
    N_test = df_test.shape[0]

    # Bootstrap:
    T = 1000
    m_depth = 5
    Rsq = np.zeros(T) # preallocate the rsq measure
    y_test_hat = np.zeros([T,N_test]) # preallocate predictions on test set
    df_train.head()

    # Split data into train/test:
    X_train = df_train.drop(['vote_diff'],axis=1)
    y_train = df_train['vote_diff']
    X_test = df_test.drop(['vote_diff'],axis=1)
    y_test = df_test['vote_diff']
    
    for s in range(T):
        # Generate a bootstrap sample:
        df_s = df_train.sample(frac=1, replace=True)
        X_s = df_s.drop('vote_diff',axis=1)
        y_s = df_s['vote_diff']
        # Fit decision tree:
        cart = tree.DecisionTreeRegressor(max_depth=m_depth) # Create a classifier object
        cart = cart.fit(X_s, y_s) # Fit the classifier
        # Compute Rsq:
        y_hat = cart.predict(X_test)
        SSE = np.sum( (y_test-y_hat)**2 )
        TSS = np.sum( (y_test-y_s.mean())**2 )
        Rsq[s] = 1 - SSE/TSS
        # Make and Save Predictions:
        y_test_hat[s,:] = y_hat
    
    # Ensemble predictor:
    y_hat_ensemble = y_test_hat.mean(axis=0) # Average the columns to get the ensemble prediction
    SSE = np.sum( (y_test-y_hat_ensemble)**2 )
    TSS = np.sum( (y_test-y_train.mean())**2 )
    Rsq_ensemble = 1 - SSE/TSS

    return Rsq_ensemble
    # print(Rsq_ensemble)

    # print(y_hat_ensemble.mean())


In [52]:
ensemble(extract_df(0))
ensemble(extract_df(1))
ensemble(extract_df(2))



-6.428732899137919
1221.7515
0.25696501517213255
-8229.983
0.6023659080974642
1628.1399999999999


In [60]:
for i in range(len(df)):
    ensemble(extract_df(2))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
