In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNetCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline
%config InlineBackend.figure_format='retina' 

import warnings
warnings.filterwarnings("ignore")

In [2]:
# code for the font from https://github.com/scentellegher/code_snippets/blob/d6e3a65b0c4e715d982466f77d94c4f8b0827b38/matplotlib_font/Matplotlib_custom_font.ipynb

from matplotlib import font_manager

font_dirs = ['font/']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)

for font_file in font_files:
    font_manager.fontManager.addfont(font_file)

# set font
plt.rcParams['font.family'] = 'Barlow'

### DATA

In [3]:
# import data

df = pd.read_csv('./data/county_data.csv',  dtype = {'fips' : 'str', 'rucc_grouped' : 'str', 'RUCC_2013' : 'str'})

## MODELS

In [4]:
X_rep = df[['proximity_rep_coloc', 'proximity_rep_sci', 'proximity_rep_res', 'perc_latino_hispanic', 'perc_african_american', 'perc_unemployed', 'perc_graduated', 'perc_urban_pop']]

X_dem = df[['proximity_dem_coloc','proximity_dem_sci', 'proximity_dem_res', 'perc_latino_hispanic', 'perc_african_american', 'perc_unemployed', 'perc_graduated', 'perc_urban_pop']]

y_rep = df['perc_nv_rep']
y_dem = df['perc_nv_dem']

### REPUBLICANS

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_rep, y_rep, test_size=0.3, random_state=42)


alphas = [0, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.05, 1e-1, 0.2, 0.5, 1]

l1_ratios = [.1, .5, .7, .9, .95, .99, 1]

elasticnet_cv = ElasticNetCV(alphas=alphas, l1_ratio=l1_ratios, cv=5)

elasticnet_cv.fit(X_train, y_train)

# best hyperparameters
print("Best alpha:", elasticnet_cv.alpha_)
print("Best l1_ratio:", elasticnet_cv.l1_ratio_)

y_pred = elasticnet_cv.predict(X_test)

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'{elasticnet_cv} -- Mean Squared Error: {mse} -- r2: {r2}')

Best alpha: 1e-05
Best l1_ratio: 0.1
ElasticNetCV(alphas=[0, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 0.0001, 0.001,
                     0.01, 0.05, 0.1, 0.2, 0.5, 1],
             cv=5, l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]) -- Mean Squared Error: 0.0007123236330253612 -- r2: 0.9711250052858833


In [6]:
# beta coefficients

elasticnet_cv.coef_

array([ 1.08186333, -0.18019839,  0.09060427, -0.00575716, -0.03099309,
        0.01173719, -0.05391006, -0.00997137])

### DEMOCRATS

In [7]:

X_train, X_test, y_train, y_test = train_test_split(X_dem, y_dem, test_size=0.3, random_state=42)


alphas = [0, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.05, 1e-1, 0.2, 0.5, 1]
l1_ratios = [.1, .5, .7, .9, .95, .99, 1]

elasticnet_cv = ElasticNetCV(alphas=alphas, l1_ratio=l1_ratios, cv=5)

elasticnet_cv.fit(X_train, y_train)

# best hyperparameters
print("Best alpha:", elasticnet_cv.alpha_)
print("Best l1_ratio:", elasticnet_cv.l1_ratio_)

y_pred = elasticnet_cv.predict(X_test)

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'{elasticnet_cv} -- Mean Squared Error: {mse} -- r2: {r2}')


Best alpha: 1e-05
Best l1_ratio: 0.1
ElasticNetCV(alphas=[0, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 0.0001, 0.001,
                     0.01, 0.05, 0.1, 0.2, 0.5, 1],
             cv=5, l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]) -- Mean Squared Error: 0.0007081544447812315 -- r2: 0.9708775518283517


In [8]:
# beta coefficients

elasticnet_cv.coef_

array([ 1.15779242, -0.21849417,  0.06245063, -0.00274884,  0.0322112 ,
       -0.00804908,  0.05364218,  0.01893531])