In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score


from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

In [2]:
df = pd.read_csv('../data/full_data_interaction_05_025.csv')

In [3]:
X = df.drop(columns = ['state', 'county_name', 'unemployment_rate_2010', 'population_total_2010'])
y = df['unemployment_rate_2010']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
ss = StandardScaler()
Z_train = ss.fit_transform(X_train)
Z_test = ss.transform(X_test)

### Linear Regression

In [6]:
lr = LinearRegression()
lr.fit(Z_train, y_train)

LinearRegression()

In [7]:
lr.score(Z_train, y_train), lr.score(Z_test, y_test), cross_val_score(lr, Z_train, y_train).mean()

(0.5440914846052834, 0.45134894131845105, 0.4834601772545518)

In [8]:
lin_train = lr.predict(Z_train)
lin_test = lr.predict(Z_test)

### PCA data Linear Regression

In [9]:
df = pd.read_csv('../data/final_work_data.csv')

In [10]:
X = df.drop(columns = ['state', 'county_name', 'unemployment_rate_2010', 'population_total_2010'])
y = df['unemployment_rate_2010']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [12]:
pipe = Pipeline([
    ('poly', PolynomialFeatures(degree=3, include_bias=False)),
    ('ss', StandardScaler()),
    ('pca', PCA()),
    ('model', LinearRegression())
])
params = {
    'pca__n_components': range(10,200,20)
}
gs = GridSearchCV(pipe, param_grid=params)
gs.fit(X_train, y_train)
print(gs.best_score_)
print(gs.score(X_test, y_test))
print(cross_val_score(gs, X_train, y_train).mean())
gs.best_params_

0.4009664055310037
0.38311006597792363
0.38971682481060954


{'pca__n_components': 30}

### Neural network on Linear Regression initeraction data

In [13]:
df = pd.read_csv('../data/final_work_data.csv')

In [14]:
X = df.drop(columns = ['state', 'county_name', 'unemployment_rate_2010', 'population_total_2010'])
y = df['unemployment_rate_2010']

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [16]:
ss = StandardScaler()
Z_train = ss.fit_transform(X_train)
Z_test = ss.transform(X_test)

In [17]:
in_shape = Z_train[0].shape
in_shape

(24,)

In [18]:
model_1 = Sequential()

model_1.add(Dense(128, activation= 'relu', input_shape= in_shape))
model_1.add(Dropout(.3))

model_1.add(BatchNormalization())
model_1.add(Dense(128, activation= 'relu'))
"""
model_1.add(BatchNormalization())
model_1.add(Dense(64, activation= 'relu'))
"""
model_1.add(Dense(32, activation= 'relu'))
model_1.add(Dropout(.5))

model_1.add(Dense(1, activation= None))

In [19]:
model_1.compile(optimizer= 'adam', loss= 'mse', metrics= ['mse', 'RootMeanSquaredError', 'MeanAbsolutePercentageError'])

In [20]:
history_1 = model_1.fit(Z_train, y_train,
          epochs= 300,
          validation_data= (Z_test, y_test),
          verbose= 1
         )

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300


Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300


Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300


Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300


Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300


Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300


Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300


Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300


Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 239/300
Epoch 240/300
Epoch 241/300
Epoch 242/300
Epoch 243/300
Epoch 244/300
Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300


Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300


Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [21]:
r2_score(y_train, model_1.predict(Z_train))

0.8495193980691386

In [22]:
r2_score(y_test, model_1.predict(Z_test))

0.5912298482775749

In [23]:
nn_train = model_1.predict(Z_train)
nn_test = model_1.predict(Z_test)
nn_train = [g[0] for g in nn_train]
nn_test = [h[0] for h in nn_test]

### Second level model based on linear regression and neural network preds

In [24]:
a =pd.DataFrame([lin_train, nn_train, y_train], index=['lin_reg', 'nn', 'y']).T
b = pd.DataFrame([lin_test, nn_test, y_test], index=['lin_reg', 'nn', 'y']).T

In [25]:
a.head()

Unnamed: 0,lin_reg,nn,y
0,14.491725,17.818363,17.3
1,9.733514,17.107672,18.7
2,10.720415,10.453696,11.3
3,9.052294,7.383136,8.1
4,9.66362,10.758892,12.6


In [26]:
Xa_train = a.drop(columns='y')
ya_train = a['y']

In [27]:
b.head()

Unnamed: 0,lin_reg,nn,y
0,8.579643,8.821253,8.7
1,13.718193,16.608334,14.2
2,9.97925,9.419676,10.7
3,7.657098,8.733862,10.0
4,7.761239,10.169096,9.5


In [28]:
Xb_test = b.drop(columns='y')
yb_test = b['y']

In [29]:
lev2 = LinearRegression()

In [30]:
lev2.fit(Xa_train, ya_train)

LinearRegression()

In [31]:
lev2.score(Xa_train, ya_train), lev2.score(Xb_test, yb_test), cross_val_score(lev2, Xa_train, ya_train).mean()

(0.9009880466719563, 0.6166400989225644, 0.8996610030098859)