In [148]:
import numpy as np
import pandas as pd
from scipy.stats import reciprocal
from sklearn.datasets import load_diabetes
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras

# Keras Sequential

In [63]:
titan = pd.read_csv('train.csv')
titan.drop(columns='Cabin', inplace=True)

#grouping by age to use later
age_grouped = titan.groupby(['Pclass']).median('Age')

### Function for null age
def impute_age(row):
    if pd.isna(row['Age']):
        #print(row['Pclass'])
        row['Age'] = age_grouped['Age'].iloc[int(row['Pclass'])-1] #hacky, but works
    return row

titan[['Age', 'Pclass']] = titan[['Age', 'Pclass']].apply(impute_age, axis=1)
titan = titan.dropna()

In [64]:
##Dummy Variables
titan = pd.concat([pd.get_dummies(titan['Sex']), titan], axis=1)
titan = pd.concat([pd.get_dummies(titan['Embarked'], prefix="Embarked"), titan], axis=1)

##Dropping Noise
survived = titan['Survived']
titan.drop(columns=['Survived'], inplace=True)
titan.drop(columns=['Name', 'Ticket', 'PassengerId', 'Sex','Embarked' ], inplace=True)

In [65]:
titan.sample(3)

Unnamed: 0,Embarked_C,Embarked_Q,Embarked_S,female,male,Pclass,Age,SibSp,Parch,Fare
279,0,0,1,1,0,3.0,35.0,1,1,20.25
544,1,0,0,0,1,1.0,50.0,1,0,106.425
221,0,0,1,0,1,2.0,27.0,0,0,13.0


In [78]:
titan.shape

(889, 10)

In [97]:
##Model creation
p2_model = keras.models.Sequential()
p2_model.add(keras.layers.Dense(100, input_dim=10, activation='relu'))
p2_model.add(keras.layers.Dense(100, activation='relu'))
p2_model.add(keras.layers.Dense(200, activation='relu'))
p2_model.add(keras.layers.Dense(100, activation='relu'))
p2_model.add(keras.layers.Dense(1, activation='relu'))
p2_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [103]:
p2_model.summary()
#seems to keep counting up even when clearing pointers.  Hope that doesn't affect the model, as far as I can tell it doesn't

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 100)               1100      
_________________________________________________________________
dense_21 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_22 (Dense)             (None, 200)               20200     
_________________________________________________________________
dense_23 (Dense)             (None, 100)               20100     
_________________________________________________________________
dense_24 (Dense)             (None, 1)                 101       
Total params: 51,601
Trainable params: 51,601
Non-trainable params: 0
_________________________________________________________________


In [105]:
primary_run = p2_model.fit(titan, survived, epochs=10, validation_data=(titan, survived)) 
#not sure that validation data really adds any value in this situation where we dont have real test data

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [100]:
p2_yhat = p2_model.predict(titan)

In [101]:
rmse2 = mean_squared_error(survived, p2_yhat, squared=False)

#### RMSE

In [102]:
rmse2

0.6184271933508548

# Bike Share Dataset

In [109]:
bike_df = pd.read_csv('bike_share_hour.csv')

##making categoricals
bike_df[['season', 'holiday', 'weekday', 'workingday', 'weathersit']] = bike_df[['season', 'holiday', 'weekday', 'workingday', 'weathersit']].astype('category')
##Scaling
scaler = StandardScaler()
bike_df[['yr','mnth','hr','temp','atemp','hum','windspeed','casual','registered']] = scaler.fit(bike_df[['yr','mnth','hr','temp','atemp','hum','windspeed','casual','registered']]).transform(bike_df[['yr','mnth','hr','temp','atemp','hum','windspeed','casual','registered']])
##Dropping columns
bike_df.drop(columns=['casual','registered','dteday','instant'], inplace=True)

##split
y = bike_df['cnt']
bike_df.drop(columns=['cnt'], inplace=True)
X_train, X_test, y_train, y_test = train_test_split(bike_df, y, test_size=.33, random_state=1)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485
232,0.012648,0.050680,0.000261,-0.011409,0.039710,0.057245,-0.039719,0.056081,0.024053,0.032059
80,0.070769,-0.044642,0.012117,0.042530,0.071357,0.053487,0.052322,-0.002592,0.025393,-0.005220
46,-0.056370,-0.044642,-0.011595,-0.033214,-0.046975,-0.047660,0.004460,-0.039493,-0.007979,-0.088062
381,-0.070900,0.050680,-0.089197,-0.074528,-0.042848,-0.025739,-0.032356,-0.002592,-0.012908,-0.054925
...,...,...,...,...,...,...,...,...,...,...
255,0.001751,-0.044642,-0.065486,-0.005671,-0.007073,-0.019476,0.041277,-0.039493,-0.003304,0.007207
72,0.063504,0.050680,-0.004050,-0.012556,0.103003,0.048790,0.056003,-0.002592,0.084495,-0.017646
396,-0.085430,0.050680,-0.030996,-0.022885,-0.063487,-0.054236,0.019187,-0.039493,-0.096433,-0.034215
235,0.067136,0.050680,-0.014828,0.058596,-0.059359,-0.034508,-0.061809,0.012906,-0.005145,0.048628


In [120]:
p3_model = keras.models.Sequential()
p3_model.add(keras.layers.Dense(200, input_dim=X_train.shape[1], activation='relu'))
p3_model.add(keras.layers.Dense(100, activation='relu'))
p3_model.add(keras.layers.Dense(300, activation='relu'))
p3_model.add(keras.layers.Dense(100, activation='relu'))
p3_model.add(keras.layers.Dense(1, activation='relu'))
p3_model.compile(loss='mse', optimizer=keras.optimizers.SGD(learning_rate=1e-3)) #, metrics=['accuracy'] may use later?

In [117]:
p3_model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_35 (Dense)             (None, 100)               1100      
_________________________________________________________________
dense_36 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_37 (Dense)             (None, 200)               20200     
_________________________________________________________________
dense_38 (Dense)             (None, 100)               20100     
_________________________________________________________________
dense_39 (Dense)             (None, 1)                 101       
Total params: 51,601
Trainable params: 51,601
Non-trainable params: 0
_________________________________________________________________


In [127]:
p3_run = p3_model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [129]:
p3_yhat = p3_model.predict(X_test)

In [131]:
rmse3 = mean_squared_error(y_test, p3_yhat, squared=False)

In [133]:
rmse3
#not a great rmse

164.30907433891883

# Optimization
Prompt only mentions different optimizers.  Seems sparse, but an exhastive grid search of optimizers would limit the ability to search on learning rate.  We will do 2 different grid searches and see how it goes.  

I reduced the scope of the initial grid search that I had.  I left the evidience incase there is ineterst.  But it was taking a very long time, and I wanted results to show.

In [214]:
def build_model(n_hidden=1, n_neurons=30, optimizer='SGD', input_shape=X_train.shape[1], activation='relu', loss='mse' ):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation=activation))
    
    model.add(keras.layers.Dense(1))
    model.compile(loss=loss, optimizer=optimizer)
    return model
        

In [192]:
model_bld = keras.wrappers.scikit_learn.KerasRegressor(build_model)

In [208]:
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = {'n_hidden': [1,2,3,4],
              #'n_neurons': np.arange(50,100),
              'optimizer': optimizer,
              #'activation': ['relu', 'softmax', 'elu', 'prelu'],
              'loss': ['mse','mean_absolute_error'  ]
             }

In [209]:
search_cv = GridSearchCV(model_bld, param_grid, cv=3, n_jobs=-1)

In [210]:
result = search_cv.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

 -2.94443757e+04 -2.95088815e+04 -2.94043405e+04 -1.89882386e+24
 -2.90144557e+04 -2.95482487e+04 -2.95959622e+04 -2.88885592e+04
 -2.93414362e+04 -2.88141576e+04 -4.30264992e+31 -2.80482083e+04
 -2.95308574e+04 -2.95992734e+04 -2.80491348e+04 -2.92179440e+04
 -2.64216901e+04             nan -2.39780059e+04 -2.95284961e+04
 -2.96000280e+04 -2.53779499e+04 -2.92106074e+04 -1.98175723e+04
 -1.51768672e+02 -1.52868429e+02 -1.53341349e+02 -1.53360616e+02
 -1.52864283e+02 -1.52982290e+02 -1.52859014e+02 -1.51068319e+02
 -1.50996648e+02 -1.53300049e+02 -1.53366689e+02 -1.51468643e+02
 -1.52758204e+02 -1.51073227e+02 -1.49884715e+02 -1.46800512e+02
 -1.53286062e+02 -1.53375671e+02 -1.48675878e+02 -1.51874868e+02
 -1.43816864e+02 -1.47710693e+02 -1.35358396e+02 -1.53264801e+02
 -1.53371475e+02 -1.37423818e+02 -1.50997264e+02 -1.15446686e+02]


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Best Sccore

In [211]:
result.best_score_

-115.44668579101562

### Best Params

In [212]:
result.best_params_

{'loss': 'mean_absolute_error', 'n_hidden': 4, 'optimizer': 'Nadam'}