In [None]:
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline as imbPipeline
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV, cross_val_score



import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the already-split feature and label tables; respondent_id becomes the row index.
X_train = pd.read_csv('../data/X_train_h1n1.csv', index_col= 'respondent_id')
X_test = pd.read_csv('../data/X_test_h1n1.csv', index_col= 'respondent_id')
y_train = pd.read_csv('../data/y_train_h1n1.csv', index_col= 'respondent_id')
# NOTE(review): this file name reads "h1h1" while the other three read "h1n1" —
# possibly a typo; confirm against the file actually on disk before changing it.
y_test = pd.read_csv('../data/y_test_h1h1.csv', index_col= 'respondent_id')

In [None]:
# Reduce each label table to the single 'h1n1_vaccine' target column.
# The isinstance guards make this cell safe to re-run: once the names hold a
# Series, indexing with the column label again would be looked up in the row
# index instead of the columns and raise.
if isinstance(y_train, pd.DataFrame):
    y_train = y_train['h1n1_vaccine']
if isinstance(y_test, pd.DataFrame):
    y_test = y_test['h1n1_vaccine']

In [None]:
# Partition the training columns by dtype so each group gets its own preprocessing.
X_train_cat = X_train.select_dtypes(include='object')
X_train_num = X_train.select_dtypes(include=['float64', 'int64'])

# Object-typed columns: fill gaps with the most frequent value, then one-hot
# encode to a dense array; categories not seen during fit are ignored.
cat_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(sparse=False, handle_unknown='ignore')),
])

# Numeric columns: most-frequent imputation is the only step applied.
num_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
])

# Send each column group through its matching pipeline.
transformer = ColumnTransformer(transformers=[
    ('categorical', cat_pipe, X_train_cat.columns),
    ('numerical', num_pipe, X_train_num.columns),
])

In [None]:
def create_model(optimizer='adagrad', kernel_initializer='glorot_uniform', dropout=.2):
    """Build and compile a feed-forward binary classifier.

    The network stacks four ReLU hidden layers (128, 64, 32 and 16 units), each
    followed by a Dropout layer, and ends in a single sigmoid unit. It is
    compiled with binary cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    optimizer : str or optimizer instance
        Passed straight to ``model.compile``.
    kernel_initializer : str or initializer instance
        Applied to every Dense layer, including the output layer.
    dropout : float
        Rate given to each Dropout layer.

    Returns
    -------
    Sequential
        The compiled model.
    """
    model = Sequential()

    # Hidden stack: Dense(relu) -> Dropout, narrowing from 128 to 16 units.
    for units in (128, 64, 32, 16):
        model.add(Dense(units, activation='relu', kernel_initializer=kernel_initializer))
        model.add(Dropout(dropout))

    # Sigmoid, not softmax: softmax normalises across the units of the layer,
    # so with a single output unit it would always emit 1.0 and the model
    # could never learn under binary cross-entropy.
    model.add(Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer))

    model.compile(loss='binary_crossentropy', metrics=["accuracy"], optimizer=optimizer)

    return model

In [None]:
# Wrap the Keras model factory so it can be used as a scikit-learn style estimator.
clf = KerasClassifier(build_fn=create_model, verbose=3)

In [None]:
# End-to-end estimator: preprocess the columns, oversample with SMOTE
# (fixed seed for repeatability), then fit the neural network.
# The imblearn Pipeline is used because one of the steps is a sampler.
model_pipe = imbPipeline([
    ('trans', transformer),
    ('smote', SMOTE(random_state=42)),
    ('clf', clf),
])

In [None]:
# Display the pipeline's parameters; handy for looking up the
# `<step>__<param>` names that a parameter grid can refer to.
model_pipe.get_params()

In [None]:
# Hyper-parameter grid for the search; the 'clf__' prefix addresses the
# 'clf' step of the pipeline. Epochs run from 100 to 450 in steps of 50.
params = dict(
    clf__optimizer=['rmsprop', 'adam', 'adagrad'],
    clf__epochs=[*range(100, 500, 50)],
    clf__dropout=[0.1, 0.2, 0.3],
    clf__kernel_initializer=['glorot_uniform', 'normal', 'uniform'],
)

In [None]:
# Exhaustive search over every combination in `params`, parallelised across
# all available cores (n_jobs=-1). Each candidate trains the network for at
# least 100 epochs per fit, so expect this cell to run for a long time.
grid = GridSearchCV(
    estimator=model_pipe,
    param_grid=params,
    n_jobs=-1,
    verbose=3,
)
grid.fit(X_train, y_train)