In [1]:
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV, cross_val_score
import tensorflow_addons as tfa

import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore')

In [2]:
X_train = pd.read_csv('../data/X_train_flu.csv', index_col= 'respondent_id')
X_test = pd.read_csv('../data/X_test_flu.csv', index_col= 'respondent_id')
y_train = pd.read_csv('../data/y_train_flu.csv', index_col= 'respondent_id')
y_test = pd.read_csv('../data/y_test_flu.csv', index_col= 'respondent_id')

In [3]:
y_train = y_train['seasonal_vaccine']
y_test = y_test['seasonal_vaccine']

In [4]:
X_train_cat = X_train.select_dtypes('object')
X_train_num = X_train.select_dtypes(['float64', 'int64'])

cat_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(sparse=False, handle_unknown='ignore'))
])
num_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
])

transformer = ColumnTransformer([('categorical', cat_pipe, X_train_cat.columns),
                                 ('numerical', num_pipe, X_train_num.columns)])

In [5]:
X_train.shape

(20030, 32)

In [6]:
y_train.shape

(20030,)

# Keras Models

## Simple Model

In [7]:
def create_model(dropout = .2):    
    model = Sequential()
    
    model.add(Dense(16, activation = 'relu', input_shape = (60,)))
    model.add(Dropout(dropout))
    model.add(Dense(8, activation = 'relu'))
    model.add(Dropout(dropout))
    model.add(Dense(4, activation = 'relu'))
    model.add(Dense(1, activation = 'sigmoid'))

    model.compile(loss='binary_crossentropy', metrics=['acc'], optimizer = 'adam')
    return model

In [8]:
clf = KerasClassifier(build_fn=create_model, verbose = 0)

In [9]:
params = {
    'clf__epochs' : [1500, 2000],
    'clf__batch_size': [2000, 2500],
    'clf__dropout' : [0.05, 0.1, 0.2, 0.3]
}

In [10]:
model_pipe = Pipeline(steps=[
    ('trans', transformer),
    ('clf', clf)
])

In [11]:
grid = GridSearchCV(model_pipe, param_grid=params, n_jobs= -1, verbose=0)
grid.fit(X_train, y_train)

OSError: [Errno 22] Invalid argument

In [None]:
grid.best_params_

In [None]:
grid_cv = cross_val_score(grid.best_estimator_, X_train, y_train, n_jobs=-1, verbose=3)

In [None]:
grid_cv.mean()

## Complex Model

In [None]:
def create_model(dropout = .2):    
    model = Sequential()
    
    model.add(Dense(32, activation = 'relu', input_shape = (60,)))
    model.add(Dropout(dropout))
    model.add(Dense(16, activation = 'relu'))
    model.add(Dropout(dropout))
    model.add(Dense(8, activation = 'relu'))
    model.add(Dropout(dropout))
    model.add(Dense(4, activation = 'relu'))
    model.add(Dense(1, activation = 'sigmoid'))

    model.compile(loss='binary_crossentropy', metrics=['acc'], optimizer = 'adam')
    
    return model

In [None]:
clf = KerasClassifier(build_fn=create_model, verbose = 0)

In [None]:
params = {
    'clf__epochs' : [1000, 1500, 2000],
    'clf__batch_size': [1500, 2000, 2500],
    'clf__dropout' : [0.05, 0.1, 0.2, 0.3]
}

In [None]:
model_pipe = Pipeline(steps=[
    ('trans', transformer),
    ('clf', clf)
])

In [None]:
grid = GridSearchCV(model_pipe, param_grid=params, n_jobs= -1, verbose=0)
grid.fit(X_train, y_train)

In [None]:
grid.best_params_

In [None]:
grid_cv = cross_val_score(grid.best_estimator_, X_train, y_train, n_jobs=-1, verbose=3)

In [None]:
grid_cv.mean()