In [1]:
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline as imbPipeline
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV, cross_val_score



import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
X_train = pd.read_csv('../data/X_train_h1n1.csv', index_col= 'respondent_id')
X_test = pd.read_csv('../data/X_test_h1n1.csv', index_col= 'respondent_id')
y_train = pd.read_csv('../data/y_train_h1n1.csv', index_col= 'respondent_id')
y_test = pd.read_csv('../data/y_test_h1h1.csv', index_col= 'respondent_id')

In [3]:
y_train = y_train['h1n1_vaccine']
y_test = y_test['h1n1_vaccine']

In [4]:
X_train_cat = X_train.select_dtypes('object')
X_train_num = X_train.select_dtypes(['float64', 'int64'])

cat_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(sparse=False, handle_unknown='ignore'))
])
num_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
])

transformer = ColumnTransformer([('categorical', cat_pipe, X_train_cat.columns),
                                 ('numerical', num_pipe, X_train_num.columns)])

In [5]:
def create_model(optimizer = 'adagrad', dropout = .2):
    model = Sequential()
    model.add(Dense(32, activation = 'relu'))
    model.add(Dropout(dropout))
    model.add(Dense(16, activation = 'relu'))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation = 'sigmoid'))
    
    model.compile(loss= 'binary_crossentropy', metrics=["accuracy"], optimizer = optimizer)
    
    return model

In [6]:
clf = KerasClassifier(build_fn= create_model, verbose = 3)

In [7]:
model_pipe = imbPipeline(steps=[
    ('trans', transformer),
    ('smote', SMOTE(random_state=42)),
    ('clf', clf)
])

In [8]:
model_pipe.get_params()

{'memory': None,
 'steps': [('trans',
   ColumnTransformer(transformers=[('categorical',
                                    Pipeline(steps=[('impute',
                                                     SimpleImputer(strategy='most_frequent')),
                                                    ('ohe',
                                                     OneHotEncoder(handle_unknown='ignore',
                                                                   sparse=False))]),
                                    Index(['age_group', 'education', 'race', 'sex', 'income_poverty',
          'marital_status', 'rent_or_own', 'employment_status', 'hhs_geo_region',
          'census_msa'],
         dtype='object')),
                                   ('numerical',
                                    P...
          'behavioral_large_gatherings', 'behavioral_outside_home',
          'behavioral_touch_face', 'doctor_recc_h1n1', 'doctor_recc_seasonal',
          'chronic_med_condition', 'child_u

In [9]:
params = {
    'clf__optimizer' : ['adam'],
    'clf__epochs' : [100],
    'clf__dropout' : [0.1, 0.2, 0.3],
    
}

In [10]:
grid = GridSearchCV(model_pipe, param_grid=params, n_jobs=-1, verbose = 3, cv = 3)
grid.fit(X_train, y_train)

Fitting 3 folds for each of 3 candidates, totalling 9 fits


TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.
