In [36]:
import numpy as np
import pandas as pd
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
seed = 7
np.random.seed(seed)

def drop_df_columns(df_train, df_test, col_name):
    df_train = df_train.drop(labels=col_name, axis=1)
    df_test = df_test.drop(labels=col_name, axis=1)
    return df_train, df_test

def baseline_model():
    model = Sequential()
    model.add(Dense(16, input_dim=6, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def deeper_model():
    model = Sequential()
    # Input Layer
    model.add(Dense(16, input_dim=6, kernel_initializer='normal', activation='relu'))
    # Hidden layers
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    # Output Layer
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def deeper_model_with_dropout():
    model = Sequential()
    # Input Layer
    model.add(Dense(16, input_dim=6, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2, input_shape=(10,)))
    # Hidden layers
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    # Output Layer
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def deeper_model_2nd():
    model = Sequential()
    # Input Layer
    model.add(Dense(16, input_dim=9, kernel_initializer='normal', activation='relu'))
    # Hidden layers
    model.add(Dense(64, kernel_initializer='normal', activation='relu'))
    # Output Layer
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def wider_model():
    model = Sequential()
    model.add(Dense(20, input_dim=9, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def run_model(**kwargs):
    print(kwargs)
    np.random.seed(seed)
    
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    estimators.append(('mlp', KerasRegressor(
        build_fn=kwargs['model'],
        epochs=kwargs['num_of_epochs'],
        batch_size=5,
        verbose=0,
    )))
    pipeline = Pipeline(estimators)
   
    kfold = KFold(n_splits=kwargs['cv'], shuffle=True, random_state=seed)
    results = cross_val_score(pipeline, X, Y, cv=kfold, verbose=0)
    print("%s: %.2f (%.2f) MSE" % (kwargs['model_name'], results.mean(), results.std()))
    print(results)
    return pipeline

In [37]:
# Read train.csv and test.csv to pandas dataframe
df_train = pd.read_csv("train.csv", header='infer', na_values='?')
df_test = pd.read_csv("test.csv", header='infer', na_values='?')
df_train_label = df_train["time"]
df_train = df_train.drop(labels="time", axis=1)
df_train = df_train.drop(labels="id", axis=1)
df_test = df_test.drop(labels="id", axis=1)

# Create a new features called combine which is a combination of log transform of 5 feaures
# including max_iter, n_samples, n_features, n_classes and alpha
df_train["combine"] = (np.log1p(df_train["max_iter"]) + np.log1p(df_train["n_samples"]) + np.log1p(df_train["n_features"]) + 
                       np.log1p(df_train["n_classes"]) + np.log1p(df_train["alpha"]))
df_test["combine"] = (np.log1p(df_test["max_iter"]) + np.log1p(df_test["n_samples"]) + np.log1p(df_test["n_features"]) + 
                       np.log1p(df_test["n_classes"]) + np.log1p(df_test["alpha"]))

# Drop all less correlated features
df_train, df_test = drop_df_columns(df_train, df_test, "l1_ratio")
df_train, df_test = drop_df_columns(df_train, df_test, "alpha")
df_train, df_test = drop_df_columns(df_train, df_test, "max_iter")
df_train, df_test = drop_df_columns(df_train, df_test, "random_state")
df_train, df_test = drop_df_columns(df_train, df_test, "n_samples")
df_train, df_test = drop_df_columns(df_train, df_test, "n_features")
df_train, df_test = drop_df_columns(df_train, df_test, "n_classes")
df_train, df_test = drop_df_columns(df_train, df_test, "n_clusters_per_class")
df_train, df_test = drop_df_columns(df_train, df_test, "n_informative")
df_train, df_test = drop_df_columns(df_train, df_test, "flip_y")
df_train, df_test = drop_df_columns(df_train, df_test, "scale")

df_train.loc[df_train['n_jobs'] == -1, 'n_jobs'] = 16
df_test.loc[df_test['n_jobs'] == -1, 'n_jobs'] = 16

# One hot encoding for penalty feature
df_train = pd.get_dummies(df_train, columns=["penalty"])
df_test = pd.get_dummies(df_test, columns=["penalty"])

df_train.head()

Unnamed: 0,n_jobs,combine,penalty_elasticnet,penalty_l1,penalty_l2,penalty_none
0,16,20.431966,0,0,0,1
1,1,20.569294,0,1,0,0
2,2,20.637689,0,0,0,1
3,4,21.195375,0,0,0,1
4,2,19.171336,1,0,0,0


In [40]:
X = df_train.values
Y = df_train_label.values.ravel()
deep = run_model(model=deeper_model, model_name="Deep", num_of_epochs=100, cv=3)
deep.fit(X, Y)
prediction_result = deep.predict(df_test.values)
print(deep.score(X, Y))

# Predictation result is saved to submission.csv file
test_id = np.arange(0, 100)
my_submission = pd.DataFrame({'Id': test_id, 'time': prediction_result})
my_submission.to_csv('submission.csv', index=False)

{'model': <function deeper_model at 0x000001ABD97142F0>, 'model_name': 'Deep', 'num_of_epochs': 100, 'cv': 3}
Deep: -1.73 (0.62) MSE
[-0.85749478 -2.09410005 -2.23606177]
-1.1293861901853233
