<font color="#CC3D3D"><p>
# Build models using Keras Tuner and the Winners' Data

##### Import modules

In [None]:
import pandas as pd
import numpy as np
import os
import random
import pickle
from tqdm import tqdm
from IPython.display import Image, clear_output
import seaborn as sns
import matplotlib.pylab as plt
from matplotlib import font_manager, rc
%matplotlib inline

from lightgbm import LGBMRegressor
from sklearn.preprocessing import StandardScaler
import shap
import tensorflow as tf
from tensorflow import keras
import kerastuner as kt
print(tf.__version__)

##### Set random seeds to make your results reproducible

In [None]:
# Run this before every modeling pass to get the same results on each run.

def reset_seeds(s1, s2, s3, reset_graph_with_backend=None):
    """Re-seed the NumPy, Python and TensorFlow random number generators.

    Args:
        s1: Seed handed to ``np.random.seed``.
        s2: Seed handed to ``random.seed``.
        s3: Seed handed to ``tf.compat.v1.set_random_seed``.
        reset_graph_with_backend: Optional object exposing ``clear_session()``
            (presumably ``keras.backend`` -- confirm at the call site). When
            supplied, its session is cleared and the default TensorFlow graph
            is reset *before* the seeds are applied.
    """
    backend = reset_graph_with_backend
    if backend is not None:
        backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    # Seeding comes after the optional graph reset, never before it.
    np.random.seed(s1)
    random.seed(s2)
    tf.compat.v1.set_random_seed(s3)
    # Intentionally left disabled (GPU-related switch and a confirmation message):
    #   os.environ['CUDA_VISIBLE_DEVICES'] = ''
    #   print("RANDOM SEEDS RESET")

### Step 1: Load and process the data

##### Read data

In [None]:
# Features of the round-1 first-place winner.
# `custid` is copied from the third-place winner's CSV files; this presumes the
# pickled rows are in the same order as the CSV rows -- TODO confirm.
input_dir = os.path.abspath("../input")

w1_train = pd.DataFrame(pd.read_pickle(input_dir + '/1st_train_features.pkl')).add_prefix('w1_')
w1_train['custid'] = pd.read_csv(input_dir + '/train_features_3rd_winner.csv')['custid']

w1_test = pd.DataFrame(pd.read_pickle(input_dir + '/1st_test_features.pkl')).add_prefix('w1_')
w1_test['custid'] = pd.read_csv(input_dir + '/test_features_3rd_winner.csv')['custid']

# Data of the round-1 second-place winner.
# The pickle unpacks into six items; only the 1st, 2nd and 5th are kept
# (train / validation / test feature matrices, judging by the names).
train_x, valid_x, _, _, test_x, _ = pd.read_pickle(os.path.abspath("../input") + '/2nd_data.pkl')

# Train and validation rows are stacked back into a single table. Customer ids
# are copied from the first-place frames (same row order assumed -- TODO confirm).
w2_train = pd.DataFrame(np.vstack((train_x, valid_x))).add_prefix('w2_')
w2_train['custid'] = w1_train['custid']

w2_test = pd.DataFrame(test_x).add_prefix('w2_')
w2_test['custid'] = w1_test['custid']

# Features of the round-1 third-place winner; the first CSV column becomes the index.
input_dir = os.path.abspath("../input")
w3_train = pd.read_csv(input_dir + '/train_features_3rd_winner.csv', index_col=0)
w3_test = pd.read_csv(input_dir + '/test_features_3rd_winner.csv', index_col=0)

# The first remaining column is named 'custid'; every other column gets a 'w3_' tag.
for w3_frame in (w3_train, w3_test):
    w3_frame.columns = ['custid'] + [f'w3_{col}' for col in w3_frame.columns[1:]]

# Merge the 1st/2nd/3rd-place feature tables on the customer id.
# The join key is spelled out so that an accidental name clash between feature
# columns can never silently become part of the key.
X_train = w1_train.merge(w2_train, on='custid').merge(w3_train, on='custid')
X_test = w1_test.merge(w2_test, on='custid').merge(w3_test, on='custid')
y_train = pd.read_csv(os.path.abspath("../input")+'/y_train.csv').age
IDtest = X_test.custid.unique()

# Fail fast: a merge that drops or duplicates rows would otherwise only surface
# when the model is fitted or, for the test set, when the submission is written
# after the whole tuning loop has already run.
if len(X_train) != len(y_train):
    raise ValueError(
        f"Merged training features have {len(X_train)} rows but y_train has {len(y_train)}."
    )
if len(IDtest) != len(X_test):
    raise ValueError(
        f"Merged test features have {len(X_test)} rows but only {len(IDtest)} unique custid values."
    )

# Feature selection: rank features by their mean absolute SHAP value.
IDtrain = X_train['custid']
X_train = X_train.drop('custid', axis=1)
X_test = X_test.drop('custid', axis=1)

# Frame on which the importance is evaluated (the test features).
X_importance = X_test
# Explain the predictions of a LightGBM regressor with the shap library.
model = LGBMRegressor(random_state=0).fit(X_train, y_train)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_importance)
shap_sum = np.abs(shap_values).mean(axis=0)
# Build the table column by column so that 'shap_importance' is a numeric
# column; a transposed list-of-lists would make both columns object dtype.
importance_df = pd.DataFrame({
    'column_name': X_importance.columns.tolist(),
    'shap_importance': shap_sum,
})
importance_df = importance_df.sort_values('shap_importance', ascending=False)
importance_df

# Drop every feature whose SHAP importance is zero.
keep_mask = importance_df['shap_importance'] > 0
features_selected = importance_df.loc[keep_mask, 'column_name']
X_train, X_test = X_train[features_selected], X_test[features_selected]
print(X_train.shape, X_test.shape)

##### Feature scaling

In [None]:
# Standardize the features; the scaler is fitted on the training matrix only
# and then applied unchanged to the test matrix.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Remove NaNs (they become 0) from the whole matrices instead of from a single
# hard-coded test row: this no longer depends on the test set having a row
# 8978 and also covers NaNs that appear anywhere else. When that row is the
# only one affected, the result is identical to cleaning just that row.
X_train = np.nan_to_num(X_train, copy=False)
X_test = np.nan_to_num(X_test, copy=False)

##### Split data into train & validation sets

In [None]:
# Train/validation split: the first 80% of the rows are used for fitting and
# the remaining rows for validation (positional cut, no shuffling).
n_fit = round(X_train.shape[0] * 0.8)
X_valid, X_train = X_train[n_fit:], X_train[:n_fit]
y_valid, y_train = y_train[n_fit:], y_train[:n_fit]

### Step 2: Define the hyper-model

In [None]:
def model_fn(hp):
    """Build and compile one candidate dense regression network.

    The search space is declared through ``hp``: a shared activation, the
    number of hidden layers (2-4), a width and a dropout rate per hidden
    layer, and the optimizer. The input width is read from the module-level
    ``X_train``.

    Args:
        hp: Hyperparameter container providing ``Choice``, ``Int`` and
            ``Float`` (passed in by the Keras Tuner search).

    Returns:
        A compiled ``keras.Model`` with a single linear output, MSE loss and
        a root-mean-squared-error metric.
    """
    inputs = keras.Input(shape=(X_train.shape[1],))
    activation = hp.Choice('act', ['relu', 'elu', 'selu'])
    depth = hp.Int('num_layers', 2, 4)

    hidden = inputs
    for layer_idx in range(depth):
        # Width first, dropout second: the order in which the hyperparameters
        # are requested is kept stable.
        width = hp.Choice(f'unit_{layer_idx}', [512, 256, 128, 64, 32, 16])
        hidden = keras.layers.Dense(width, activation=activation)(hidden)
        drop_rate = hp.Float(f'dropout_{layer_idx}', 0, 0.5, step=0.25, default=0.5)
        hidden = keras.layers.Dropout(drop_rate)(hidden)

    outputs = keras.layers.Dense(1, activation='linear')(hidden)
    model = keras.Model(inputs, outputs)

    optimizer_name = hp.Choice('optimizer', ['adam', 'nadam', 'rmsprop'])
    rmse_metric = keras.metrics.RootMeanSquaredError()
    model.compile(loss='mse', optimizer=optimizer_name, metrics=[rmse_metric])
    return model

### Step 3: Build multiple hyper-tuned models

In [None]:
N = 5        # number of independently tuned models
preds = []   # one flattened test-set prediction vector per tuned model

for run in tqdm(range(N)):
    # Each run gets its own seed triple derived from the run index.
    reset_seeds(run, run * 10, run * 100)

    val_rmse = kt.Objective('val_root_mean_squared_error', direction="min")
    tuner = kt.Hyperband(
        model_fn,
        objective=val_rmse,
        max_epochs=10,
        hyperband_iterations=2,
        overwrite=True,  # the same directory is reused, so earlier results are discarded
        directory='dnn_tuning',
    )

    early_stop = tf.keras.callbacks.EarlyStopping(patience=2)
    tuner.search(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        callbacks=[early_stop],
        verbose=0,
    )

    # Keep only the best model of this search and store its test predictions.
    model = tuner.get_best_models(1)[0]
    preds.append(model.predict(X_test).flatten())

### Step 4: Ensemble models & make submissions

In [None]:
# Power-mean ensemble of the N prediction vectors:
# raise each to the power p, average, then take the p-th root.
p = 1
powered = [preds[k] ** p for k in range(N)]
n = len(powered)
# The built-in sum starts from 0 and adds the vectors left to right.
pred = (sum(powered) / n) ** (1 / p)

In [None]:
# Write the submission file: (Public LB) 8.15961
t = pd.Timestamp.now()
# Month, day, hour and minute, each zero-padded to two digits.
stamp = "".join(f"{part:02}" for part in (t.month, t.day, t.hour, t.minute))
fname = "dnn_submission_" + stamp + ".csv"
submission = pd.DataFrame({'custid': IDtest, 'age': pred})
submission.to_csv(fname, index=False)
print(f"'{fname}' is ready to submit.")

<font color="#CC3D3D"><p>
# End