# <font color='tomato'><font color="#CC3D3D"> Regression with Keras

모델은 저장해 첨부했지만 keras에서 다시 파일을 불러오는 것이 되지 않아서 DNN은 submission으로만 앙상블을 진행했습니다.

### Import modules

In [1]:
import pandas as pd
import numpy as np
import os
import random
import pickle
from IPython.display import Image
import seaborn as sns
import matplotlib.pylab as plt
from matplotlib import font_manager, rc
%matplotlib inline
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
print(tf.__version__)
from tqdm import tqdm
import kerastuner as kt
from keras import regularizers

2.4.1


In [2]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation

### Set random seeds to make your results reproducible

In [3]:
def reset_seeds(reset_graph_with_backend=None):
    if reset_graph_with_backend is not None:
        K = reset_graph_with_backend
        K.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(1)
    random.seed(2)
    tf.compat.v1.set_random_seed(3)
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # for GPU
    print("RANDOM SEEDS RESET")  # optional
   
reset_seeds()

RANDOM SEEDS RESET


### Step 1: Load and process the data

##### Read data

In [4]:
# 앞 단계(Feature Enginnering)에서 저장했던 훈련/평가/적용 데이터를 읽어온다.
X_train, X_test, y_train, y_test, X_dep, ID_dep = pd.read_pickle('../models/DNN_features.pkl')

# 모델링에 사용되는 최종 학습 및 평가 데이터 확인
X_train.shape, X_test.shape, X_dep.shape

((15110, 5756), (6477, 5756), (14380, 5756))

##### Feature scaling

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# NN을 사용할 때에는 StandarddScaler를 사용하고, 
# 보통 NN에서는 Scaler후에 데이터를 넣는다.

##### Split data into train & validation set 

In [6]:
# Train/Validation
i = int(round(X_train.shape[0] * 0.8,0))
X_valid, y_valid = X_train[i:], y_train[i:]
X_train, y_train = X_train[:i], y_train[:i]

### Step 2: Define Hypermodel

In [7]:
from tensorflow.keras.utils import get_custom_objects
import tensorflow.keras.backend as K
class Mish(Activation):
    def __init__(self, activation, **kwargs):
        super(Mish, self).__init__(activation, **kwargs)
        self.__name__ = 'Mish'

def mish(x):
    return x * K.tanh(K.softplus(x))

get_custom_objects().update({'mish': Mish(mish)})

In [8]:
def model_fn(hp):
    inputs = keras.Input(shape=(X_train.shape[1],))
    x = inputs
    x = keras.layers.Dropout(hp.Float('dropout0', 0, 0.7, step=0.1, default=0.5))(x)
    
    for i in range(hp.Int('num_layers',  4, 10)):
        
        x = keras.layers.Dense(hp.Int('unit_'+str(i), min_value=32, max_value = 512, step=32), 
                               activation = hp.Choice('act_' + str(i), ['mish', 'selu', 'swish']),
                               kernel_regularizer = regularizers.l2(hp.Choice('reg_value', [0.01,0.001,0.1, 0.005, 0.05])),
                               kernel_initializer = keras.initializers.glorot_normal())(x)
        
        x = BatchNormalization()(x)
        
        x = keras.layers.Dropout(hp.Float('dropout_'+str(i), 0, 0.7, step=0.1, default=0.5))(x)
       
    
    outputs = keras.layers.Dense(1, activation='linear')(x)
                               
    model_dnn = keras.Model(inputs, outputs)
    
    model_dnn.compile(loss='mse', 
                  optimizer=tf.keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),     # rmsprop로 바꾸기 가능
                  metrics=[keras.metrics.RootMeanSquaredError()])
    return model_dnn

In [9]:
tuner = kt.Hyperband(model_fn,
                     objective = kt.Objective('val_root_mean_squared_error', direction="min"), 
                     max_epochs = 20,
                     hyperband_iterations=2,
                     overwrite=True,
                     directory='dnn_tuning')

tuner.search(X_train, 
             y_train, 
             validation_data = (X_valid, y_valid), 
             callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])

Trial 60 Complete [00h 02m 41s]
val_root_mean_squared_error: 8.076971054077148

Best val_root_mean_squared_error So Far: 8.017032623291016
Total elapsed time: 01h 03m 30s
INFO:tensorflow:Oracle triggered exit


In [10]:
model_dnn = tuner.get_best_models(1)[0]
model_dnn.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5756)]            0         
_________________________________________________________________
dropout (Dropout)            (None, 5756)              0         
_________________________________________________________________
dense (Dense)                (None, 320)               1842240   
_________________________________________________________________
batch_normalization (BatchNo (None, 320)               1280      
_________________________________________________________________
dropout_1 (Dropout)          (None, 320)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 160)               51360     
_________________________________________________________________
batch_normalization_1 (Batch (None, 160)               640   

In [11]:
model_dnn.evaluate(X_test, y_test)



[83.18057250976562, 8.306385040283203]

In [12]:
X_dep = scaler.transform(X_dep)

In [13]:
pred = model_dnn.predict(X_dep).flatten() 

In [14]:
t = pd.Timestamp.now()
fname = f"dnn_submission_{t.month:02}{t.day:02}{t.hour:02}{t.minute:02}.csv"
pd.DataFrame({'custid': ID_dep, 'age': pred}).to_csv(fname, index=False)
print(f"'{fname}' is ready to submit.")

'dnn_submission_06161750.csv' is ready to submit.


###  Step 7: Save the model for future use

In [15]:
#h5 형식으로 저장
model_dnn.save('DNN_model_fisrt2.h5')

# 추후 저장한 모형 불러올 때: 
# model = load_model('nn_model.h5')

<font color="#CC3D3D"><p>
# End