In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import missingno
import seaborn as sns
import math


# Feature Selection and Encoding
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, label_binarize, StandardScaler, MinMaxScaler,OrdinalEncoder
# https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html
from sklearn import model_selection
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline



# Managing Warnings 
import warnings
warnings.filterwarnings('ignore')

# Plot the Figures Inline
%matplotlib inline

In [7]:
# data_df = pd.read_csv('./pre_credit_df(fill_groupby).csv', index_col=0) # previously used input file
# Load the preprocessed credit data from CSV; the first column of the file is used as the DataFrame index.
data_df = pd.read_csv('./pre_credit_df(fill_groupby)_final.csv', index_col=0)

### Target, Feature 분리

In [8]:
# Features are every column except the 'credit' target; the target is kept as its own Series.
credit_X = data_df.drop(columns=['credit'])
credit_Y = data_df['credit']

### Pipeline 활용한 encoding

In [9]:
def pipe_processing(x_train, x_test):
    """Fit the preprocessing on the training split and apply it to both splits.

    Numeric columns are standardized, categorical columns are one-hot encoded
    and the binary flag columns are passed through unchanged.

    Parameters
    ----------
    x_train : pandas.DataFrame
        Training features; the only data the transformers are fitted on.
    x_test : pandas.DataFrame
        Held-out features, transformed with what was learned from ``x_train``.

    Returns
    -------
    tuple
        ``(x_train_transformed, x_test_transformed, preprocessor)`` where
        ``preprocessor`` is the ``ColumnTransformer`` used as the pipeline's
        only step.
    """
    # Group the columns by how they are encoded.
    numeric_features = ['income_total','DAYS_EMPLOYED','family_size','begin_month','Age','cards']
    numeric_transformer = StandardScaler()

    categorical_features = ['child_num','income_type','edu_type','family_type','house_type','occyp_type','family_category','occyp_category']
    # handle_unknown='ignore': a category that only occurs in the held-out
    # split is encoded as all zeros instead of raising during transform.
    categorical_transformer = OneHotEncoder(handle_unknown='ignore')

    pass_through = ['gender','car','reality','work_phone','phone','email','dup']

    preprocessor = ColumnTransformer(
        transformers=[ # List of (name, transformer, column(s))
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
            ('pass_through','passthrough',pass_through)])

    preprocessor_pipe = Pipeline(steps=[('preprocessor', preprocessor)]) # preprocessing-only

    # Fit on the training data only so no statistics leak from the test split.
    preprocessor_pipe.fit(x_train)
    x_train_transformed = preprocessor_pipe.transform(x_train)
    x_test_transformed = preprocessor_pipe.transform(x_test)

    return x_train_transformed, x_test_transformed, preprocessor

### 인코딩한 Dataframe 열 이름 생성

In [10]:
def get_feature_names(column_transformer):
    """Return the output column names of a fitted ColumnTransformer or Pipeline.

    Names produced by a transformer are prefixed with ``"<name>__"``; columns
    that are passed through keep their input name (or ``x<i>`` when the input
    had no column names). Transformers that expose neither
    ``get_feature_names`` nor ``get_feature_names_out`` fall back to their
    input column names, with a warning.

    Parameters
    ----------
    column_transformer : ColumnTransformer or Pipeline
        A fitted transformer. For a ColumnTransformer the public
        ``transformers_`` attribute is read; for a Pipeline its ``steps``.

    Returns
    -------
    list of str
        Feature names in the order the transformers are listed.

    Raises
    ------
    AttributeError
        If a ColumnTransformer is passed that has no ``transformers_``
        attribute (i.e. it has not been fitted).
    """

    def _as_column_list(column):
        # A bare string selects a single column; wrap it so it is not
        # iterated character by character.
        return [column] if isinstance(column, str) else column

    def _all_strings(column):
        if isinstance(column, slice) or not hasattr(column, '__iter__'):
            return False
        return all(isinstance(col, str) for col in column)

    def get_names(name, trans, column):
        """Names contributed by one ``(name, transformer, columns)`` entry."""
        # Dropped transformers and empty column selections contribute nothing.
        if trans == 'drop' or (
                hasattr(column, '__len__') and not len(column)):
            return []

        if trans == 'passthrough':
            if isinstance(column, str):
                return [column]
            if _all_strings(column):
                return list(column)
            # Positional / mask / slice selectors: resolve through the input
            # names recorded at fit time. The public attribute is tried
            # first, then the private one older releases used.
            input_names = getattr(column_transformer, 'feature_names_in_', None)
            if input_names is None:
                input_names = getattr(column_transformer, '_df_columns', None)
            if input_names is not None:
                selected = np.asarray(input_names, dtype=object)[column]
                return list(np.atleast_1d(selected))
            n_features = getattr(column_transformer, 'n_features_in_', None)
            if n_features is None:
                n_features = column_transformer._n_features
            indices = np.arange(n_features)
            return ['x%d' % i for i in np.atleast_1d(indices[column])]

        # Prefer the legacy method when it exists so results are unchanged on
        # older scikit-learn; newer releases only provide
        # get_feature_names_out.
        if hasattr(trans, 'get_feature_names'):
            produced = trans.get_feature_names()
        elif hasattr(trans, 'get_feature_names_out'):
            produced = trans.get_feature_names_out()
        else:
            # Turn the missing method into a warning and fall back to the
            # input column names.
            warnings.warn("Transformer %s (type %s) does not "
                                 "provide get_feature_names. "
                                 "Will return input column names if available"
                                 % (str(name), type(trans).__name__))
            if column is None:
                return []
            return [name + "__" + str(f) for f in _as_column_list(column)]

        return [name + "__" + str(f) for f in produced]

    ### Start of processing
    feature_names = []

    if isinstance(column_transformer, Pipeline):
        # Pipeline steps carry no column selection, hence column=None.
        # None / 'passthrough' steps are skipped.
        l_transformers = [(name, trans, None)
                          for name, trans in column_transformer.steps
                          if trans is not None and trans != 'passthrough']
    else:
        # Fitted (name, transformer, columns) triples of the ColumnTransformer.
        l_transformers = list(column_transformer.transformers_)

    for name, trans, column in l_transformers:
        if isinstance(trans, Pipeline):
            # Recursive call on the nested pipeline.
            _names = get_feature_names(trans)
            # If no step of the pipeline reports names, use the input columns.
            if len(_names) == 0 and column is not None:
                _names = [name + "__" + str(f) for f in _as_column_list(column)]
            feature_names.extend(_names)
        else:
            feature_names.extend(get_names(name, trans, column))

    return feature_names

# DL

In [11]:
import os
# Set the log level before TensorFlow is imported so that the messages the
# native library emits during import are filtered as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information

import tensorflow as tf
from tensorflow.keras import datasets, utils
from tensorflow.keras import models, layers, activations, initializers, losses, optimizers, metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss, accuracy_score

In [12]:
# Seed for the fold shuffling, so the same folds are produced on every run.
random_state = 0
num_folds= 5 # each fold holds out 1/5 of the rows (test_size = 0.2)
# Stratified, shuffled splitter used by the cross-validation loops in this notebook.
str_kf = StratifiedKFold(n_splits = num_folds, shuffle = True, random_state = random_state)

##### num_hidden_layer = 4
##### optimizer = Adam
##### initializer = he_normal
##### BN 적용
##### DROPOUT 적용

In [13]:
# Stratified k-fold evaluation of a fixed 4-hidden-layer network (Adam optimizer).
logloss_history = []
accuracy_history = []
fold_no = 1
# StratifiedKFold.split yields positional row numbers, so the folds are taken
# with .iloc. Using .loc would treat them as index labels and select the wrong
# rows (or raise) whenever the DataFrame index is not exactly 0..n-1.
for train_index, test_index in str_kf.split(credit_X, credit_Y):
    X_train, X_test = credit_X.iloc[train_index], credit_X.iloc[test_index]
    y_train, y_test = credit_Y.iloc[train_index], credit_Y.iloc[test_index]
    
    # Preprocessing is re-fitted on each training fold only.
    x_train_transformed , x_test_transformed, preprocessor = pipe_processing(X_train, X_test)
    new_col_names = get_feature_names(preprocessor)
    x_train_transformed = pd.DataFrame(x_train_transformed,columns=new_col_names)
    x_test_transformed = pd.DataFrame(x_test_transformed,columns=new_col_names)
    train_label = utils.to_categorical(y_train) # 0~2 -> one-hot vector
    test_label = utils.to_categorical(y_test) # 0~2 -> one-hot vector
    
    # A fresh model per fold so no weights carry over between folds.
    model = models.Sequential() 

    model.add(layers.Flatten())
    model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization()) # when BN is used, it is best applied after every layer
    model.add(layers.Activation('elu')) # layers.ELU or layers.LeakyReLU

    model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu')) # layers.ELU or layers.LeakyReLU

    model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(rate=0.5))

    model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(rate=0.5))

    model.add(layers.Dense(units=3, activation='softmax')) # 0~2 

    model.compile(optimizer=optimizers.Adam(), # a learning rate can be passed as an argument
                  loss=losses.categorical_crossentropy, 
                  metrics=[metrics.categorical_accuracy])
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(x_train_transformed, train_label, batch_size=100, epochs=30, verbose = 0) 
    # Generate generalization metrics: scores[0] is the loss, scores[1] the accuracy
    scores = model.evaluate(x_test_transformed, test_label, verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    accuracy_history.append(scores[1] * 100)
    logloss_history.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1
    

------------------------------------------------------------------------
Training for fold 1 ...
Score for fold 1: loss of 0.8135190010070801; categorical_accuracy of 68.11566948890686%
------------------------------------------------------------------------
Training for fold 2 ...
Score for fold 2: loss of 0.8228822946548462; categorical_accuracy of 67.22117066383362%
------------------------------------------------------------------------
Training for fold 3 ...
Score for fold 3: loss of 0.8188735842704773; categorical_accuracy of 68.07183623313904%
------------------------------------------------------------------------
Training for fold 4 ...
Score for fold 4: loss of 0.8155917525291443; categorical_accuracy of 68.63893866539001%
------------------------------------------------------------------------
Training for fold 5 ...
Score for fold 5: loss of 0.8386233448982239; categorical_accuracy of 67.29678511619568%


In [14]:
# Report the per-fold loss and accuracy lists filled by the cross-validation loop, then their means.
print("각 분할의 loss 기록 :", logloss_history)    
print("각 분할의 정확도 기록 :", accuracy_history)
print("평균 loss :", np.mean(logloss_history))
print("평균 정확도 :", np.mean(accuracy_history)) 

각 분할의 loss 기록 : [0.8135190010070801, 0.8228822946548462, 0.8188735842704773, 0.8155917525291443, 0.8386233448982239]
각 분할의 정확도 기록 : [68.11566948890686, 67.22117066383362, 68.07183623313904, 68.63893866539001, 67.29678511619568]
평균 loss : 0.8218979954719543
평균 정확도 : 67.86888003349304


##### num_hidden_layer = 4
##### optimizer = RMSprop
##### initializer = he_normal
##### BN 적용
##### DROPOUT 적용

In [103]:
# Stratified k-fold evaluation of the same 4-hidden-layer network, trained with RMSprop.
logloss_history = []
accuracy_history = []
fold_no = 1
# StratifiedKFold.split yields positional row numbers, so the folds are taken
# with .iloc. Using .loc would treat them as index labels and select the wrong
# rows (or raise) whenever the DataFrame index is not exactly 0..n-1.
for train_index, test_index in str_kf.split(credit_X, credit_Y):
    X_train, X_test = credit_X.iloc[train_index], credit_X.iloc[test_index]
    y_train, y_test = credit_Y.iloc[train_index], credit_Y.iloc[test_index]
    
    # Preprocessing is re-fitted on each training fold only; the transformed
    # arrays are fed to Keras directly, so no column names are rebuilt here.
    x_train_transformed , x_test_transformed, preprocessor = pipe_processing(X_train, X_test)
    train_label = utils.to_categorical(y_train) # 0~2 -> one-hot vector
    test_label = utils.to_categorical(y_test) # 0~2 -> one-hot vector
    
    # A fresh model per fold so no weights carry over between folds.
    model = models.Sequential() 

    model.add(layers.Flatten())
    model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization()) # when BN is used, it is best applied after every layer
    model.add(layers.Activation('elu')) # layers.ELU or layers.LeakyReLU

    model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu')) # layers.ELU or layers.LeakyReLU

    model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(rate=0.5))

    model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_normal())) 
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(rate=0.5))

    model.add(layers.Dense(units=3, activation='softmax')) # 0~2 

    model.compile(optimizer=optimizers.RMSprop(), # a learning rate can be passed as an argument
                  loss=losses.categorical_crossentropy, 
                  metrics=[metrics.categorical_accuracy])
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(x_train_transformed, train_label, batch_size=100, epochs=30, verbose = 0) 
    # Generate generalization metrics: scores[0] is the loss, scores[1] the accuracy
    scores = model.evaluate(x_test_transformed, test_label, verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    accuracy_history.append(scores[1] * 100)
    logloss_history.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1
    

------------------------------------------------------------------------
Training for fold 1 ...
Score for fold 1: loss of 0.8207738995552063; categorical_accuracy of 67.88886785507202%
------------------------------------------------------------------------
Training for fold 2 ...
Score for fold 2: loss of 0.8187645077705383; categorical_accuracy of 68.62003803253174%
------------------------------------------------------------------------
Training for fold 3 ...
Score for fold 3: loss of 0.84190434217453; categorical_accuracy of 68.16635131835938%
------------------------------------------------------------------------
Training for fold 4 ...
Score for fold 4: loss of 0.8140873312950134; categorical_accuracy of 68.80907416343689%
------------------------------------------------------------------------
Training for fold 5 ...
Score for fold 5: loss of 0.8331273198127747; categorical_accuracy of 67.58034229278564%


In [40]:
# Report the per-fold loss and accuracy lists and their means.
# NOTE(review): the lists are reset and refilled by the cross-validation cell, so that cell must run before this one.
print("각 분할의 loss 기록 :", logloss_history)    
print("각 분할의 정확도 기록 :", accuracy_history)
print("평균 loss :", np.mean(logloss_history))
print("평균 정확도 :", np.mean(accuracy_history)) 

각 분할의 loss 기록 : []
각 분할의 정확도 기록 : []
평균 loss : nan
평균 정확도 : nan


In [41]:
!pip install -q -U keras-tuner

### HPO(BayesianOpt)

In [17]:
import keras_tuner as kt
import IPython

In [18]:
# 2) Build the hyper-model
# Available HyperParameter search spaces (https://j.mp/2IXPzh7) : Int, Float, Boolean, Choice, Fixed

def build_hyper_model(hp):
    """Build and compile a tunable dense softmax classifier for Keras Tuner.

    Search space registered on ``hp``:
      * ``num_layers``: number of hidden layers, 1 to 3
      * ``units_<i>``: width of hidden layer ``i``, 32 to 512 in steps of 32
      * ``activation_<i>``: ``'relu'`` or ``'elu'`` for hidden layer ``i``
      * ``learning_rate``: 0.1, 0.01 or 0.001 for the Adam optimizer

    Returns the compiled model (categorical cross-entropy loss and metric).
    """
    net = models.Sequential()
    net.add(layers.Flatten())

    depth = hp.Int('num_layers', min_value=1, max_value=3)
    for idx in range(depth):
        # Hyper-parameter names must be unique, hence the per-layer suffix.
        suffix = str(idx)
        width = hp.Int('units_' + suffix, min_value=32, max_value=512, step=32)
        act = hp.Choice('activation_' + suffix, values=['relu', 'elu'])
        net.add(layers.Dense(units=width, activation=act))

    # Output layer: one probability per class (labels 0~2).
    net.add(layers.Dense(units=3, activation='softmax'))

    lr = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])
    net.compile(
        optimizer=optimizers.Adam(learning_rate=lr),
        loss=losses.categorical_crossentropy,
        metrics=[metrics.categorical_crossentropy],
    )
    return net

In [19]:
# Bayesian-optimization tuner over the search space defined by build_hyper_model.
tuner = kt.BayesianOptimization(build_hyper_model,
                                objective = kt.Objective('val_loss','min'), # objective of the hyper-parameter search (metric to minimize or maximize)
                                max_trials = 30, # total number of trials, each with a different hyper-parameter combination
                                directory = 'test_prac_dir', # Path to the working directory
                                project_name = 'Credit_hyper_1(final)') # Name to use as directory name for files saved by this Tuner

INFO:tensorflow:Reloading Oracle from existing project test_prac_dir\Credit_hyper_1(final)\oracle.json
INFO:tensorflow:Reloading Tuner from test_prac_dir\Credit_hyper_1(final)\tuner0.json


In [20]:
# Single 80/20 hold-out split with a fixed seed.
x_train, x_test, y_train, y_test = model_selection.train_test_split(credit_X, credit_Y, test_size=0.2, random_state=0)
# Preprocessing is fitted on the training part only; the results are wrapped in DataFrames with readable column names.
x_train_transformed , x_test_transformed, preprocessor = pipe_processing(x_train, x_test)
new_col_names = get_feature_names(preprocessor)
x_train_transformed = pd.DataFrame(x_train_transformed,columns=new_col_names)
x_test_transformed = pd.DataFrame(x_test_transformed,columns=new_col_names)
train_label = utils.to_categorical(y_train) # 0~2-> one-hot vector
test_label = utils.to_categorical(y_test) # 0~2 -> one-hot vector


In [21]:
# Run the search: each trial trains for 30 epochs with the held-out split passed as validation data.
tuner.search(x_train_transformed, train_label, epochs=30, validation_data = (x_test_transformed, test_label))

INFO:tensorflow:Oracle triggered exit


In [22]:
# Print the three best trials with their hyper-parameters and scores.
tuner.results_summary(num_trials=3) 

Results summary
Results in test_prac_dir\Credit_hyper_1(final)
Showing 3 best trials
<keras_tuner.engine.objective.Objective object at 0x000001C7E8EB6310>
Trial summary
Hyperparameters:
num_layers: 2
units_0: 512
activation_0: relu
learning_rate: 0.001
units_1: 512
activation_1: elu
units_2: 512
activation_2: elu
Score: 0.778764545917511
Trial summary
Hyperparameters:
num_layers: 1
units_0: 320
activation_0: relu
learning_rate: 0.001
units_1: 512
activation_1: relu
units_2: 512
activation_2: elu
Score: 0.7788336277008057
Trial summary
Hyperparameters:
num_layers: 1
units_0: 224
activation_0: relu
learning_rate: 0.001
units_1: 512
activation_1: elu
units_2: 512
activation_2: relu
Score: 0.7807657718658447


In [23]:
# Check top-3 trials' hyper-params

# NOTE: despite the name, `top3_models` holds the hyper-parameter sets returned
# by the tuner, not Keras models.
top3_models = tuner.get_best_hyperparameters(num_trials=3)
# print(tuner.get_best_hyperparameters(num_trials=3)[0].space) # shows the search space of a given trial
# print(tuner.get_best_hyperparameters(num_trials=3)[0].values) # shows the hyper-params applied in a given trial

# The loop variable is deliberately not called `model`, so the notebook-level
# `model` is not overwritten with a hyper-parameter set.
for idx, trial_hps in enumerate(top3_models):
    print('Model performance rank :', idx)
    print(trial_hps.values)
    print()


# Check the best trial's hyper-params

best_hps = top3_models[0]

print("""
The hyperparameter search is complete. 
* Optimal # of layers : {}
* Optimal value of the learning-rate : {}""".format(best_hps.get('num_layers'), best_hps.get('learning_rate')))

# Only the first `num_layers` layers are reported; the layers are built from these entries only.
for layer_num in range(best_hps.get('num_layers')):
    print('Layer {} - # of Perceptrons :'.format(layer_num), best_hps.get('units_' + str(layer_num)))
    print('Layer {} - Applied activation function :'.format(layer_num), best_hps.get('activation_' + str(layer_num)))

Model performance rank : 0
{'num_layers': 2, 'units_0': 512, 'activation_0': 'relu', 'learning_rate': 0.001, 'units_1': 512, 'activation_1': 'elu', 'units_2': 512, 'activation_2': 'elu'}

Model performance rank : 1
{'num_layers': 1, 'units_0': 320, 'activation_0': 'relu', 'learning_rate': 0.001, 'units_1': 512, 'activation_1': 'relu', 'units_2': 512, 'activation_2': 'elu'}

Model performance rank : 2
{'num_layers': 1, 'units_0': 224, 'activation_0': 'relu', 'learning_rate': 0.001, 'units_1': 512, 'activation_1': 'elu', 'units_2': 512, 'activation_2': 'relu'}


The hyperparameter search is complete. 
* Optimal # of layers : 2
* Optimal value of the learning-rate : 0.001
Layer 0 - # of Perceptrons : 512
Layer 0 - Applied activation function : relu
Layer 1 - # of Perceptrons : 512
Layer 1 - Applied activation function : elu


In [38]:
# Build a new model from the hyper-parameter set stored in best_hps.
model = tuner.hypermodel.build(best_hps)

In [46]:
# Stratified k-fold evaluation of the model built from the best hyper-parameters.
logloss_history = []
fold_no = 1
# StratifiedKFold.split yields positional row numbers, so the folds are taken
# with .iloc. Using .loc would treat them as index labels and select the wrong
# rows (or raise) whenever the DataFrame index is not exactly 0..n-1.
for train_index, test_index in str_kf.split(credit_X, credit_Y):
    X_train, X_test = credit_X.iloc[train_index], credit_X.iloc[test_index]
    y_train, y_test = credit_Y.iloc[train_index], credit_Y.iloc[test_index]
    
    # Preprocessing is re-fitted on each training fold only.
    x_train_transformed , x_test_transformed, preprocessor = pipe_processing(X_train, X_test)
    new_col_names = get_feature_names(preprocessor)
    x_train_transformed = pd.DataFrame(x_train_transformed,columns=new_col_names)
    x_test_transformed = pd.DataFrame(x_test_transformed,columns=new_col_names)
    train_label = utils.to_categorical(y_train) # 0~2 -> one-hot vector
    test_label = utils.to_categorical(y_test) # 0~2 -> one-hot vector
    # Rebuild the model for every fold so no weights carry over between folds.
    model = tuner.hypermodel.build(best_hps)

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(x_train_transformed, train_label, epochs=30, verbose = 0,validation_data = (x_test_transformed, test_label))
    # Generate generalization metrics: scores[0] is the loss
    scores = model.evaluate(x_test_transformed, test_label, verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]};')
    logloss_history.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

print("각 분할의 loss 기록 :", logloss_history)    
print("평균 loss :", np.mean(logloss_history))

------------------------------------------------------------------------
Training for fold 1 ...
Score for fold 1: loss of 0.8496361374855042; categorical_crossentropy of 84.96361374855042%
------------------------------------------------------------------------
Training for fold 2 ...
Score for fold 2: loss of 0.8090305328369141; categorical_crossentropy of 80.9030532836914%
------------------------------------------------------------------------
Training for fold 3 ...
Score for fold 3: loss of 0.8262324929237366; categorical_crossentropy of 82.62324929237366%
------------------------------------------------------------------------
Training for fold 4 ...
Score for fold 4: loss of 0.8400170803070068; categorical_crossentropy of 84.00170803070068%
------------------------------------------------------------------------
Training for fold 5 ...
Score for fold 5: loss of 0.839669406414032; categorical_crossentropy of 83.9669406414032%
각 분할의 loss 기록 : [0.8496361374855042, 0.80903053283691

### BN 적용한 HPO(BayesianOpt)

In [24]:
# 2) Build the hyper-model
# Available HyperParameter search spaces (https://j.mp/2IXPzh7) : Int, Float, Boolean, Choice, Fixed

def build_hyper_model(hp):
    """Build and compile a tunable dense classifier with BatchNorm and Dropout.

    Every hidden layer is Dense (no activation) -> BatchNormalization ->
    Activation -> Dropout. Search space registered on ``hp``:
      * ``num_layers``: number of hidden layers, 1 to 3
      * ``units_<i>``: width of hidden layer ``i``, 32 to 512 in steps of 32
      * ``activation_<i>``: ``'relu'`` or ``'elu'`` for hidden layer ``i``
      * ``dropout_``: dropout rate, 0.0 to 0.5 in steps of 0.05. The name has
        no layer suffix, so all layers share this one hyper-parameter.
      * ``learning_rate``: 0.1, 0.01 or 0.001 for the Adam optimizer

    Returns the compiled model (categorical cross-entropy loss and metric).
    """
    net = models.Sequential()
    net.add(layers.Flatten())

    depth = hp.Int('num_layers', min_value=1, max_value=3)
    for idx in range(depth):
        suffix = str(idx)
        width = hp.Int('units_' + suffix, min_value=32, max_value=512, step=32)
        act = hp.Choice('activation_' + suffix, values=['relu', 'elu'])
        drop_rate = hp.Float('dropout_', min_value=0.0, max_value=0.5, step=0.05)
        # The activation is applied after batch normalization, so the Dense layer has none.
        net.add(layers.Dense(units=width, activation=None))
        net.add(layers.BatchNormalization())
        net.add(layers.Activation(act))
        net.add(layers.Dropout(drop_rate))

    # Output layer: one probability per class (labels 0~2).
    net.add(layers.Dense(units=3, activation='softmax'))

    lr = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])
    net.compile(
        optimizer=optimizers.Adam(learning_rate=lr),
        loss=losses.categorical_crossentropy,
        metrics=[metrics.categorical_crossentropy],
    )
    return net

In [25]:
# Bayesian-optimization tuner over the search space defined by build_hyper_model.
tuner = kt.BayesianOptimization(build_hyper_model,
                                objective = kt.Objective('val_loss','min'), # objective of the hyper-parameter search (metric to minimize or maximize)
                                max_trials = 30, # total number of trials, each with a different hyper-parameter combination
                                directory = 'test_prac_dir', # Path to the working directory
                                project_name = 'Credit_hyper_2(final)') # Name to use as directory name for files saved by this Tuner

INFO:tensorflow:Reloading Oracle from existing project test_prac_dir\Credit_hyper_2(final)\oracle.json
INFO:tensorflow:Reloading Tuner from test_prac_dir\Credit_hyper_2(final)\tuner0.json


In [26]:
# Single 80/20 hold-out split with a fixed seed.
x_train, x_test, y_train, y_test = model_selection.train_test_split(credit_X, credit_Y, test_size=0.2, random_state=0)
# Preprocessing is fitted on the training part only; the results are wrapped in DataFrames with readable column names.
x_train_transformed , x_test_transformed, preprocessor = pipe_processing(x_train, x_test)
new_col_names = get_feature_names(preprocessor)
x_train_transformed = pd.DataFrame(x_train_transformed,columns=new_col_names)
x_test_transformed = pd.DataFrame(x_test_transformed,columns=new_col_names)
train_label = utils.to_categorical(y_train) # 0~2-> one-hot vector
test_label = utils.to_categorical(y_test) # 0~2 -> one-hot vector
# Run the search: each trial trains for 30 epochs with the held-out split passed as validation data.
tuner.search(x_train_transformed, train_label, epochs=30, validation_data = (x_test_transformed, test_label))

INFO:tensorflow:Oracle triggered exit


In [27]:
# Print the three best trials with their hyper-parameters and scores.
tuner.results_summary(num_trials=3) 

Results summary
Results in test_prac_dir\Credit_hyper_2(final)
Showing 3 best trials
<keras_tuner.engine.objective.Objective object at 0x000001C7E9E6CDF0>
Trial summary
Hyperparameters:
num_layers: 3
units_0: 512
activation_0: elu
dropout_: 0.2
learning_rate: 0.001
units_1: 512
activation_1: relu
units_2: 192
activation_2: elu
Score: 0.7753281593322754
Trial summary
Hyperparameters:
num_layers: 3
units_0: 512
activation_0: elu
dropout_: 0.1
learning_rate: 0.001
units_1: 512
activation_1: relu
units_2: 32
activation_2: elu
Score: 0.7769176959991455
Trial summary
Hyperparameters:
num_layers: 3
units_0: 512
activation_0: elu
dropout_: 0.1
learning_rate: 0.001
units_1: 512
activation_1: relu
units_2: 32
activation_2: elu
Score: 0.7777508497238159


In [51]:
# Check top-3 trials' hyper-params

# NOTE: despite the name, `top3_models` holds the hyper-parameter sets returned
# by the tuner, not Keras models.
top3_models = tuner.get_best_hyperparameters(num_trials=3)
# print(tuner.get_best_hyperparameters(num_trials=3)[0].space) # shows the search space of a given trial
# print(tuner.get_best_hyperparameters(num_trials=3)[0].values) # shows the hyper-params applied in a given trial

# The loop variable is deliberately not called `model`, so the notebook-level
# `model` is not overwritten with a hyper-parameter set.
for idx, trial_hps in enumerate(top3_models):
    print('Model performance rank :', idx)
    print(trial_hps.values)
    print()


# Check the best trial's hyper-params

best_hps = top3_models[0]

print("""
The hyperparameter search is complete. 
* Optimal # of layers : {}
* Optimal value of the learning-rate : {}""".format(best_hps.get('num_layers'), best_hps.get('learning_rate')))

# Only the first `num_layers` layers are reported; the layers are built from these entries only.
for layer_num in range(best_hps.get('num_layers')):
    print('Layer {} - # of Perceptrons :'.format(layer_num), best_hps.get('units_' + str(layer_num)))
    print('Layer {} - Applied activation function :'.format(layer_num), best_hps.get('activation_' + str(layer_num)))

Model performance rank : 0
{'num_layers': 3, 'units_0': 512, 'activation_0': 'elu', 'dropout_': 0.2, 'learning_rate': 0.001, 'units_1': 512, 'activation_1': 'relu', 'units_2': 192, 'activation_2': 'elu'}

Model performance rank : 1
{'num_layers': 3, 'units_0': 512, 'activation_0': 'elu', 'dropout_': 0.1, 'learning_rate': 0.001, 'units_1': 512, 'activation_1': 'relu', 'units_2': 32, 'activation_2': 'elu'}

Model performance rank : 2
{'num_layers': 3, 'units_0': 512, 'activation_0': 'elu', 'dropout_': 0.1, 'learning_rate': 0.001, 'units_1': 512, 'activation_1': 'relu', 'units_2': 32, 'activation_2': 'elu'}


The hyperparameter search is complete. 
* Optimal # of layers : 3
* Optimal value of the learning-rate : 0.001
Layer 0 - # of Perceptrons : 512
Layer 0 - Applied activation function : elu
Layer 1 - # of Perceptrons : 512
Layer 1 - Applied activation function : relu
Layer 2 - # of Perceptrons : 192
Layer 2 - Applied activation function : elu


In [57]:
# Stratified k-fold evaluation of the model built from the best hyper-parameters.
# (The model is built inside the loop, so no separate build is needed beforehand.)
logloss_history = []
fold_no = 1
# StratifiedKFold.split yields positional row numbers, so the folds are taken
# with .iloc. Using .loc would treat them as index labels and select the wrong
# rows (or raise) whenever the DataFrame index is not exactly 0..n-1.
for train_index, test_index in str_kf.split(credit_X, credit_Y):
    X_train, X_test = credit_X.iloc[train_index], credit_X.iloc[test_index]
    y_train, y_test = credit_Y.iloc[train_index], credit_Y.iloc[test_index]
    
    # Preprocessing is re-fitted on each training fold only.
    x_train_transformed , x_test_transformed, preprocessor = pipe_processing(X_train, X_test)
    new_col_names = get_feature_names(preprocessor)
    x_train_transformed = pd.DataFrame(x_train_transformed,columns=new_col_names)
    x_test_transformed = pd.DataFrame(x_test_transformed,columns=new_col_names)
    train_label = utils.to_categorical(y_train) # 0~2 -> one-hot vector
    test_label = utils.to_categorical(y_test) # 0~2 -> one-hot vector
    # Rebuild the model for every fold so no weights carry over between folds.
    model = tuner.hypermodel.build(best_hps)

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(x_train_transformed, train_label, epochs=30, verbose = 0,validation_data = (x_test_transformed, test_label))
    # Generate generalization metrics: scores[0] is the loss
    scores = model.evaluate(x_test_transformed, test_label, verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]};')
    logloss_history.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

print("각 분할의 loss 기록 :", logloss_history)    
print("평균 loss :", np.mean(logloss_history))

------------------------------------------------------------------------
Training for fold 1 ...
Score for fold 1: loss of 0.7921059727668762;
------------------------------------------------------------------------
Training for fold 2 ...
Score for fold 2: loss of 0.8007999062538147;
------------------------------------------------------------------------
Training for fold 3 ...
Score for fold 3: loss of 0.7948606014251709;
------------------------------------------------------------------------
Training for fold 4 ...
Score for fold 4: loss of 0.794422447681427;
------------------------------------------------------------------------
Training for fold 5 ...
Score for fold 5: loss of 0.8013486266136169;
각 분할의 loss 기록 : [0.7921059727668762, 0.8007999062538147, 0.7948606014251709, 0.794422447681427, 0.8013486266136169]
평균 loss : 0.7967075109481812


## Keras tuner : Hyperband 적용

In [28]:
# tensorflow
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tensorflow import keras 
from tensorflow.keras import utils,models, layers, activations, initializers, losses, optimizers, metrics
from tensorflow.keras.layers.experimental import preprocessing
from sklearn.metrics import log_loss, accuracy_score

# Hyperband (imported from `keras_tuner`, the package name this notebook already uses for `kt`)
from keras_tuner.tuners import Hyperband

In [29]:
def run_pipeline(data_df, target):
    """Split the data 80/20 and preprocess both parts.

    Columns are grouped by dtype (plus a few hard-coded corrections), then
    numeric columns are standardized, categorical columns one-hot encoded and
    binary flag columns passed through. The transformers are fitted on the
    training part only.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Feature table; must contain the hard-coded column names used below.
    target : array-like
        Labels aligned with the rows of ``data_df``.

    Returns
    -------
    tuple
        ``(x_train_transformed, x_test_transformed, y_train, y_test)``.
    """
    # Split the columns by dtype. The builtin `object` is used because the
    # `np.object` alias has been removed from NumPy.
    data_df_cat = data_df.select_dtypes(include=object)
    data_df_num = data_df.select_dtypes(exclude=object)

    # Categorical frame: object columns plus two categorical columns that were not selected as object dtype.
    data_df_cat = pd.concat([data_df_cat, data_df[['occyp_category', 'child_num']]], axis=1)
    
    # Binary flag columns, passed through without encoding.
    data_df_bi = data_df[['gender','car','reality','work_phone','phone','email','dup']]

    # Remove the categorical / binary columns that ended up in the numeric frame.
    data_df_num = data_df_num.drop(columns=['gender','car','reality','child_num','work_phone','phone','email','dup','occyp_category'])
    
    x_train, x_test, y_train, y_test = model_selection.train_test_split(data_df,
                                                                    target,
                                                                   test_size = 0.2,
                                                                   random_state=0)

    binary_features = data_df_bi.columns
    
    numeric_features = data_df_num.columns
    numeric_transformer = StandardScaler() # cf) RobustScaler

    categorical_features = data_df_cat.columns
    categorical_transformer = OneHotEncoder(categories='auto', handle_unknown='ignore') # categories='auto' : just for ignoring warning messages

    preprocessor = ColumnTransformer(
        transformers=[ # List of (name, transformer, column(s))
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
             ('bi','passthrough',binary_features)])

    preprocessor_pipe = Pipeline(steps=[('preprocessor', preprocessor)])
    # Fit on the training part only so nothing leaks from the test part.
    preprocessor_pipe.fit(x_train)

    x_train_transformed = preprocessor_pipe.transform(x_train)
    x_test_transformed = preprocessor_pipe.transform(x_test)

    return x_train_transformed, x_test_transformed, y_train, y_test

### hyperband model builder

In [30]:
def model_builder(hp):
    """Baseline hypermodel for KerasTuner: Flatten -> 1..3 Dense layers -> softmax(3).

    Tuned hyperparameters:

    * ``num_layers``            - number of hidden Dense layers (1 to 3)
    * ``units_<i>``             - width of hidden layer ``i`` (32 to 512, step 32)
    * ``activation_<i>``        - activation of hidden layer ``i`` ('relu' or 'elu')
    * ``learning_rate``         - Adam learning rate (1e-2, 1e-3 or 1e-4)

    NOTE: a later cell defines ``model_builder`` again (with BatchNormalization
    and Dropout). When the cells run top to bottom, that later definition is
    the one bound to the name.

    Parameters
    ----------
    hp : HyperParameters object supplied by the tuner.

    Returns
    -------
    A compiled ``keras.Sequential`` model.
    """
    depth = hp.Int('num_layers', min_value=1, max_value=3)

    stack = [keras.layers.Flatten()]
    for idx in range(depth):
        # Every hyperparameter name must be unique, hence the per-layer suffix.
        suffix = str(idx)
        width = hp.Int('units_' + suffix, min_value=32, max_value=512, step=32)
        act_fn = hp.Choice('activation_' + suffix, values=['relu', 'elu'])
        stack.append(layers.Dense(units=width, activation=act_fn))

    # Output layer: one probability per class (labels 0~2).
    stack.append(layers.Dense(units=3, activation='softmax'))
    model = keras.Sequential(stack)

    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=optimizers.Adam(learning_rate=lr),
        loss=losses.categorical_crossentropy,
        metrics=[metrics.categorical_crossentropy],
    )
    return model

### BN, Dropout 적용

In [31]:
def model_builder(hp):
    """Hypermodel with BatchNormalization and Dropout in every hidden block.

    Architecture: Flatten -> N x [Dense -> BatchNorm -> Activation -> Dropout]
    -> Dense(3, softmax).

    Tuned hyperparameters:

    * ``num_layers``     - number of hidden blocks (1 to 3)
    * ``units_<i>``      - Dense width of block ``i`` (32 to 512, step 32)
    * ``activation_<i>`` - activation of block ``i`` ('relu' or 'elu')
    * ``dropout_<i>``    - dropout rate of block ``i`` (0.0 to 0.5, step 0.05)
    * ``learning_rate``  - Adam learning rate (1e-1, 1e-2 or 1e-3)

    Parameters
    ----------
    hp : HyperParameters object supplied by the tuner.

    Returns
    -------
    A compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential()
    model.add(keras.layers.Flatten())

    # Tune the number of hidden blocks (1~3).
    for layer_num in range(hp.Int('num_layers', min_value=1, max_value=3)):
        suffix = str(layer_num)
        # All hyperparameter names must be unique, so each one carries the
        # layer index. The dropout rate was previously registered under the
        # bare name 'dropout_', which made every layer share a single value.
        # NOTE(review): this changes the search space, so results cached in an
        # existing tuner directory belong to the old space - re-run the search
        # in a fresh project directory.
        hp_units = hp.Int('units_' + suffix, min_value=32, max_value=512, step=32)
        hp_activations = hp.Choice('activation_' + suffix, values=['relu', 'elu'])
        hp_dropout = hp.Float('dropout_' + suffix, min_value=0.0, max_value=0.5, step=0.05)

        # The Dense layer is linear so BatchNorm sees pre-activation values.
        model.add(layers.Dense(units=hp_units, activation=None))
        model.add(layers.BatchNormalization())
        model.add(layers.Activation(hp_activations))
        model.add(layers.Dropout(hp_dropout))

    # Output layer: one probability per class (labels 0~2).
    model.add(layers.Dense(units=3, activation='softmax'))

    # Tune the learning rate for the optimizer (0.1, 0.01 or 0.001).
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])

    model.compile(
        optimizer=optimizers.Adam(learning_rate=hp_learning_rate),
        loss=losses.categorical_crossentropy,
        metrics=[metrics.categorical_crossentropy],
    )

    return model

In [33]:
# Reload the preprocessed data and build the hold-out split used by the tuner.
# index_col=0 matches how the first load cell of this notebook reads the same
# file: the first CSV column is treated as the saved row index. Without it that
# column stays in the frame and run_pipeline (which selects numeric features by
# dtype) would scale it and feed it to the model as a feature.
# reset_index keeps a plain 0..n-1 index, as the previous read produced.
data_df = (
    pd.read_csv('pre_credit_df(fill_groupby)_final.csv', index_col=0)
    .reset_index(drop=True)
)
credit_Y = data_df['credit']
credit_X = data_df.drop(['credit'], axis=1)
x_train_transformed, x_test_transformed, y_train, y_test = run_pipeline(credit_X, credit_Y)

# One-hot encode the labels (3 classes) for categorical cross-entropy.
y_train = utils.to_categorical(y_train, 3)
y_test = utils.to_categorical(y_test, 3)

In [34]:
# Hyperband tuner: minimises validation loss, training each trial for at most
# 30 epochs with a reduction factor of 3.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective=kt.Objective(name='val_loss', direction='min'),
    max_epochs=30,
    factor=3,
    directory='test_prac_dir',  # path to the working directory
    project_name='Credit_hyperband',
)

In [35]:
# Run the hyperparameter search; every trial is validated on
# (x_test_transformed, y_test).
# NOTE(review): the same split is later used to report the final log-loss, so
# that number is optimistic - consider a separate validation split.
tuner.search(
    x_train_transformed,
    y_train,
    epochs=30,
    validation_data=(x_test_transformed, y_test),
)

# Keep the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [152]:
# Rebuild a fresh model from the best hyperparameters and train it for
# 15 epochs, tracking validation metrics on the hold-out split.
model = tuner.hypermodel.build(best_hps)
hb_history = model.fit(
    x_train_transformed,
    y_train,
    epochs=15,
    validation_data=(x_test_transformed, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [153]:
# The model ends in a softmax layer, so `predict` already returns class
# probabilities. `Sequential.predict_proba` was deprecated and then removed
# from tf.keras, so it is not used here.
pred_result = model.predict(x_test_transformed)
pre_logloss = log_loss(y_test, pred_result)
print('Hyperband Logloss : ',pre_logloss)

Hyperband Logloss :  0.8140285514455103


### k-fold 적용

In [154]:
# Cross-validate the best hyperparameters: for every fold, refit the
# preprocessing on the training part only, train a freshly built model and
# record its loss on the held-out part.
# `str_kf` is the splitter defined in an earlier cell (presumably a
# StratifiedKFold - confirm).
logloss_history = []
for fold_no, (train_index, test_index) in enumerate(str_kf.split(credit_X, credit_Y), start=1):
    # split() yields positional indices, so select rows with .iloc; .loc only
    # gives the same rows while the frame has a default 0..n-1 index.
    X_train, X_test = credit_X.iloc[train_index], credit_X.iloc[test_index]
    y_train, y_test = credit_Y.iloc[train_index], credit_Y.iloc[test_index]

    x_train_transformed, x_test_transformed, preprocessor = pipe_processing(X_train, X_test)

    # Fix the number of classes so the one-hot width is always 3, matching
    # the model's output layer even if a fold happens to miss a class.
    train_label = utils.to_categorical(y_train, 3)  # 0~2 -> one-hot vector
    test_label = utils.to_categorical(y_test, 3)  # 0~2 -> one-hot vector

    # A new model per fold, so no weights leak between folds.
    model = tuner.hypermodel.build(best_hps)

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model
    history = model.fit(
        x_train_transformed,
        train_label,
        epochs=30,
        verbose=0,
        validation_data=(x_test_transformed, test_label),
    )

    # Generalization metrics; scores[0] is the loss.
    scores = model.evaluate(x_test_transformed, test_label, verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]};')
    logloss_history.append(scores[0])

print("각 분할의 loss 기록 :", logloss_history)
print("평균 loss :", np.mean(logloss_history))

------------------------------------------------------------------------
Training for fold 1 ...
Score for fold 1: loss of 0.8564336895942688;
------------------------------------------------------------------------
Training for fold 2 ...
Score for fold 2: loss of 0.8435595035552979;
------------------------------------------------------------------------
Training for fold 3 ...
Score for fold 3: loss of 0.8567010164260864;
------------------------------------------------------------------------
Training for fold 4 ...
Score for fold 4: loss of 0.837535560131073;
------------------------------------------------------------------------
Training for fold 5 ...
Score for fold 5: loss of 0.8494362235069275;
각 분할의 loss 기록 : [0.8564336895942688, 0.8435595035552979, 0.8567010164260864, 0.837535560131073, 0.8494362235069275]
평균 loss : 0.8487331986427307
