In [1]:
import pandas as pd
import numpy as np

In [2]:
# 당뇨병 데이터 불러오기
df = pd.read_csv('diabetes_data.csv')

In [3]:
# feature와 target 분리
target = df[['Diabetes']]
data = df.drop(columns=['Diabetes'])

In [4]:
# train_test 분리
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=33)

In [5]:
# Pipeline 을 위한 전처리 모듈 불러오기
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [6]:
x_train.head(3)

Unnamed: 0,Age,Sex,HighChol,CholCheck,BMI,Smoker,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,HvyAlcoholConsump,GenHlth,MentHlth,PhysHlth,DiffWalk,Stroke,HighBP
50498,13.0,0.0,1.0,1.0,32.0,1.0,1.0,1.0,1.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0
44300,10.0,0.0,1.0,1.0,22.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,10.0,20.0,0.0,0.0,1.0
49497,8.0,1.0,1.0,1.0,35.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,5.0,0.0,0.0,0.0,1.0


In [7]:
# 파이프라인
# 수로 된 열 선택
numeric_features = ['Age', 'BMI', 'MentHlth', 'PhysHlth']
numeric_transformer = StandardScaler()

# 수로 된 열 제외한 모든 라벨로 된 열 선택
categorical_features = list(x_train.columns)
categorical_features.remove('Age')
categorical_features.remove('BMI')
categorical_features.remove('MentHlth')
categorical_features.remove('PhysHlth')

categorical_transformer = OneHotEncoder(categories='auto', handle_unknown='ignore') 

# 전처리 모델 생성
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

In [8]:
# 파이프라인으로 전처리
preprocessor_pipe = Pipeline(steps=[('preprocessor', preprocessor)])

In [9]:
# 파이프라인 학습
preprocessor_pipe.fit(x_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', StandardScaler(),
                                                  ['Age', 'BMI', 'MentHlth',
                                                   'PhysHlth']),
                                                 ('cat',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  ['Sex', 'HighChol',
                                                   'CholCheck', 'Smoker',
                                                   'HeartDiseaseorAttack',
                                                   'PhysActivity', 'Fruits',
                                                   'Veggies',
                                                   'HvyAlcoholConsump',
                                                   'GenHlth', 'DiffWalk',
                                                   'Stroke', 'HighBP'])]))])

In [10]:
# 파이프라인 학습된 전처리로 변환
x_train = preprocessor_pipe.transform(x_train)
x_test = preprocessor_pipe.transform(x_test)

In [11]:
# 딥러닝 모듈 불러오기
import tensorflow as tf
from tensorflow.keras import datasets, utils
from tensorflow.keras import models, layers, activations, initializers, losses, optimizers, metrics
import keras_tuner as kt

In [12]:
# target 데이터 원핫인코딩
# y_train = utils.to_categorical(y_train)

In [13]:
x_train.shape

(56553, 33)

In [14]:
# 2) Build the hyper-model
# Available HyperParameter search spaces (https://j.mp/2IXPzh7) : Int, Float, Boolean, Choice, Fixed

def build_hyper_model(hp):
    
    model = models.Sequential()
    model.add(layers.Dense(input_dim=33, units=64)) # change 2-dims MNIST dataset to 1-dim 
        
    # Tune the number of hidden layer (Choose an optimal value between 1~3)
    for layer_num in range(hp.Int('num_layers', min_value=1, max_value=3)): 
        # Tune the number of perceptrons in a dense layer (Choose an optimal value between 32~512) 
        hp_units = hp.Int('units_' + str(layer_num), min_value=32, max_value=512, step=32) # 32:512 & step 32, all parameter names should be unique (we name the inner parameters 'units_' + str(i))
        hp_activations = hp.Choice('activation_' + str(layer_num), values=['relu', 'elu'])
        model.add(layers.Dense(units = hp_units, activation = hp_activations))

    model.add(layers.Dense(units=1, activation='sigmoid')) # class 10 : 0~9

    # Tune the learning rate for the optimizer (Choose an optimal value from 0.01, 0.001, or 0.0001)
    hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4]) 
    
    model.compile(optimizer = optimizers.Adam(learning_rate = hp_learning_rate),
                loss = losses.binary_crossentropy, # use sparse c.c when our labels are looks like "1" (single integer), not "[1,0,0]" (one-hot vector) (@ http://j.mp/2XS0jmv)
                metrics = [metrics.binary_accuracy])
    
    return model

In [15]:
# 3) Select tuner and compile it
# Available tuners (https://j.mp/39cWz4n) : kt.BayesianOptimization / kt.Hyperband / kt.RandomSearch / kt.Sklearn (https://j.mp/3nSJn8O)

tuner = kt.BayesianOptimization(build_hyper_model,
                                objective = 'val_accuracy', # Hyper-params tuning을 위한 목적함수 설정 (metric to minimize or maximize)
                                max_trials = 10, # 서로 다른 Hyper-params 조합으로 시도할 총 Trial 횟수 설정
                                directory = 'test_prac_dir', # Path to the working directory
                                project_name = 'diabetes_hyper_1') # Name to use as directory name for files saved by this Tuner

# tuner = kt.Hyperband(build_hyper_model,
#                      objective = 'val_accuracy', # Hyper-params tuning을 위한 목적함수 설정 (metric to minimize or maximize)
#                      max_epochs = 5, # 최대 epoch 수 설정, epoch 수 자체도 지정한 최대 횟수 내에서 변화시켜가며 테스트를 진행함 (epochs to train one model) 
#                      directory = 'test_prac_dir', # Path to the working directory
#                      project_name = 'MNIST_hyper_1') # Name to use as directory name for files saved by this Tuner

tuner.search_space_summary()

INFO:tensorflow:Reloading Tuner from test_prac_dir\diabetes_hyper_1\tuner0.json
Search space summary
Default search space size: 8
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
activation_1 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
activation_2 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu

In [16]:
# 4) Train the model

tuner.search(x_train, y_train, epochs=30, validation_split=0.3) # epochs == learning epoch for training a single model(epoch for each trial) 


# # 아래와 같이 별도의 클래스로 콜백을 정의하여 search 함수에서 활용하면 모든 학습 단계 종료 후 학습 중 발생한 출력 결과를 자동으로 지워낼 수 있습니다.
# class ClearTrainingOutput(tf.keras.callbacks.Callback):
#   def on_train_end(*args, **kwargs):
#     IPython.display.clear_output(wait = True)

# tuner.search(x_train, y_train, epochs = 7, validation_data = (x_test, y_test), callbacks = [ClearTrainingOutput()]) # epochs == learning epoch for training a single model 

Trial 5 Complete [00h 01m 12s]

Best val_accuracy So Far: None
Total elapsed time: 00h 01m 59s

Search: Running Trial #6

Value             |Best Value So Far |Hyperparameter
2                 |2                 |num_layers
448               |288               |units_0
elu               |elu               |activation_0
0.001             |0.01              |learning_rate
128               |64                |units_1
elu               |elu               |activation_1
224               |192               |units_2
relu              |relu              |activation_2

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


Traceback (most recent call last):
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\base_tuner.py", line 261, in _run_and_update_trial
    tuner_utils.convert_to_metrics_dict(
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 225, in convert_to_metrics_dict
    [convert_to_metrics_dict(elem, objective) for elem in results]
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 225, in <listcomp>
    [convert_to_metrics_dict(elem, objective) for elem in results]
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 238, in convert_to_metrics_dict
    best_value, _

RuntimeError: Number of consecutive failures excceeded the limit of 3.
Traceback (most recent call last):
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\base_tuner.py", line 261, in _run_and_update_trial
    tuner_utils.convert_to_metrics_dict(
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 225, in convert_to_metrics_dict
    [convert_to_metrics_dict(elem, objective) for elem in results]
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 225, in <listcomp>
    [convert_to_metrics_dict(elem, objective) for elem in results]
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 238, in convert_to_metrics_dict
    best_value, _ = _get_best_value_and_best_epoch_from_history(
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\tuner_utils.py", line 209, in _get_best_value_and_best_epoch_from_history
    objective_value = objective.get_value(metrics)
  File "C:\Users\TECH2_30\AppData\Roaming\Python\Python39\site-packages\keras_tuner\engine\objective.py", line 57, in get_value
    return logs[self.name]
KeyError: 'val_accuracy'


In [None]:
# 5) Check the result 

tuner.results_summary(num_trials=3) # Show "n" best trial results

In [None]:
# Check top-3 trials' hyper-params

top3_models = tuner.get_best_hyperparameters(num_trials=3)
# print(tuner.get_best_hyperparameters(num_trials=3)[0].space) # 특정 Trial의 Search-space 를 확인할 수 있음
# print(tuner.get_best_hyperparameters(num_trials=3)[0].values) # 특정 Trial에 적용된 Hyper-params를 확인할 수 있음

for idx, model in enumerate(top3_models):
    print('Model performance rank :', idx)
    print(model.values)
    print()


# Check the best trial's hyper-params

best_hps = top3_models[0]

print("""
The hyperparameter search is complete. 
* Optimal # of layers : {}
* Optimal value of the learning-rate : {}""".format(best_hps.get('num_layers'), best_hps.get('learning_rate')))

for layer_num in range(best_hps.get('num_layers')):
    print('Layer {} - # of Perceptrons :'.format(layer_num), best_hps.get('units_' + str(layer_num)))
    print('Layer {} - Applied activation function :'.format(layer_num), best_hps.get('activation_' + str(layer_num)))

In [None]:
# Get the best model from trials

models = tuner.get_best_models(num_models=3) # Keras Sequential models
top_model = models[0]
top_model.summary()
print()

results = top_model.evaluate(x_test, y_test)
print('Cross-entropy :', results[0])
print('Accuracy :', results[1])

In [None]:
# We can retrain the model with the optimal hyperparameters from the search.
best_hps = top3_models[0]

# Build the model with the optimal hyperparameters and train it on the data.
model = tuner.hypermodel.build(best_hps)
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))

results = model.evaluate(x_test, y_test)
print('Cross-entropy :', results[0])
print('Accuracy :', results[1])

In [None]:
# We can also find detailed logs, checkpoints, etc, in the folder "directory/project_name".

# The [test_prac_dir/MNIST_hyper_1] directory contains detailed logs and checkpoints for every trial (model configuration) run during the hyperparameter search. 
# If you re-run the hyperparameter search, the Keras Tuner uses the existing state from these logs to resume the search. 
# To disable this behavior, pass an additional [overwrite = True] argument while instantiating the tuner.

for trial in tuner.oracle.get_best_trials(num_trials=3):
    print('Trial-score is :', trial.score)
    print('Trial-directory(trial_id) is :', trial.trial_id)
    print()

# tuner.oracle.trials -> get all trial_id 