# 🧠 Kor-DEEPression
## 🛠️ Step 2-4. ML/DL Modeling
---

## 💻 Step 2-4-3. Multi Layer Perceptron(MLP) Modeling & Tuning

- 💾 Data Preparation

In [1]:
from custom_modules.modeling import data_load

# Load the encoded feature/target tables for both tasks
print("\n(Data Loading)")
print("\t(Depression)")
df_X_depr, df_y_depr = data_load(
    filepath='downloads/Encoded_depr.csv',
    target_name='depression',
)
print("\n\t(MDD)")
df_X_mdd, df_y_mdd = data_load(
    filepath='downloads/Encoded_mdd.csv',
    target_name='MDD',
)


(Data Loading)
	(Depression)
	DataFrame Shape : (16570, 48)
	Features(X) Shape : (16570, 47)
	Target(y) Shape : (16570,)

	(MDD)
	DataFrame Shape : (3359, 48)
	Features(X) Shape : (3359, 47)
	Target(y) Shape : (3359,)


In [2]:
from custom_modules.modeling import data_split

# Split each dataset into train / validation / test partitions
print("\n(Data Splitting)")
print("\t(Depression)")
(
    X_train_depr, X_val_depr, X_test_depr,
    y_train_depr, y_val_depr, y_test_depr,
) = data_split(df_X_depr, df_y_depr, val_set=True)
print("\n\t(MDD)")
(
    X_train_mdd, X_val_mdd, X_test_mdd,
    y_train_mdd, y_val_mdd, y_test_mdd,
) = data_split(df_X_mdd, df_y_mdd, val_set=True)


(Data Splitting)
	(Depression)
	X_train, y_train : (9942, 47), (9942,)
	X_val, y_val : (3314, 47), (3314,)
	X_test, y_test : (3314, 47), (3314,)

	(MDD)
	X_train, y_train : (2015, 47), (2015,)
	X_val, y_val : (672, 47), (672,)
	X_test, y_test : (672, 47), (672,)


In [3]:
from custom_modules.modeling import make_baseline

# Create the baseline (most frequent class) from each task's training targets
print("\n(Baseline)")
print("\t(Depression)")
baseline_depr = make_baseline(y_train_depr)
print("\n\t(MDD)")
baseline_mdd = make_baseline(y_train_mdd)


(Baseline)
	(Depression)
	Baseline Accuracy : 0.7944
	Baseline AUC_score : 0.5

	(MDD)
	Baseline Accuracy : 0.7300
	Baseline AUC_score : 0.5


- 💾 keras_tuner 라이브러리 불러오기

In [4]:
# keras_tuner 라이브러리를 import
import keras_tuner as kt

# keras Tuner Output을 자동으로 갱신하는 Class 선언(overriding)
from keras.callbacks import Callback
import IPython

# Subclass Callback and override the end-of-training hook
class ClearTrainingOutput(Callback):
    """Keras callback that clears the notebook cell output when training ends.

    It is handed to the tuner ``search`` calls in this notebook so that the
    log of one finished training run is replaced by the next one instead of
    piling up in the cell.
    """

    def on_train_end(self, logs=None):
        """Clear the current cell output once a training run has finished.

        Args:
            logs: Value passed in by Keras at the end of training; unused.
        """
        # wait=True: per the IPython docs, the clear is deferred until new
        # output is available to replace the old one.
        IPython.display.clear_output(wait=True)

### ⚙️ Depression(정상vs우울증) Model Tuning 

In [5]:
# custom_modules에서 정의한 model_builder를 불러옴
from custom_modules.modeling import model_builder_mlp

# Define the keras tuner (the search strategy is Hyperband).
# Trials are ranked by validation AUC, which is maximised.
tuner_depr = kt.Hyperband(
    hypermodel=model_builder_mlp,
    objective=kt.Objective('val_auc', 'max'),
    max_epochs=10,
    factor=3,
    directory='kt_tuning',
    project_name='mlp_tuning_depr',
)

In [6]:
# Run the search on the training split, validating on the validation split
tuner_depr.search(
    X_train_depr,
    y_train_depr,
    validation_data=(X_val_depr, y_val_depr),
    epochs=10,
    callbacks=[ClearTrainingOutput()],
)
# Keep the top-ranked hyperparameter set returned by the tuner
best_hps_depr = tuner_depr.get_best_hyperparameters(num_trials=1)[0]

Trial 30 Complete [00h 00m 20s]
val_auc: 0.8214871883392334

Best val_auc So Far: 0.8241401314735413
Total elapsed time: 00h 04m 27s
INFO:tensorflow:Oracle triggered exit


In [7]:
# Print the tuned 'units_1' / 'units_2' hyperparameter values
# (the labels read "optimised Dense_1 / Dense_2 node count")
print(f"""
      최적화된 Dense_1 노드 수 : {best_hps_depr.get('units_1')}
      최적화된 Dense_2 노드 수 : {best_hps_depr.get('units_2')}
      """)


      최적화된 Dense_1 노드 수 : 32
      최적화된 Dense_2 노드 수 : 16
      


In [8]:
# Build the model again using the optimised hyperparameters
best_model_depr = tuner_depr.hypermodel.build(best_hps_depr)
# Print the model architecture
best_model_depr.summary()

Model: "Sequential_MLP"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense_1 (Dense)             (None, 32)                1536      
                                                                 
 Dense_2 (Dense)             (None, 16)                528       
                                                                 
 Output_Layer (Dense)        (None, 1)                 17        
                                                                 
Total params: 2,081
Trainable params: 2,081
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Import the callback helper (original note: EarlyStopping, ModelCheckpoint)
from custom_modules.modeling import callback_sets

savepath = '../tuning-models/MLP_depr.h5'
callbacks = callback_sets(
    savepath=savepath,
    monitor='val_auc',
    mode='max',
    patience=8,
)
# Retrain the model that was built from the optimised hyperparameters
best_model_depr.fit(
    X_train_depr,
    y_train_depr,
    epochs=100,
    validation_data=(X_val_depr, y_val_depr),
    callbacks=callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


<keras.callbacks.History at 0x20cc42a5d00>

In [10]:
# Evaluate the retrained model on the train, validation and test splits.
# The last call is left as a bare expression so the notebook shows its result.
best_model_depr.evaluate(x=X_train_depr, y=y_train_depr, verbose=2)
best_model_depr.evaluate(x=X_val_depr, y=y_val_depr, verbose=2)
best_model_depr.evaluate(x=X_test_depr, y=y_test_depr, verbose=2)

311/311 - 1s - loss: 0.3827 - auc: 0.8256 - 937ms/epoch - 3ms/step
104/104 - 0s - loss: 0.3827 - auc: 0.8226 - 317ms/epoch - 3ms/step
104/104 - 0s - loss: 0.3788 - auc: 0.8200 - 318ms/epoch - 3ms/step


[0.3788101375102997, 0.8199905157089233]

In [11]:
# Load the model file saved under ../tuning-models and evaluate it on the
# same three splits, for comparison with the in-memory model
from keras.models import load_model

model_test_depr = load_model('../tuning-models/MLP_depr.h5')
model_test_depr.evaluate(x=X_train_depr, y=y_train_depr, verbose=2)
model_test_depr.evaluate(x=X_val_depr, y=y_val_depr, verbose=2)
model_test_depr.evaluate(x=X_test_depr, y=y_test_depr, verbose=2)

311/311 - 1s - loss: 0.3827 - auc: 0.8256 - 1s/epoch - 4ms/step
104/104 - 0s - loss: 0.3827 - auc: 0.8226 - 331ms/epoch - 3ms/step
104/104 - 0s - loss: 0.3788 - auc: 0.8200 - 332ms/epoch - 3ms/step


[0.3788101375102997, 0.8199905157089233]

- EarlyStopping이 적용된 Best 모델과 저장된 모델의 Evaluation 수치가 동일함을 확인.

### ⚙️ MDD(경도우울증vs주요우울장애) Model Tuning 

In [12]:
# Same structure and procedure as the Depression tuner, applied to the MDD data
tuner_mdd = kt.Hyperband(
    hypermodel=model_builder_mlp,
    objective=kt.Objective('val_auc', 'max'),
    max_epochs=10,
    factor=3,
    directory='kt_tuning',
    project_name='mlp_tuning_mdd',
)
tuner_mdd.search(
    X_train_mdd,
    y_train_mdd,
    validation_data=(X_val_mdd, y_val_mdd),
    epochs=10,
    callbacks=[ClearTrainingOutput()],
)
# Keep the top-ranked hyperparameter set returned by the tuner
best_hps_mdd = tuner_mdd.get_best_hyperparameters(num_trials=1)[0]

Trial 30 Complete [00h 00m 06s]
val_auc: 0.7162057757377625

Best val_auc So Far: 0.7283497452735901
Total elapsed time: 00h 01m 37s
INFO:tensorflow:Oracle triggered exit


In [13]:
# Print the tuned 'units_1' / 'units_2' hyperparameter values
# (the labels read "optimised Dense_1 / Dense_2 node count")
print(f"""
      최적화된 Dense_1 노드 수 : {best_hps_mdd.get('units_1')}
      최적화된 Dense_2 노드 수 : {best_hps_mdd.get('units_2')}
      """)


      최적화된 Dense_1 노드 수 : 40
      최적화된 Dense_2 노드 수 : 40
      


In [14]:
# Build the model again using the optimised hyperparameters
best_model_mdd = tuner_mdd.hypermodel.build(best_hps_mdd)
# Print the model architecture
best_model_mdd.summary()

Model: "Sequential_MLP"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense_1 (Dense)             (None, 40)                1920      
                                                                 
 Dense_2 (Dense)             (None, 40)                1640      
                                                                 
 Output_Layer (Dense)        (None, 1)                 41        
                                                                 
Total params: 3,601
Trainable params: 3,601
Non-trainable params: 0
_________________________________________________________________


In [15]:
savepath_mdd = '../tuning-models/MLP_mdd.h5'
callbacks_mdd = callback_sets(
    savepath=savepath_mdd,
    monitor='val_auc',
    mode='max',
    patience=8,
)
# Retrain the model that was built from the optimised hyperparameters
best_model_mdd.fit(
    X_train_mdd,
    y_train_mdd,
    epochs=100,
    validation_data=(X_val_mdd, y_val_mdd),
    callbacks=callbacks_mdd,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


<keras.callbacks.History at 0x20cec9bc790>

In [16]:
# Evaluate the retrained model on the train, validation and test splits.
# The last call is left as a bare expression so the notebook shows its result.
best_model_mdd.evaluate(x=X_train_mdd, y=y_train_mdd, verbose=2)
best_model_mdd.evaluate(x=X_val_mdd, y=y_val_mdd, verbose=2)
best_model_mdd.evaluate(x=X_test_mdd, y=y_test_mdd, verbose=2)

63/63 - 0s - loss: 0.4596 - auc: 0.8065 - 431ms/epoch - 7ms/step
21/21 - 0s - loss: 0.5359 - auc: 0.7170 - 76ms/epoch - 4ms/step
21/21 - 0s - loss: 0.5094 - auc: 0.7468 - 75ms/epoch - 4ms/step


[0.5093514323234558, 0.7468259930610657]

In [17]:
# Load the model file saved under ../tuning-models and evaluate it on the
# same three splits, for comparison with the in-memory model
model_test_mdd = load_model('../tuning-models/MLP_mdd.h5')
model_test_mdd.evaluate(x=X_train_mdd, y=y_train_mdd, verbose=2)
model_test_mdd.evaluate(x=X_val_mdd, y=y_val_mdd, verbose=2)
model_test_mdd.evaluate(x=X_test_mdd, y=y_test_mdd, verbose=2)

63/63 - 0s - loss: 0.4596 - auc: 0.8065 - 412ms/epoch - 7ms/step
21/21 - 0s - loss: 0.5359 - auc: 0.7170 - 83ms/epoch - 4ms/step
21/21 - 0s - loss: 0.5094 - auc: 0.7468 - 83ms/epoch - 4ms/step


[0.5093514323234558, 0.7468259930610657]

- EarlyStopping이 적용된 Best 모델과 저장된 모델의 Evaluation 수치가 동일함을 확인.