# 🧠 Kor-DEEPression 
## 🛠️ Step 2-4. ML/DL Modeling
---

## 💻 Step 2-4-4.<br>1D-Convolutional Neural Networks(1D-CNN)<br>Modeling & Tuning

- 💾 Data Preparation

In [1]:
from custom_modules.modeling import data_load

# Load both encoded datasets; each call yields a feature frame (X) and a target (y)
print("\n(Data Loading)")

print("\t(Depression)")
df_X_depr, df_y_depr = data_load(
    filepath='downloads/Encoded_depr.csv',
    target_name='depression',
)

print("\n\t(MDD)")
df_X_mdd, df_y_mdd = data_load(
    filepath='downloads/Encoded_mdd.csv',
    target_name='MDD',
)


(Data Loading)
	(Depression)
	DataFrame Shape : (16570, 48)
	Features(X) Shape : (16570, 47)
	Target(y) Shape : (16570,)

	(MDD)
	DataFrame Shape : (3359, 48)
	Features(X) Shape : (3359, 47)
	Target(y) Shape : (3359,)


In [2]:
from custom_modules.modeling import data_split

# Split each dataset into train / validation / test subsets
print("\n(Data Splitting)")

print("\t(Depression)")
(
    X_train_depr, X_val_depr, X_test_depr,
    y_train_depr, y_val_depr, y_test_depr,
) = data_split(df_X_depr, df_y_depr, val_set=True)

print("\n\t(MDD)")
(
    X_train_mdd, X_val_mdd, X_test_mdd,
    y_train_mdd, y_val_mdd, y_test_mdd,
) = data_split(df_X_mdd, df_y_mdd, val_set=True)


(Data Splitting)
	(Depression)
	X_train, y_train : (9942, 47), (9942,)
	X_val, y_val : (3314, 47), (3314,)
	X_test, y_test : (3314, 47), (3314,)

	(MDD)
	X_train, y_train : (2015, 47), (2015,)
	X_val, y_val : (672, 47), (672,)
	X_test, y_test : (672, 47), (672,)


In [3]:
from custom_modules.modeling import make_baseline

# Build the baseline (most-frequent class) for each task from its training labels.
# The printed accuracy / AUC values are the reference the tuned models must beat.
print("\n(Baseline)")
print("\t(Depression)")
baseline_depr = make_baseline(y_train_depr)
print("\n\t(MDD)")
baseline_mdd = make_baseline(y_train_mdd)


(Baseline)
	(Depression)
	Baseline Accuracy : 0.7944
	Baseline AUC_score : 0.5

	(MDD)
	Baseline Accuracy : 0.7300
	Baseline AUC_score : 0.5


- 💾 keras_tuner 라이브러리 불러오기

In [4]:
# keras_tuner 라이브러리를 import
import keras_tuner as kt

# keras Tuner Output을 자동으로 갱신하는 Class 선언(overriding)
from keras.callbacks import Callback
import IPython

# Subclass Callback and override the end-of-training hook
class ClearTrainingOutput(Callback):
    """Keras callback that clears the notebook cell output when training ends.

    Passed to the tuner's ``search`` call so that the log of a finished
    training run is wiped instead of piling up in the cell output.
    """

    def on_train_end(self, logs=None):
        """Clear the current cell output once a training run has finished.

        Args:
            logs: Metrics dict supplied by Keras; unused here.
        """
        # wait=True defers the clear until new output arrives, avoiding flicker
        IPython.display.clear_output(wait=True)

### ⚙️ Depression(정상vs우울증) Model Tuning 

In [5]:
# custom_modules에서 정의한 model_builder를 불러옴
from custom_modules.modeling import model_builder_cnn

# Define the Keras Tuner search, using the Hyperband strategy.
# The objective is to maximize the validation AUC score.
tuner_depr = kt.Hyperband(
    hypermodel=model_builder_cnn,
    objective=kt.Objective(name='val_auc', direction='max'),
    max_epochs=10,
    factor=3,
    directory='kt_tuning',
    project_name='cnn_tuning_depr',
)

# Run the search on the Depression training split, scoring on the validation split
tuner_depr.search(
    X_train_depr,
    y_train_depr,
    epochs=10,
    validation_data=(X_val_depr, y_val_depr),
    callbacks=[ClearTrainingOutput()],
)

# Keep the single best hyperparameter set found by the search
best_hps_depr = tuner_depr.get_best_hyperparameters(num_trials=1)[0]

Trial 30 Complete [00h 00m 24s]
val_auc: 0.821434736251831

Best val_auc So Far: 0.8224851489067078
Total elapsed time: 00h 05m 32s
INFO:tensorflow:Oracle triggered exit


In [6]:
# Report the tuned value of every searched hyperparameter, one per line.
# The leading newline and the trailing six-space line reproduce the layout
# of a triple-quoted block indented by six spaces.
print(
    "\n"
    + "".join(
        f"      최적화된 {hp_name} : {best_hps_depr.get(hp_name)}\n"
        for hp_name in (
            'Conv1D_Filters',
            'kernel_size',
            'FC_units',
            'Dropout_rate',
            'learning_rate',
        )
    )
    + "      "
)


      최적화된 Conv1D_Filters : 16
      최적화된 kernel_size : 3
      최적화된 FC_units : 128
      최적화된 Dropout_rate : 0.8
      최적화된 learning_rate : 0.001
      


In [7]:
# Rebuild the model from the best hyperparameters found by the tuner
best_model_depr = tuner_depr.hypermodel.build(best_hps_depr)
# Print the model architecture (layers, output shapes, parameter counts)
best_model_depr.summary()

Model: "Sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv1D_Layer (Conv1D)       (None, 45, 16)            64        
                                                                 
 Batch_Normalization (BatchN  (None, 45, 16)           64        
 ormalization)                                                   
                                                                 
 Flatten_Layer (Flatten)     (None, 720)               0         
                                                                 
 FC_Dense_Layer (Dense)      (None, 128)               92288     
                                                                 
 Dropout (Dropout)           (None, 128)               0         
                                                                 
 Output_Layer (Dense)        (None, 1)                 129       
                                                        

In [8]:
# Callback 함수 불러오기(EarlyStopping, ModelCheckpoint)
from custom_modules.modeling import callback_sets
# Destination file for the Depression model saved during retraining
savepath = '../tuning-models/CNN_depr.h5'
callbacks = callback_sets(
    savepath=savepath,
    monitor='val_auc',
    mode='max',
    patience=8,
)

# Retrain the model built from the best hyperparameters.
# fit() is kept as the cell's last expression so the notebook shows its History.
best_model_depr.fit(
    X_train_depr,
    y_train_depr,
    epochs=100,
    validation_data=(X_val_depr, y_val_depr),
    callbacks=callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100


<keras.callbacks.History at 0x1412ac096a0>

In [9]:
# Evaluate the retrained Depression model on the train, validation, and test splits.
# Each call prints one summary line (verbose=2); the return value of the last
# call is what the notebook displays as the cell result.
best_model_depr.evaluate(X_train_depr, y_train_depr, verbose=2)
best_model_depr.evaluate(X_val_depr, y_val_depr, verbose=2)
best_model_depr.evaluate(X_test_depr, y_test_depr, verbose=2)

311/311 - 1s - loss: 0.3799 - auc: 0.8316 - 992ms/epoch - 3ms/step
104/104 - 0s - loss: 0.3893 - auc: 0.8213 - 323ms/epoch - 3ms/step
104/104 - 0s - loss: 0.3874 - auc: 0.8156 - 325ms/epoch - 3ms/step


[0.3873653709888458, 0.8156205415725708]

In [10]:
# Saved model Evaluation
from keras.models import load_model
# Reload the Depression model from the file path used as the save path during
# retraining, then evaluate it on the same three splits so its metrics can be
# compared with those of the in-memory model.
model_test_depr = load_model('../tuning-models/CNN_depr.h5')
model_test_depr.evaluate(X_train_depr, y_train_depr, verbose=2)
model_test_depr.evaluate(X_val_depr, y_val_depr, verbose=2)
model_test_depr.evaluate(X_test_depr, y_test_depr, verbose=2)

311/311 - 1s - loss: 0.3799 - auc: 0.8316 - 1s/epoch - 4ms/step
104/104 - 0s - loss: 0.3893 - auc: 0.8213 - 326ms/epoch - 3ms/step
104/104 - 0s - loss: 0.3874 - auc: 0.8156 - 326ms/epoch - 3ms/step


[0.3873653709888458, 0.8156205415725708]

- EarlyStopping이 적용된 Best 모델과 저장된 모델의 Evaluation 수치가 동일함을 확인.

### ⚙️ MDD(경도우울증vs주요우울장애) Model Tuning 

In [11]:
# Same structure and procedure as the Depression tuning:
# Hyperband search that maximizes the validation AUC score.
tuner_mdd = kt.Hyperband(
    hypermodel=model_builder_cnn,
    objective=kt.Objective(name='val_auc', direction='max'),
    max_epochs=10,
    factor=3,
    directory='kt_tuning',
    project_name='cnn_tuning_mdd',
)

# Run the search on the MDD training split, scoring on the validation split
tuner_mdd.search(
    X_train_mdd,
    y_train_mdd,
    epochs=10,
    validation_data=(X_val_mdd, y_val_mdd),
    callbacks=[ClearTrainingOutput()],
)

# Keep the single best hyperparameter set found by the search
best_hps_mdd = tuner_mdd.get_best_hyperparameters(num_trials=1)[0]

Trial 30 Complete [00h 00m 08s]
val_auc: 0.6838897466659546

Best val_auc So Far: 0.7213106751441956
Total elapsed time: 00h 01m 56s
INFO:tensorflow:Oracle triggered exit


In [12]:
# Report the tuned value of every searched hyperparameter, one per line.
# The leading newline and the trailing six-space line reproduce the layout
# of a triple-quoted block indented by six spaces.
print(
    "\n"
    + "".join(
        f"      최적화된 {hp_name} : {best_hps_mdd.get(hp_name)}\n"
        for hp_name in (
            'Conv1D_Filters',
            'kernel_size',
            'FC_units',
            'Dropout_rate',
            'learning_rate',
        )
    )
    + "      "
)


      최적화된 Conv1D_Filters : 32
      최적화된 kernel_size : 4
      최적화된 FC_units : 256
      최적화된 Dropout_rate : 0.8
      최적화된 learning_rate : 0.01
      


In [13]:
# Rebuild the model from the best hyperparameters found by the tuner
best_model_mdd = tuner_mdd.hypermodel.build(best_hps_mdd)
# Print the model architecture (layers, output shapes, parameter counts)
best_model_mdd.summary()

Model: "Sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv1D_Layer (Conv1D)       (None, 44, 32)            160       
                                                                 
 Batch_Normalization (BatchN  (None, 44, 32)           128       
 ormalization)                                                   
                                                                 
 Flatten_Layer (Flatten)     (None, 1408)              0         
                                                                 
 FC_Dense_Layer (Dense)      (None, 256)               360704    
                                                                 
 Dropout (Dropout)           (None, 256)               0         
                                                                 
 Output_Layer (Dense)        (None, 1)                 257       
                                                        

In [14]:
# Destination file for the MDD model saved during retraining
savepath_mdd = '../tuning-models/CNN_mdd.h5'
callbacks_mdd = callback_sets(
    savepath=savepath_mdd,
    monitor='val_auc',
    mode='max',
    patience=8,
)

# Retrain the model built from the best hyperparameters.
# fit() is kept as the cell's last expression so the notebook shows its History.
best_model_mdd.fit(
    X_train_mdd,
    y_train_mdd,
    epochs=100,
    validation_data=(X_val_mdd, y_val_mdd),
    callbacks=callbacks_mdd,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<keras.callbacks.History at 0x1412764b0d0>

In [15]:
# Evaluate the retrained MDD model on the train, validation, and test splits.
# Each call prints one summary line (verbose=2); the return value of the last
# call is what the notebook displays as the cell result.
best_model_mdd.evaluate(X_train_mdd, y_train_mdd, verbose=2)
best_model_mdd.evaluate(X_val_mdd, y_val_mdd, verbose=2)
best_model_mdd.evaluate(X_test_mdd, y_test_mdd, verbose=2)

63/63 - 0s - loss: 0.5034 - auc: 0.8009 - 465ms/epoch - 7ms/step
21/21 - 0s - loss: 0.5418 - auc: 0.7153 - 74ms/epoch - 4ms/step
21/21 - 0s - loss: 0.5226 - auc: 0.7447 - 82ms/epoch - 4ms/step


[0.5226476192474365, 0.7446709871292114]

In [16]:
# Saved-model evaluation: reload the MDD model from the file path used as the
# save path during retraining, then evaluate it on the same three splits so its
# metrics can be compared with those of the in-memory model.
model_test_mdd = load_model('../tuning-models/CNN_mdd.h5')
model_test_mdd.evaluate(X_train_mdd, y_train_mdd, verbose=2)
model_test_mdd.evaluate(X_val_mdd, y_val_mdd, verbose=2)
model_test_mdd.evaluate(X_test_mdd, y_test_mdd, verbose=2)

63/63 - 0s - loss: 0.5034 - auc: 0.8009 - 438ms/epoch - 7ms/step
21/21 - 0s - loss: 0.5418 - auc: 0.7153 - 85ms/epoch - 4ms/step
21/21 - 0s - loss: 0.5226 - auc: 0.7447 - 81ms/epoch - 4ms/step


[0.5226476192474365, 0.7446709871292114]

- EarlyStopping이 적용된 Best 모델과 저장된 모델의 Evaluation 수치가 동일함을 확인.