In [17]:
import pymysql
import tensorflow as tf
from sqlalchemy import create_engine
import pandas as pd

# GPU 설정
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU 설정 완료:", gpus[0])
    except RuntimeError as e:
        print(e)

# 데이터베이스 연결 함수
def get_db():
    db = pymysql.connect(
        host='human-mysql.mysql.database.azure.com',  # Azure MySQL Host
        port=3306,  # Port number (MySQL default is 3306)
        user='human',  # Username
        passwd='!q1w2e3r4',  # Password
        db='humandb',  # Database name
        ssl_ca=r'/home/azureuser/Desktop/config/DigiCertGlobalRootG2.crt.pem'  # SSL certificate path
    )
    return db

# DB에서 데이터 로드
db_connection = get_db()
cursor = db_connection.cursor()

query = "SELECT * FROM humandb.modeling"
cursor.execute(query)
columns = [desc[0] for desc in cursor.description]  # 컬럼 이름 가져오기
all_data = cursor.fetchall()  # 데이터 가져오기

# Pandas DataFrame으로 변환
df = pd.DataFrame(all_data, columns=columns)

# 데이터 확인
print("데이터셋의 크기:", df.shape)
print("컬럼명:", df.columns)

데이터셋의 크기: (100330, 17)
컬럼명: Index(['Index', 'HeartRate', 'BreathRate', 'SPO2', 'SkinTemperature',
       'SleepPhase', 'SleepScore', 'WalkingSteps', 'StressIndex',
       'ActivityIntensity', 'CaloricExpenditure', '심박', '호흡', '피부온도', '혈중산소농도',
       '일상', '상태'],
      dtype='object')


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100330 entries, 0 to 100329
Data columns (total 17 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Index               100330 non-null  int64  
 1   HeartRate           100330 non-null  int64  
 2   BreathRate          100330 non-null  int64  
 3   SPO2                100330 non-null  int64  
 4   SkinTemperature     100330 non-null  float64
 5   SleepPhase          100330 non-null  int64  
 6   SleepScore          100330 non-null  int64  
 7   WalkingSteps        100330 non-null  int64  
 8   StressIndex         100330 non-null  int64  
 9   ActivityIntensity   100330 non-null  int64  
 10  CaloricExpenditure  100330 non-null  int64  
 11  심박                  100330 non-null  int64  
 12  호흡                  100330 non-null  int64  
 13  피부온도                100330 non-null  int64  
 14  혈중산소농도              100330 non-null  int64  
 15  일상                  100330 non-nul

In [19]:
df.head()

Unnamed: 0,Index,HeartRate,BreathRate,SPO2,SkinTemperature,SleepPhase,SleepScore,WalkingSteps,StressIndex,ActivityIntensity,CaloricExpenditure,심박,호흡,피부온도,혈중산소농도,일상,상태
0,0,73,16,98,0.0,9,0,0,0,102,0,0,0,0,0,1,0
1,1,73,16,98,0.0,9,0,0,0,0,0,0,0,0,0,1,0
2,2,73,16,98,0.0,9,0,0,0,0,0,0,0,0,0,1,0
3,3,74,16,98,0.0,9,0,0,0,0,0,0,0,0,0,1,0
4,4,74,16,98,0.0,9,0,0,0,0,0,0,0,0,0,1,0


In [20]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score
import numpy as np

### 다중 클래스

In [21]:
# 다중 라벨 컬럼 선택
y = df[['상태']]  # 다중 라벨 대상

# 입력 데이터(X) 설정
x = df.drop(columns=['Index', '심박', '호흡', '피부온도', '혈중산소농도','일상','상태'])  # 독립 변수


# 데이터의 shape 확인
print("x (입력 데이터) shape:", x.shape)
print("y (출력 데이터) shape:", y.shape)

x (입력 데이터) shape: (100330, 10)
y (출력 데이터) shape: (100330, 1)


In [22]:

# 데이터 준비
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

# 신경망 모델 정의
keras.backend.clear_session()
input_layer = keras.layers.Input(shape=(10,))

d1 = Dense(256)(input_layer)
d1 = LeakyReLU(alpha=0.1)(d1)  # Leaky ReLU 적용
d1 = BatchNormalization()(d1)
d1 = Dropout(0.2)(d1)  # 첫 번째 레이어 Dropout 낮게 설정

d2 = Dense(128, activation='relu')(d1)
d2 = BatchNormalization()(d2)
d2 = Dropout(0.3)(d2)

d3 = Dense(64, activation='relu')(d2)
d3 = BatchNormalization()(d3)
d3 = Dropout(0.4)(d3)

d4 = Dense(32, activation='relu')(d3)
d4 = BatchNormalization()(d4)
d4 = Dropout(0.4)(d4)

output_layer = Dense(3, activation='softmax')(d4)

model = keras.models.Model(inputs=input_layer, outputs=output_layer)

# AdamW 옵티마이저 설정
optimizer = keras.optimizers.AdamW(learning_rate=0.0005, weight_decay=1e-5)

# 모델 컴파일
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

# EarlyStopping 설정
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# 모델 학습 (GPU 가속 적용)
with tf.device("GPU:0"):
    history = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        callbacks=[es],
        verbose=1,
        epochs=50
    )

# 모델 평가 (GPU 가속 적용)
with tf.device("GPU:0"):
    test_loss, test_accuracy = model.evaluate(x_test, y_test)

# 예측값 생성 (소프트맥스 확률값 반환)
y_pred_prob = model.predict(x_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# 로그 손실 (Log Loss) 계산
logloss = log_loss(y_test, y_pred_prob)

# AUROC 계산 (One-vs-Rest 방식)
y_test_oh = tf.keras.utils.to_categorical(y_test, num_classes=3)
auroc = roc_auc_score(y_test_oh, y_pred_prob, multi_class="ovr")

# 정확도 계산
accuracy = accuracy_score(y_test, y_pred)

# 결과 출력
print(f"테스트 손실 (Log Loss): {logloss:.4f}")
print(f"테스트 AUROC: {auroc:.4f}")
print(f"테스트 정확도 (Accuracy): {accuracy:.4f}")


Epoch 1/50




[1m2007/2007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.7257 - loss: 0.7881 - val_accuracy: 0.8816 - val_loss: 0.2747
Epoch 2/50
[1m2007/2007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8706 - loss: 0.3099 - val_accuracy: 0.8830 - val_loss: 0.2547
Epoch 3/50
[1m2007/2007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8796 - loss: 0.2682 - val_accuracy: 0.9209 - val_loss: 0.2051
Epoch 4/50
[1m2007/2007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8943 - loss: 0.2321 - val_accuracy: 0.9066 - val_loss: 0.2071
Epoch 5/50
[1m2007/2007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9058 - loss: 0.2113 - val_accuracy: 0.9369 - val_loss: 0.1588
Epoch 6/50
[1m2007/2007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9146 - loss: 0.1991 - val_accuracy: 0.9425 - val_loss: 0.1456
Epoch 7/50
[1m2007/2007[0

In [None]:
# 상태(Status) 값이 1 또는 2인 데이터만 필터링
df_filtered = df[df["상태"].isin([1, 2])]

# 필터링된 데이터 출력
from IPython.display import display
display(df_filtered)

Unnamed: 0,Index,HeartRate,BreathRate,SPO2,SkinTemperature,SleepPhase,SleepScore,WalkingSteps,StressIndex,ActivityIntensity,CaloricExpenditure,심박,호흡,피부온도,혈중산소농도,일상,상태
356,356,80,24,98,-1.1,1,72,0,0,0,0,1,1,0,0,0,1
357,357,77,21,97,0.2,1,72,0,0,0,0,1,1,0,0,0,1
358,358,77,19,98,0.4,0,72,0,0,0,0,1,0,0,0,0,1
359,359,75,21,98,0.5,0,72,0,0,0,0,1,1,0,0,0,1
360,360,77,13,98,0.5,2,72,0,0,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100284,51539608288,78,21,98,-0.8,1,9,0,0,0,0,1,1,0,0,0,1
100285,51539608289,79,16,98,-0.6,1,9,0,0,0,0,1,0,0,0,0,1
100286,51539608290,82,20,98,-0.8,2,9,0,0,0,0,1,1,0,0,0,1
100287,51539608291,82,21,98,-0.4,1,9,0,0,0,0,1,1,0,0,0,1


### 다중 레이블


In [24]:
# 다중 라벨 컬럼 선택
y = df_filtered[['심박', '호흡', '피부온도', '혈중산소농도']]  # 다중 라벨 대상

# 입력 데이터(X) 설정
x = df_filtered.drop(columns=['Index', '심박', '호흡', '피부온도', '혈중산소농도','일상','상태'])  # 독립 변수


# 데이터의 shape 확인
print("x (입력 데이터) shape:", x.shape)
print("y (출력 데이터) shape:", y.shape)

x (입력 데이터) shape: (21526, 10)
y (출력 데이터) shape: (21526, 4)


In [25]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.metrics import AUC
from sklearn.model_selection import train_test_split

# 데이터 준비
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

# 신경망 모델 정의
keras.backend.clear_session()
input_layer = keras.layers.Input(shape=(10,))

d1 = Dense(256)(input_layer)
d1 = LeakyReLU(alpha=0.1)(d1)  # LeakyReLU 적용 (Dying ReLU 문제 해결)
d1 = BatchNormalization()(d1)
d1 = Dropout(0.2)(d1)  # 첫 번째 레이어 Dropout 낮게 설정

d2 = Dense(128)(d1)
d2 = LeakyReLU(alpha=0.1)(d2)
d2 = BatchNormalization()(d2)
d2 = Dropout(0.3)(d2)

d3 = Dense(64)(d2)
d3 = LeakyReLU(alpha=0.1)(d3)
d3 = BatchNormalization()(d3)
d3 = Dropout(0.4)(d3)

d4 = Dense(32)(d3)
d4 = LeakyReLU(alpha=0.1)(d4)
d4 = BatchNormalization()(d4)
d4 = Dropout(0.4)(d4)

output_layer = Dense(4, activation='sigmoid')(d4)

model = keras.models.Model(inputs=input_layer, outputs=output_layer)

# AdamW 옵티마이저 설정
optimizer = keras.optimizers.AdamW(learning_rate=0.0005, weight_decay=1e-5)

# 모델 컴파일
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy', AUC(name='auc', multi_label=True)]
)

# EarlyStopping 설정 (patience 증가)
es = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=7,  # 조기 종료까지 기다리는 에포크 수 증가
    verbose=1,
    restore_best_weights=True
)

# 모델 학습 (GPU 가속 적용)
with tf.device("GPU:0"):
    history = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        callbacks=[es],
        verbose=1,
        epochs=50
    )

# 모델 평가 (GPU 가속 적용)
with tf.device("GPU:0"):
    test_loss, test_accuracy, test_auc = model.evaluate(x_test, y_test)

print(f"테스트 손실: {test_loss:.4f}")
print(f"테스트 정확도: {test_accuracy:.4f}")
print(f"테스트 AUC: {test_auc:.4f}")


Epoch 1/50




[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.2920 - auc: 0.5129 - loss: 0.8229 - val_accuracy: 0.6150 - val_auc: 0.6572 - val_loss: 0.5471
Epoch 2/50
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5421 - auc: 0.5897 - loss: 0.5799 - val_accuracy: 0.6411 - val_auc: 0.7351 - val_loss: 0.4529
Epoch 3/50
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6187 - auc: 0.6724 - loss: 0.4784 - val_accuracy: 0.6966 - val_auc: 0.8251 - val_loss: 0.3709
Epoch 4/50
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6763 - auc: 0.7454 - loss: 0.4114 - val_accuracy: 0.7517 - val_auc: 0.8486 - val_loss: 0.3323
Epoch 5/50
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7133 - auc: 0.7841 - loss: 0.3755 - val_accuracy: 0.7448 - val_auc: 0.8628 - val_loss: 0.3129
Epoch 6/50
[1m431/431[0m [32m━━━━━━━━━

### 설명

#### y_pred > 0.5: sigmoid에서 나온 확률 값을 0/1로 변환하여 평가.
#### Hamming Loss: 잘못 예측한 라벨의 비율 (낮을수록 좋음).
#### F1-Score (Micro, Macro): 정밀도(Precision)와 재현율(Recall)의 균형 평가.
#### Jaccard Score: 실제 라벨과 예측 라벨의 유사도를 평가.

In [26]:
from sklearn.metrics import f1_score, jaccard_score, hamming_loss

# 모델 예측 수행
y_pred = model.predict(x_test)
y_pred_binary = (y_pred > 0.5).astype(int)  # 0.5 이상이면 1, 아니면 0

# 평가 지표 계산
print("Hamming Loss:", hamming_loss(y_test, y_pred_binary))
print("Micro F1-Score:", f1_score(y_test, y_pred_binary, average='micro'))
print("Macro F1-Score:", f1_score(y_test, y_pred_binary, average='macro'))
print("Jaccard Score:", jaccard_score(y_test, y_pred_binary, average='samples'))


[1m  1/135[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8s[0m 64ms/step

[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Hamming Loss: 0.03593822573153739
Micro F1-Score: 0.9442091031996395
Macro F1-Score: 0.891280896988946
Jaccard Score: 0.9324972906022605
