### Dataload and preprocessing

In [16]:
import torch

# Clear PyTorch CUDA cache
torch.cuda.empty_cache()


In [154]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, recall_score, roc_curve, roc_auc_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import precision_recall_fscore_support

from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

### Dataset 준비

### Train user dataset

In [155]:
folder_path = '/home/hangilee/etri_test_data/user_acc_hr_mgps_1d'  

# 폴더 내의 모든 CSV 파일을 읽어서 하나의 DataFrame으로 합치기
def merge_csv_files(folder_path):
    all_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')]
    df_list = []

    for file in all_files:
        df = pd.read_csv(file)
        df_list.append(df)

    # 모든 DataFrame을 행 방향으로 합치기
    combined_df = pd.concat(df_list, axis=0, ignore_index=True)
    
    combined_df.drop(columns='Unnamed: 0',inplace=True)
    
    return combined_df

# 합쳐진 데이터 프레임을 가져오기
combined_df = merge_csv_files(folder_path)

### Val user dataset

In [158]:
val_data = pd.read_csv('/home/hangilee/etri_test_data/val_data_acc_hr_mgps_1d/val_acc_hr_mgps_1d_set/val_acc_hr_mgps_1d.csv')
val_data.drop(columns='Unnamed: 0',inplace=True)
len(val_data)

105

In [191]:
combined_df = pd.concat([combined_df,val_data],axis=0)

In [192]:
len(combined_df)

614

### x,y 분리, 날짜 및 userid 추출

In [193]:
combined_df_date = combined_df['date']
combined_df_user = combined_df['subject_id']

y = combined_df.iloc[:,-7:]
x = combined_df.iloc[:,1:-8]

# print(y.head())
# print(x.head())

### StandardScaler 정규화

In [194]:
x_sds = pd.DataFrame()
scaler = StandardScaler()

x_sds[list(x.columns)] = scaler.fit_transform(x)

### data split

In [195]:
X_train, X_val, y_train, y_val = train_test_split(x_sds, y, test_size=0.2, random_state=625)

# # 트레이닝 데이터와 검증 데이터 각각 분할
# total_rows = x_sds.shape[0]
# X_train = x_sds[:total_rows - 100]
# X_val = x_sds[total_rows - 100:]

# y_train = y[:total_rows - 100]
# y_val = y[total_rows - 100:]

### transformation data to tensor

In [196]:
# 데이터를 텐서로 변환
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)

# 데이터 로더 생성
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=32)  

### Model Implement

In [197]:
# class MultiLabelNN(nn.Module):
#     def __init__(self):
#         super(MultiLabelNN, self).__init__()

#         self.layer1 = nn.Linear(X_train.shape[1], 64)
#         self.relu1 = nn.ReLU()

#         self.layer2 = nn.Linear(64, 128)
#         self.relu2 = nn.ReLU()
        
#         self.layer3 = nn.Linear(128, 128)
#         self.relu3 = nn.ReLU()
        
#         self.layer4 = nn.Linear(128, 64)
#         self.relu4 = nn.ReLU()
#         self.dropout4 = nn.Dropout(0.1)
        
#         self.output = nn.Linear(64, y_train.shape[1])
        
    
#     def forward(self, x):
#         x = self.relu1(self.layer1(x))
#         x = self.relu2(self.layer2(x))
#         x = self.relu3(self.layer3(x))
#         x = self.dropout4(self.relu4(self.layer4(x)))
#         x = torch.sigmoid(self.output(x))
#         return x


In [198]:
class MultiLabelNN(nn.Module):
    def __init__(self):
        super(MultiLabelNN, self).__init__()

        self.layer1 = nn.Linear(X_train.shape[1], 64)
        self.bn1 = nn.BatchNorm1d(64) 
        self.relu1 = nn.ReLU()

        self.layer2 = nn.Linear(64, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.relu2 = nn.ReLU()
        
        self.layer3 = nn.Linear(128, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.relu3 = nn.ReLU()
        
        self.layer4 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.relu4 = nn.ReLU()
        self.dropout4 = nn.Dropout(0.1)
        
        self.output = nn.Linear(64, y_train.shape[1])
        
    
    def forward(self, x):
        x = self.relu1(self.layer1(x))
        x = self.relu2(self.layer2(x))
        x = self.relu3(self.layer3(x))
        x = self.dropout4(self.relu4(self.layer4(x)))
        x = torch.sigmoid(self.output(x))
        return x


### Model Training

In [199]:
# Early Stopping 클래스 정의
class EarlyStopping:
    def __init__(self, patience=50, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.best_loss = float('inf')
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss):
        if self.best_loss - val_loss > self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')

## learning rate scheduller training

In [231]:
from torch.optim.lr_scheduler import CyclicLR
model = MultiLabelNN().to('cuda')

criterion = nn.BCELoss().to('cuda')  # binary cross entropy
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = CyclicLR(optimizer, base_lr=0.002, 
                     max_lr=0.1, step_size_up=500, 
                     step_size_down=None, mode='exp_range', gamma=0.995,
                     cycle_momentum=False)

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=600):
# def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=1000):
    early_stopping = EarlyStopping(patience=600, verbose=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            scheduler.step()

        # 검증 손실 계산
        val_loss = 0
        model.eval()
        with torch.no_grad():
            for val_inputs, val_labels in val_loader:
                val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
                outputs = model(val_inputs)
                v_loss = criterion(outputs, val_labels)
                val_loss += v_loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Val Loss: {val_loss}')

        # Early Stopping 체크
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping")
            break
        
train_model(model, train_loader, val_loader, criterion, optimizer,scheduler)
# train_model(model, train_loader, test_loader, criterion, optimizer)

Epoch 1, Loss: 0.6278355121612549, Val Loss: 0.660341277718544
Epoch 2, Loss: 0.5947006940841675, Val Loss: 0.6538551896810532
Epoch 3, Loss: 0.6615185737609863, Val Loss: 0.6363389045000076
Epoch 4, Loss: 0.6112147569656372, Val Loss: 0.6719638407230377
EarlyStopping counter: 1 out of 600
Epoch 5, Loss: 0.6041650772094727, Val Loss: 0.6407226920127869
EarlyStopping counter: 2 out of 600
Epoch 6, Loss: 0.5875002145767212, Val Loss: 0.6439094096422195
EarlyStopping counter: 3 out of 600
Epoch 7, Loss: 0.5501195192337036, Val Loss: 0.666202962398529
EarlyStopping counter: 4 out of 600
Epoch 8, Loss: 0.6084142923355103, Val Loss: 0.6481114476919174
EarlyStopping counter: 5 out of 600
Epoch 9, Loss: 0.5857084393501282, Val Loss: 0.6571843922138214
EarlyStopping counter: 6 out of 600
Epoch 10, Loss: 0.565907895565033, Val Loss: 0.6539648026227951
EarlyStopping counter: 7 out of 600
Epoch 11, Loss: 0.49320369958877563, Val Loss: 0.6150695979595184
Epoch 12, Loss: 0.6286236047744751, Val Loss

## learning rate scheduller evaluation

In [232]:
def evaluate_model(model, test_loader):
    model.eval()  # 평가 모드로 설정
    total_loss = 0
    all_predictions = []
    all_labels = []
    criterion = nn.BCELoss().to('cuda')  # 이진 크로스 엔트로피 손실
    
    with torch.no_grad():  # 그래디언트 계산 비활성화
        for inputs, labels in test_loader:
            inputs = inputs.to('cuda')  # 입력 데이터를 GPU로 이동
            labels = labels.to('cuda') # 레이블 데이터를 GPU로 이동
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            # 예측값을 0 또는 1로 변환
            predicted = outputs > 0.5
            all_predictions.append(predicted.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
            
    print(all_predictions)

    all_predictions = np.vstack(all_predictions)
    all_labels = np.vstack(all_labels)
    
    # 스코어 계산
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average='macro')
    
    average_loss = total_loss / len(test_loader)
    print(f'Average Loss: {average_loss:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')

# 모델 평가
evaluate_model(model, val_loader)

[array([[ True, False,  True, False,  True,  True,  True],
       [False, False, False,  True,  True, False, False],
       [False, False,  True, False, False,  True,  True],
       [ True, False, False, False,  True, False,  True],
       [ True,  True,  True, False,  True, False,  True],
       [ True,  True,  True, False, False, False, False],
       [ True,  True, False,  True,  True, False,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True, False, False, False,  True,  True, False],
       [ True,  True,  True, False,  True,  True,  True],
       [False, False,  True, False, False, False,  True],
       [ True,  True,  True, False,  True,  True, False],
       [False,  True,  True, False, False, False, False],
       [ True,  True, False, False,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True, False],
       [False,  True, False,  True,  True, False,  True],
       [False,  True, False, False, False, False, False],
       [ True

## Basic training

In [214]:
# 훈련 루프
model = MultiLabelNN().to('cuda')

criterion = nn.BCELoss().to('cuda')  # binary cross entropy
optimizer = optim.AdamW(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=700):
    early_stopping = EarlyStopping(patience=700, verbose=True)
    model.train()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
        # 검증 손실 계산
        val_loss = 0
        model.eval()
        with torch.no_grad():
            for val_inputs, val_labels in val_loader:
                val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
                outputs = model(val_inputs)
                v_loss = criterion(outputs, val_labels)
                val_loss += v_loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Val Loss: {val_loss}')

        # Early Stopping 체크
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping")
            break

train_model(model, train_loader, val_loader, criterion, optimizer)

Epoch 1, Loss: 0.5743914246559143, Val Loss: 0.6780306100845337
Epoch 2, Loss: 0.6075423359870911, Val Loss: 0.6599037349224091
Epoch 3, Loss: 0.5803540945053101, Val Loss: 0.6650640666484833
EarlyStopping counter: 1 out of 700
Epoch 4, Loss: 0.5553098320960999, Val Loss: 0.6536053866147995
Epoch 5, Loss: 0.672127366065979, Val Loss: 0.6410967707633972
Epoch 6, Loss: 0.5848856568336487, Val Loss: 0.6394033133983612
Epoch 7, Loss: 0.49838554859161377, Val Loss: 0.6364636421203613
Epoch 8, Loss: 0.5754633545875549, Val Loss: 0.6254353225231171
Epoch 9, Loss: 0.5023847222328186, Val Loss: 0.6249626129865646
Epoch 10, Loss: 0.4864312708377838, Val Loss: 0.6286615133285522
EarlyStopping counter: 1 out of 700
Epoch 11, Loss: 0.6152466535568237, Val Loss: 0.6269301027059555
EarlyStopping counter: 2 out of 700
Epoch 12, Loss: 0.5462976694107056, Val Loss: 0.6156433820724487
Epoch 13, Loss: 0.5300934314727783, Val Loss: 0.6321629285812378
EarlyStopping counter: 1 out of 700
Epoch 14, Loss: 0.36

## Basic training evaluation

In [218]:
def evaluate_model(model, test_loader):
    model.eval()  # 평가 모드로 설정
    total_loss = 0
    all_predictions = []
    all_labels = []
    criterion = nn.BCELoss()  # 이진 크로스 엔트로피 손실
    
    with torch.no_grad():  # 그래디언트 계산 비활성화
        for inputs, labels in test_loader:
            inputs = inputs.to('cuda')  # 입력 데이터를 GPU로 이동
            labels = labels.to('cuda') # 레이블 데이터를 GPU로 이동
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            # 예측값을 0 또는 1로 변환
            predicted = outputs > 0.5
            all_predictions.append(predicted.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
            
    print(all_predictions)

    all_predictions = np.vstack(all_predictions)
    all_labels = np.vstack(all_labels)
    
    # 스코어 계산
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average='macro')
    
    average_loss = total_loss / len(test_loader)
    print(f'Average Loss: {average_loss:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')

# 모델 평가
evaluate_model(model, val_loader)

[array([[ True, False,  True, False,  True,  True,  True],
       [False,  True,  True, False, False, False, False],
       [ True, False,  True, False,  True,  True, False],
       [ True, False, False, False,  True, False,  True],
       [False,  True,  True, False, False, False,  True],
       [ True,  True,  True, False, False, False, False],
       [ True,  True, False,  True,  True, False,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True, False, False, False,  True, False],
       [ True,  True,  True, False,  True,  True,  True],
       [False,  True,  True,  True, False,  True, False],
       [ True,  True,  True, False,  True,  True, False],
       [False, False,  True, False,  True, False,  True],
       [False,  True, False, False, False, False,  True],
       [ True,  True,  True,  True,  True,  True, False],
       [ True,  True, False, False, False, False,  True],
       [False,  True, False, False, False, False, False],
       [ True

## model weight save

In [219]:
pth_path = '/home/hangilee/etri_test_data/model_weight'
torch.save(model.state_dict(), f'{pth_path}/f1_066.pth')
print('Model weights saved to model_weights.pth')

Model weights saved to model_weights.pth


## Test data prediction

In [203]:
test_acc_hr_df = pd.read_csv('/home/hangilee/etri_test_data/test_user_1d_acc_hr_gps/test_acc_hr_mgps_1d_set/test_acc_hr_mgps_1d.csv')
test_acc_hr_df_date = test_acc_hr_df['date']
test_acc_hr_df_user = test_acc_hr_df['subject_id']
test_acc_hr_df.drop(columns=['Unnamed: 0','date','subject_id'],inplace=True)

In [204]:
test_acc_hr_df.head()

Unnamed: 0,speed_count,speed_mean,speed_std,speed_25%,speed_50%,speed_75%,speed_max,distances_count,distances_mean,distances_std,...,sma_75%,sma_max,hr_count,hr_mean,hr_std,hr_min,hr_25%,hr_50%,hr_75%,hr_max
0,1295.0,0.260138,1.035536,0.008658,0.05692,0.229001,19.346886,1295.0,0.014291,0.063388,...,13.464837,16.906186,523.0,90.544933,11.610348,47.0,83.0,91.0,97.0,131.0
1,1440.0,0.245918,0.87564,0.008382,0.048793,0.205027,12.635966,1440.0,0.015659,0.050818,...,13.870529,26.255015,530.0,94.1,9.58986,60.0,88.0,94.0,99.0,125.0
2,1421.0,0.3176,1.07833,0.009659,0.102175,0.311239,16.198814,1440.0,0.013318,0.058681,...,13.579299,21.664752,620.0,95.322581,9.988482,68.0,88.0,96.0,101.0,128.0
3,1401.0,0.214482,0.906553,0.004196,0.036504,0.173235,14.242488,1440.0,0.012825,0.049066,...,14.045329,19.6733,500.0,98.23,11.277387,69.0,90.75,98.0,106.0,131.0
4,1440.0,0.251952,1.041339,0.00323,0.027572,0.162775,15.078416,1440.0,0.01511,0.054264,...,13.680834,18.356803,821.0,92.686967,10.340811,65.0,86.0,92.0,100.0,123.0


In [205]:
scaler = StandardScaler()
test_acc_hr_df_sds= scaler.fit_transform(test_acc_hr_df)
print(test_acc_hr_df_sds)

test_x_sds = pd.DataFrame()

test_x_sds[combined_df.columns[1:36]] = test_acc_hr_df_sds


[[ 1.23262062 -0.44923958  0.1153887  ...  0.27050014 -0.04386696
  -0.47059609]
 [ 1.47136185 -0.47873074 -0.01186306 ...  0.6213368   0.19493541
  -0.74232104]
 [ 1.44007852 -0.33007022  0.14944599 ...  0.8552279   0.43373778
  -0.60645857]
 ...
 [-0.89958555 -0.98873946 -0.70873474 ... -1.24979202 -0.76027406
   0.07285379]
 [-0.89958555 -0.98873946 -0.70873474 ... -0.66506426 -0.99907643
  -0.74232104]
 [-0.89958555 -0.98873946 -0.70873474 ...  0.8552279   0.79194133
  -0.06300868]]


In [241]:
# Test data convert tensor
test_user_tensor = torch.tensor(test_x_sds.values, dtype=torch.float32)

### Test data Prediction

In [233]:

import torch
import numpy as np

def model_predict(model, data_loader):
    model.eval()  # 평가 모드로 설정
    all_predictions = []
    
    with torch.no_grad():  # 그래디언트 계산 비활성화
        for inputs in data_loader:
            inputs = inputs.to('cuda')
            # inputs_tensor = torch.tensor(inputs) if not isinstance(inputs, torch.Tensor) else inputs
            # inputs_tensor = inputs_tensor.float()
            outputs = model(inputs)
            # 예측값을 0 또는 1로 변환
            predicted = outputs > 0.5
            # predicted = predicted.astype(int) 
            all_predictions.append(predicted.cpu().numpy().astype(int) )
    
    # 모든 배치의 예측 결과를 하나로 합치기
    all_predictions = np.vstack(all_predictions)
    return all_predictions


# 모델 예측
model_predictions = model_predict(model, test_user_tensor)

print("Predictions:", model_predictions)


Predictions: [[1 1 1 0 0 1 0]
 [1 1 1 0 0 1 0]
 [1 1 0 0 0 1 0]
 [1 1 1 0 0 1 0]
 [1 1 0 0 0 1 0]
 [1 1 0 0 0 0 0]
 [0 1 0 1 0 0 0]
 [0 1 1 0 0 1 0]
 [1 1 1 0 0 1 0]
 [0 1 0 0 0 0 0]
 [1 0 1 1 1 1 1]
 [1 0 1 1 1 1 1]
 [0 1 0 0 0 0 1]
 [1 1 1 0 1 1 1]
 [1 1 0 0 0 1 0]
 [1 0 0 0 1 1 1]
 [0 1 1 0 1 1 1]
 [1 1 1 0 1 1 1]
 [1 1 1 0 1 1 1]
 [0 1 1 0 1 1 1]
 [1 1 1 0 1 1 1]
 [1 1 1 0 1 0 1]
 [1 0 1 1 1 1 0]
 [1 1 1 0 1 0 1]
 [1 1 1 0 1 1 1]
 [1 1 0 0 1 1 1]
 [1 0 1 0 1 1 1]
 [1 1 0 0 1 0 1]
 [1 1 1 0 1 1 1]
 [1 1 0 0 1 1 1]
 [1 1 1 0 1 1 1]
 [1 1 1 0 1 0 1]
 [0 0 0 0 1 1 1]
 [1 0 1 1 1 1 1]
 [1 1 0 0 1 1 1]
 [0 1 1 0 1 1 1]
 [1 1 1 0 1 1 1]
 [1 1 1 1 1 1 1]
 [1 1 0 0 0 0 0]
 [0 1 1 0 1 1 1]
 [0 0 1 0 1 1 1]
 [0 0 1 0 0 0 1]
 [1 1 1 0 1 1 1]
 [0 0 1 0 1 1 0]
 [1 0 0 0 1 1 1]
 [1 1 1 0 1 0 1]
 [1 1 0 0 1 1 1]
 [0 0 0 0 0 0 1]
 [0 0 1 0 0 0 1]
 [1 0 1 0 1 0 0]
 [1 1 1 0 0 0 1]
 [1 0 1 1 0 0 0]
 [1 0 1 1 0 0 0]
 [1 0 0 0 1 1 1]
 [1 0 0 0 1 1 1]
 [1 0 1 1 0 1 0]
 [1 0 1 1 1 0 1]
 [1 0 1 0 1 1 1]
 

### Prediction save

In [234]:
target_col = y_train.columns
print(target_col)

test_result = pd.DataFrame()

test_result['subject_id'] = test_acc_hr_df_user
test_result['date']=test_acc_hr_df_date
test_result[y_train.columns] = model_predictions

Index(['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4'], dtype='object')


In [235]:
test_result.head(20)

Unnamed: 0,subject_id,date,Q1,Q2,Q3,S1,S2,S3,S4
0,5,2023-11-05,1,1,1,0,0,1,0
1,5,2023-11-06,1,1,1,0,0,1,0
2,5,2023-11-07,1,1,0,0,0,1,0
3,5,2023-11-08,1,1,1,0,0,1,0
4,5,2023-11-09,1,1,0,0,0,1,0
5,5,2023-11-10,1,1,0,0,0,0,0
6,5,2023-11-11,0,1,0,1,0,0,0
7,5,2023-11-12,0,1,1,0,0,1,0
8,5,2023-11-13,1,1,1,0,0,1,0
9,5,2023-11-14,0,1,0,0,0,0,0


In [93]:
len(test_result)

115

In [239]:
test_result=test_result.sort_values(['subject_id','date'])

In [240]:
test_result.to_csv('/home/hangilee/etri_test_data/test_data_result/f1_068_lr.csv',index=False)