In [1]:
import pandas as pd
import numpy as np

ROOT_PATH = '../../data/01/'

train_df = pd.read_csv(f'{ROOT_PATH}train.csv',index_col='ID')
test_df = pd.read_csv(f'{ROOT_PATH}test.csv',index_col='ID')

train_df = train_df[['사고일시','요일', '기상상태', '도로형태', '노면상태', '사고유형','ECLO']]
test_df = test_df[['사고일시','요일', '기상상태', '도로형태', '노면상태', '사고유형']]

train_df['사고일시'] = pd.to_datetime(train_df['사고일시'], format='%Y-%m-%d %H', errors='raise')
train_df['시간'] = train_df['사고일시'].dt.hour
train_df['월'] = train_df['사고일시'].dt.month
test_df['사고일시'] = pd.to_datetime(test_df['사고일시'], format='%Y-%m-%d %H', errors='raise')
test_df['시간'] = test_df['사고일시'].dt.hour
test_df['월'] = test_df['사고일시'].dt.month
train_df.drop(columns='사고일시',inplace=True)
test_df.drop(columns='사고일시',inplace=True)

train_df = pd.get_dummies(train_df)
test_df = pd.get_dummies(test_df)

X_trn = train_df.drop(columns='ECLO').astype(np.float32)
X_trn.drop(columns='기상상태_안개',inplace=True)
y_trn = train_df['ECLO'].astype(np.float32)

X_tst = test_df.astype(np.float32)

X_trn.shape, y_trn.shape, X_tst.shape

((39609, 34), (39609,), (10963, 34))

In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchmetrics
from typing import Optional, List
import numpy as np
import pandas as pd
  
from nn import ANN
from torch.utils.data import TensorDataset
# from utils import CustomDataset
from torchmetrics import MeanAbsoluteError, MeanSquaredError, MeanSquaredLogError
from tqdm.auto import tqdm

def train_one_epoch(
    model: nn.Module,
    criterion: callable,
    optimizer: torch.optim.Optimizer,
    data_loader: DataLoader,
    device: str
) -> float:
    '''train one epoch

    Args:
        model: model
        criterion: loss
        optimizer: optimizer
        data_loader: data loader
        device: device
    '''
    model.train()
    total_loss = 0.
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        output = model(X)
        loss = criterion(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * len(y)
    return total_loss / len(data_loader.dataset)

def evaluate(
    model: nn.Module,
    criterion: callable,
    data_loader: DataLoader,
    device: str,
    metric: Optional[torchmetrics.metric.Metric] = None,
    multi_metrics: List[torchmetrics.metric.Metric] = None
) -> float:
    '''evaluate

    Args:
        model: model
        criterions: list of criterion functions
        data_loader: data loader
        device: device
    '''
    model.eval()
    total_loss = 0.

    mae, mse, msle = (
        MeanAbsoluteError().to(device),
        MeanSquaredError().to(device),
        MeanSquaredLogError().to(device)
    )

    with torch.no_grad():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            output = model(X)
            total_loss += criterion(output, y).item() * len(y)

            if metric is not None:
                metric.update(output, y)

            if multi_metrics is not None:
                for metric in multi_metrics:
                    metric.update(output, y)

    if isinstance(total_loss, torch.Tensor):
        return total_loss.item() / len(data_loader.dataset)
    else:
        return total_loss / len(data_loader.dataset)

def kfold_cross_validation(model: nn.Module, criterion:callable, device:str, X_trn:np.array, y_trn:np.array, n_splits:int=5):
  from sklearn.model_selection import KFold
  from torchmetrics import MeanAbsoluteError, MeanSquaredError, MeanSquaredLogError
  # from sklearn.metrics import mean_absolute_error,mean_squared_error,mean_squared_log_error
  from copy import deepcopy
  
  Kf = KFold(n_splits=n_splits, shuffle=True, random_state=2023)
  nets = [deepcopy(model) for i in range(n_splits)]
  scores = {
  'mae': [],
  'mse': [],
  'msle': []
  }
  
  for i, (trn_idx, val_idx) in enumerate(Kf.split(X_trn, y_trn)):
    X, y = (
        torch.tensor(X_trn.iloc[trn_idx].values).to(device),
        torch.tensor(y_trn.iloc[trn_idx].values).to(device)
    )
    X_val, y_val = (
        torch.tensor(X_trn.iloc[val_idx].values).to(device),
        torch.tensor(y_trn.iloc[val_idx].values).to(device)
    )
    ds = TensorDataset(X, y)
    ds_val = TensorDataset(X_val, y_val)
    dl = DataLoader(ds, batch_size=32, shuffle=True)
    dl_val = DataLoader(ds_val, batch_size=len(ds_val), shuffle=False)

    net = nets[i]()
    net.to(device)  # 모델을 디바이스로 이동

    pbar = tqdm(range(50))
    for j in pbar:
        mae, mse, msle = (
            MeanAbsoluteError().to(device),
            MeanSquaredError().to(device),
            MeanSquaredLogError().to(device)
        )
        criterion = nn.MSELoss(reduction='mean')
        optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)
        loss = train_one_epoch(net, criterion, optimizer, dl, device)
        loss_val = evaluate(net, criterion, dl_val, device, multi_metrics=[mae, mse, msle])
        mae, mse, msle = mae.compute(), mse.compute(), msle.compute()
        pbar.set_postfix(trn_loss=loss, val_loss=loss_val)
        print(f'Epoch {j+1}/{300} - Training Loss: {loss:.4f}, Validation Loss: {loss_val:.4f}')

    scores["mae"].append(mae.item())
    scores["mse"].append(mse.item())
    scores["msle"].append(msle.item())


In [4]:
device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
device

  return torch._C._cuda_getDeviceCount() > 0


'cpu'

In [5]:
kfold_cross_validation(ANN, nn.MSELoss, device, X_trn, y_trn, n_splits = 5)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 1/300 - Training Loss: 18.6309, Validation Loss: 13.2637
Epoch 2/300 - Training Loss: 10.7179, Validation Loss: 10.8745
Epoch 3/300 - Training Loss: 10.1541, Validation Loss: 10.8367
Epoch 4/300 - Training Loss: 10.1495, Validation Loss: 10.8365
Epoch 5/300 - Training Loss: 10.1500, Validation Loss: 10.8361
Epoch 6/300 - Training Loss: 10.1493, Validation Loss: 10.8343
Epoch 7/300 - Training Loss: 10.1486, Validation Loss: 10.8316
Epoch 8/300 - Training Loss: 10.1402, Validation Loss: 10.8205
Epoch 9/300 - Training Loss: 10.1237, Validation Loss: 10.8045
Epoch 10/300 - Training Loss: 10.1011, Validation Loss: 10.7752
Epoch 11/300 - Training Loss: 10.0684, Validation Loss: 10.7359
Epoch 12/300 - Training Loss: 10.0265, Validation Loss: 10.6910
Epoch 13/300 - Training Loss: 9.9806, Validation Loss: 10.6496
Epoch 14/300 - Training Loss: 9.9449, Validation Loss: 10.6236
Epoch 15/300 - Training Loss: 9.9248, Validation Loss: 10.6080
Epoch 16/300 - Training Loss: 9.9111, Validation Los

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 1/300 - Training Loss: 17.2843, Validation Loss: 11.0937
Epoch 2/300 - Training Loss: 10.8522, Validation Loss: 9.4248
Epoch 3/300 - Training Loss: 10.5032, Validation Loss: 9.4246
Epoch 4/300 - Training Loss: 10.5015, Validation Loss: 9.4252
Epoch 5/300 - Training Loss: 10.5023, Validation Loss: 9.4248
Epoch 6/300 - Training Loss: 10.5020, Validation Loss: 9.4224
Epoch 7/300 - Training Loss: 10.4978, Validation Loss: 9.4200
Epoch 8/300 - Training Loss: 10.4877, Validation Loss: 9.4007
Epoch 9/300 - Training Loss: 10.4682, Validation Loss: 9.3757
Epoch 10/300 - Training Loss: 10.4439, Validation Loss: 9.3403
Epoch 11/300 - Training Loss: 10.4118, Validation Loss: 9.2972
Epoch 12/300 - Training Loss: 10.3709, Validation Loss: 9.2475
Epoch 13/300 - Training Loss: 10.3336, Validation Loss: 9.2077
Epoch 14/300 - Training Loss: 10.3044, Validation Loss: 9.1793
Epoch 15/300 - Training Loss: 10.2878, Validation Loss: 9.1665
Epoch 16/300 - Training Loss: 10.2777, Validation Loss: 9.1695


KeyboardInterrupt: 