In [1]:
from google.colab import drive
drive.mount('/content/drive')
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import matplotlib.pyplot as plt
from google.colab import files

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Load the training data from a CSV file uploaded through the Colab file picker.
import io

uploaded = files.upload()
# files.upload() returns a dict of {saved file name: file bytes}. When a file with
# the same name already exists, Colab stores the upload under a new name
# (e.g. "data_tr_city (2).csv"), so reading the hard-coded path could silently
# load a stale copy. Parse the bytes that were just uploaded instead, and fall
# back to the on-disk path only when nothing was uploaded.
csv_source = io.BytesIO(next(iter(uploaded.values()))) if uploaded else 'data_tr_city.csv'
# The file's own header row is skipped and the column names are set explicitly.
train = pd.read_csv(csv_source, header=None, skiprows=1, names=['date', 'value'])

Saving data_tr_city.csv to data_tr_city (2).csv


In [3]:
# Preview the first rows of the freshly loaded frame.
train.head()

Unnamed: 0,date,value
0,2017-01-01 01:00:00,138.0
1,2017-01-01 02:00:00,237.0
2,2017-01-01 03:00:00,128.0
3,2017-01-01 04:00:00,14.0
4,2017-01-01 05:00:00,11.0


In [4]:
# Convert the 'date' column from strings to pandas datetime values.
train['date'] = pd.to_datetime(train['date'])

In [5]:
# Use the 'date' column as the index.
# Note: a plain data.set_index('date') returns a new frame and leaves the
# original unchanged, hence inplace=True here.

train.set_index('date', inplace=True)

In [6]:
# Preview the frame again now that 'date' is the index.
train.head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2017-01-01 01:00:00,138.0
2017-01-01 02:00:00,237.0
2017-01-01 03:00:00,128.0
2017-01-01 04:00:00,14.0
2017-01-01 05:00:00,11.0


In [7]:
# Handle outliers with the IQR rule (Tukey fences at 1.5 * IQR).
Q1 = train['value'].quantile(0.25)
Q3 = train['value'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# Rows whose value lies outside the fences are blanked out. Negative values are
# deliberately excluded from this mask, so they are left untouched here.
outlier_mask = ((train['value'] < lower_bound) | (train['value'] > upper_bound)) & (train['value'] >= 0)
train.loc[outlier_mask] = np.nan
# Replace each gap with the last valid observation. DataFrame.ffill is used
# because fillna(method='ffill') is deprecated in pandas 2.1+.
train.ffill(inplace=True)
# A forward fill cannot fill gaps at the very start of the series; back-fill
# those so no NaN is left to propagate into the tensors and the loss.
train.bfill(inplace=True)

In [8]:
# Replace negative values with 0.
negative_rows = train['value'].lt(0)
train.loc[negative_rows, 'value'] = 0

In [9]:
class NLinear(torch.nn.Module):
    """Linear forecaster that normalises each window by its last time step.

    The last observation of every input window is subtracted before a linear
    map over the time axis is applied, and added back to the prediction.

    Args:
        window_size: Number of input time steps (in_features of the linear map).
        forcast_size: Number of predicted time steps (out_features).
        individual: If truthy, one linear layer per channel is created;
            otherwise a single layer is shared by all channels.
        feature_size: Number of channels; only used to size the per-channel
            layer list when ``individual`` is truthy.
    """

    def __init__(self, window_size, forcast_size, individual, feature_size):
        super().__init__()
        self.window_size = window_size
        self.forcast_size = forcast_size
        self.individual = individual
        self.channels = feature_size
        if not self.individual:
            # One projection shared across every channel.
            self.Linear = torch.nn.Linear(self.window_size, self.forcast_size)
        else:
            # One independent projection per channel.
            self.Linear = torch.nn.ModuleList(
                [
                    torch.nn.Linear(self.window_size, self.forcast_size)
                    for _ in range(self.channels)
                ]
            )

    def forward(self, x):
        """Map ``x`` of shape (batch, window_size, channels) to
        (batch, forcast_size, channels)."""
        # Last time step of each series, detached so it acts as a constant offset.
        anchor = x[:, -1:, :].detach()
        centered = x - anchor
        if not self.individual:
            # Move time to the last axis for nn.Linear, then restore the layout.
            projected = self.Linear(centered.permute(0, 2, 1)).permute(0, 2, 1)
        else:
            projected = torch.zeros(
                [centered.size(0), self.forcast_size, centered.size(2)],
                dtype=centered.dtype,
            ).to(centered.device)
            for channel, layer in enumerate(self.Linear):
                projected[:, :, channel] = layer(centered[:, :, channel])
        return projected + anchor

In [10]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for sequence forecasting.

    Item ``i`` is the pair
    ``(data[i : i + window_size],
    data[i + window_size : i + window_size + forecasting_size])``.

    Args:
        data: Sliceable sequence that supports ``len()`` (e.g. a tensor or list).
        window_size: Number of steps in each input window.
        forecasting_size: Number of steps in each forecast target.
    """

    def __init__(self, data, window_size, forecasting_size):
        self.data = data
        self.window_size = window_size
        self.forecasting_size = forecasting_size

    def __len__(self):
        """Return the number of complete (window, forecast) pairs; never negative."""
        # Clamp at zero: a negative result would make len() raise ValueError
        # when the data is shorter than a single window plus forecast.
        return max(0, len(self.data) - self.window_size - self.forecasting_size + 1)

    def __getitem__(self, idx):
        """Return ``(window_data, forecast_data)`` for sample ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        sample_count = len(self)
        position = idx + sample_count if idx < 0 else idx
        # Without this check, slicing silently returns truncated or empty
        # pieces, and iterating the dataset directly never terminates because
        # the sequence protocol relies on IndexError to stop.
        if position < 0 or position >= sample_count:
            raise IndexError(
                f"index {idx} is out of range for dataset with {sample_count} samples"
            )

        window_start = position
        window_end = position + self.window_size
        forecast_start = window_end
        forecast_end = window_end + self.forecasting_size

        window_data = self.data[window_start:window_end]
        forecast_data = self.data[forecast_start:forecast_end]

        return window_data, forecast_data

In [11]:
# Check what container type DataFrame.values returns before converting to a tensor.
type(train.values)

numpy.ndarray

In [12]:
# Convert the frame's values to a float32 tensor for the model.
train_data = torch.as_tensor(train.values, dtype=torch.float32)

In [13]:
# Forecasting setup: 192 input steps are mapped to 24 predicted steps.
window_size = 192
forecasting_size = 24
model = NLinear(
    window_size=window_size,
    forcast_size=forecasting_size,
    individual=False,
    feature_size=1,
)


# Build the training dataset and its (unshuffled) loader.
train_dataset = TimeSeriesDataset(
    data=train_data,
    window_size=window_size,
    forecasting_size=forecasting_size,
)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=24, shuffle=False)

In [14]:
# Pull a single batch from the loader to inspect it.
x, y = next(iter(train_dataloader))

In [15]:
# Axes are (batch, sequence length, channel/dimension) for both input and target.
x.shape, y.shape # Batch, Sequence Length, Channel or Dimension

(torch.Size([24, 192, 1]), torch.Size([24, 24, 1]))

In [16]:
# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [17]:
# Per-epoch mean training loss; the training loop appends one value per epoch.
train_losses = []

In [18]:
# Train the model with L1 (mean absolute error) loss and the Adam optimizer.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# Training configuration
num_epochs = 50

# Training loop
model.train()
model = model.to(device)
for epoch in range(num_epochs):
    running_loss = 0.0

    for window_data, forecast_data in train_dataloader:
        # Move the batch to the selected device
        window_data = window_data.to(device)
        forecast_data = forecast_data.to(device)

        # Reset gradients before computing this update
        optimizer.zero_grad()

        # Forward pass
        outputs = model(window_data)

        # Compute the loss
        loss = criterion(outputs, forecast_data)

        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss
        running_loss += loss.item()

    # Record and print the mean batch loss for this epoch
    epoch_loss = running_loss / len(train_dataloader) 
    train_losses.append(epoch_loss)

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss}")

print("학습 완료!")



Epoch 1/50, Loss: 141.44988843429186
Epoch 2/50, Loss: 116.98859666136013
Epoch 3/50, Loss: 109.47928754422946
Epoch 4/50, Loss: 106.4079105230074
Epoch 5/50, Loss: 104.85642916046227
Epoch 6/50, Loss: 103.75540429632854
Epoch 7/50, Loss: 102.81664797359919
Epoch 8/50, Loss: 101.96660161084051
Epoch 9/50, Loss: 101.18461866877952
Epoch 10/50, Loss: 100.45984025960455
Epoch 11/50, Loss: 99.78651450816594
Epoch 12/50, Loss: 99.1625723615494
Epoch 13/50, Loss: 98.58303198801256
Epoch 14/50, Loss: 98.04307066967337
Epoch 15/50, Loss: 97.5410542343602
Epoch 16/50, Loss: 97.07444559312721
Epoch 17/50, Loss: 96.63953851011502
Epoch 18/50, Loss: 96.23441801780511
Epoch 19/50, Loss: 95.85685246049866
Epoch 20/50, Loss: 95.50437360033187
Epoch 21/50, Loss: 95.17474530748099
Epoch 22/50, Loss: 94.86662832895915
Epoch 23/50, Loss: 94.57858619479765
Epoch 24/50, Loss: 94.30891685906192
Epoch 25/50, Loss: 94.05607204857608
Epoch 26/50, Loss: 93.81877284667381
Epoch 27/50, Loss: 93.5966758360219
Epoc