In [22]:
import random

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
import torch.backends.cudnn as cudnn

from tqdm import tqdm

In [4]:
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)
np.random.seed(0)
cudnn.benchmark = False
cudnn.deterministic = True
random.seed(0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
class GasDataset(Dataset):
    def __init__(self, df, seq_len, type, year=None):
        self.seq_len = seq_len

        self.df = df.loc[df['type'] == type]
        if year is not None:
            self.df = self.df.loc[self.df['year'] == year]
    
    def __len__(self):
        return len(self.df) - self.seq_len - 1

    def __getitem__(self, idx):
        data = self.df.iloc[idx:idx + self.seq_len]
        date, x = data.date, torch.from_numpy(np.array(data[['supply']], dtype=np.float32))
        y = torch.from_numpy(np.array(self.df.iloc[idx + self.seq_len, -1], dtype=np.float32))

        return x, y

In [11]:
train_rate = 0.2
df = pd.read_csv("data/kor_gas_day.csv")
train_len = int(len(df) * train_rate)

df_train, df_val = df.iloc[:train_len], df.iloc[train_len:]
train_dataset = GasDataset(df_train, 7, 'A', 2013)
val_dataset = GasDataset(df_val, 7, 'A', 2013)

SyntaxError: unterminated string literal (detected at line 6) (2690087885.py, line 6)

In [12]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, output_size)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x)
        return x

In [27]:
def train(model, train_loader, val_loader, optimizer, criterion, epoch):
    model.train()
    for i in (range(epoch)):
        for batch_idx, (x, y) in enumerate(train_loader):
            x = x.squeeze().to(device)
            y = y.unsqueeze(-1).to(device)
            
            optimizer.zero_grad()
            output = model(x)
            
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

        if i % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i, epoch,
                100. * i / epoch, loss.item()))

    if val_loader is not None:
        model.eval()
        loss = 0
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(val_loader):
                x = x.squeeze().to(device)
                y = y.unsqueeze(-1).to(device)
                output = model(x)
                loss += criterion(output, y)
        print('\nTest set: Average loss: {:.4f}'.format(loss / len(val_loader.dataset)))

        

In [24]:
train_rate = 0.2
df = pd.read_csv("data/kor_gas_day.csv")
train_len = int(len(df) * train_rate)

df_train, df_val = df.iloc[:train_len], df.iloc[train_len:]
train_dataset = GasDataset(df_train, 7, 'A')
val_dataset = GasDataset(df_val, 7, 'A')

In [25]:
input_szie = 7
output_szie = 1
hidden_szie = 128
learning_rate = 0.001
epochs = 1000
batch_szie = 32

model = LSTM(input_szie, hidden_szie, output_szie).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss().to(device)

train_loader = DataLoader(train_dataset, batch_size=batch_szie, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_szie, shuffle=True)

In [28]:
train(model, train_loader, val_loader, optimizer, criterion, epochs)


Test set: Average loss: 2.4294
