### Import dataset

In [None]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)
import matplotlib.pyplot as plt

In [None]:
# Load the three source tables (file names: final_elec, final_gas, final_kau).
# The paths look like a mounted Google Drive folder — adjust when running elsewhere.
# The gas file is decoded as cp949; the other two use the pandas default encoding.
df1 = pd.read_csv('/content/drive/MyDrive/비타민/시계열/final_elec.csv')
df2 = pd.read_csv('/content/drive/MyDrive/비타민/시계열/final_gas.csv', encoding = 'cp949')
df3 = pd.read_csv('/content/drive/MyDrive/비타민/시계열/final_kau.csv')

In [None]:
# Rename the lower-case 'date' column to 'Date' (in place) so df2 shares the
# key column name used when joining with df1.
df2.rename(columns = {'date' : 'Date'}, inplace = True)

### Merge dataset

In [None]:
# Inner-join the selected df1 columns with every df2 column on 'Date';
# only dates present in both frames are kept.
elec_columns = ['Date', '최대전력(MW)', '공급예비력(MW)', '공급예비율(%)']
combined = df1[elec_columns].merge(df2, how = 'inner', on = 'Date')
combined

In [None]:
# Rename 'date' to 'Date' (in place) so df3 can be joined on the same key
# column name as the other frames.
df3.rename(columns = {'date': 'Date'}, inplace = True)

In [None]:
# Attach the close price ('종가') and volume ('거래량') columns from df3,
# again keeping only the dates that exist on both sides.
kau_columns = ['Date', '종가', '거래량']
total_df = combined.merge(df3[kau_columns], how = 'inner', on = 'Date')
total_df

In [None]:
# Both columns hold strings with ',' thousands separators; strip the commas
# and convert the result to float.
for numeric_col in ('종가', '거래량'):
    without_commas = total_df[numeric_col].str.replace(',', '')
    total_df[numeric_col] = without_commas.astype(float)

In [None]:
# Count missing values per column to see which columns need imputation.
total_df.isnull().sum()

In [None]:
# Back-fill gaps in 'gas' with the next available later value.
# Assigning the result back (instead of an in-place call on the column
# selection) keeps this working under pandas Copy-on-Write, and Series.bfill()
# replaces the deprecated fillna(method='bfill') spelling.
# NOTE(review): back-filling copies a *later* observation into an earlier row,
# which is look-ahead information in a forecasting setup — confirm this is
# intended, or switch to ffill().
total_df['gas'] = total_df['gas'].bfill()
total_df.isnull().sum()

In [None]:
# Row-over-row fractional change of the close price ('종가'); the first row has
# no predecessor and becomes NaN.
# NOTE(review): this is a daily return only if rows are sorted by date with one
# row per trading day — confirm against the source files.
total_df['diff_close'] = total_df['종가'].pct_change(1)

In [None]:
# Drop every row that still contains a NaN in any column (this includes the
# first row, whose 'diff_close' is undefined).
total_df = total_df.dropna()

In [None]:
# Keep only the listed columns, in this order; anything else in the frame
# (e.g. 'Date') is dropped. 'diff_close' is the last column.
total_df = total_df[['최대전력(MW)', '공급예비력(MW)', '공급예비율(%)', 'gas', '종가', '거래량',
       'diff_close']]

### Choose dataset & Hyperparameter setting

In [None]:
# Experiment configuration. Trailing comments list the alternative values
# noted by the author.
# Frame to model and a label for it.
data, dataname = total_df, 'total_df' # stock_df / total_df
# Column of `data` that the model predicts.
TARGET = "diff_close"                      # "diff_close"
# Number of consecutive rows in each input window.
SEQ_SIZE = 120                        # 60 / 120
# Number of future rows predicted per window.
PRED_SIZE = 20
# Mini-batch size for the DataLoaders.
BATCH_SIZE = 8                    #  4 / 8
# Hidden-state width of the GRU.
HIDDEN_SIZE = 128               # 64 / 128
# Upper bound on training epochs.
EPOCHS = 1000

### Make train dataset

In [None]:
def split_xy(dataset, time_steps, y_column, target=None):
    """Cut a DataFrame into sliding (input window, future target) pairs.

    Window ``i`` uses rows ``i .. i + time_steps - 1`` (all columns) as input
    and the ``target`` column of the following ``y_column`` rows as label.
    Windows advance one row at a time.

    Args:
        dataset: pandas DataFrame whose rows are ordered in time.
        time_steps: number of rows in each input window.
        y_column: number of future rows to predict (the forecast horizon).
        target: name of the label column; defaults to the module-level
            ``TARGET`` when omitted.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays with shapes
        ``(n_windows, time_steps, n_columns)`` and ``(n_windows, y_column)``.
        When the data is too short for a single window, both arrays are empty
        but keep those trailing dimensions.
    """
    # Resolved at call time so the global is only needed when no target is given.
    target = TARGET if target is None else target

    # Convert once; slicing NumPy arrays is far cheaper than repeated .iloc calls.
    features = dataset.to_numpy()
    labels = dataset[target].to_numpy()

    n_windows = len(dataset) - time_steps - y_column + 1
    if n_windows <= 0:
        return (
            np.empty((0, time_steps, features.shape[1]), dtype=features.dtype),
            np.empty((0, y_column), dtype=labels.dtype),
        )

    x = np.stack([features[i:i + time_steps] for i in range(n_windows)])
    y = np.stack([
        labels[i + time_steps:i + time_steps + y_column]
        for i in range(n_windows)
    ])
    return x, y

# Build all training windows: each sample has SEQ_SIZE input rows and the
# following PRED_SIZE target values.
X, y = split_xy(data, SEQ_SIZE, PRED_SIZE)
# Show the first window and its label, then the overall array shapes.
print(X[0,:],"\n", y[0])
print("X size : ", X.shape)
print("y size : ", y.shape)

### Define X_test

In [None]:
# The last SEQ_SIZE rows of `data` form the single inference window, reshaped
# to (1, SEQ_SIZE, n_columns) so it matches the layout of X.
X_test = data.tail(SEQ_SIZE).values.reshape(1, SEQ_SIZE, data.shape[1])
print(X_test)
print("X_test size : ", X_test.shape)

### Standardization

In [None]:
from sklearn.preprocessing import StandardScaler

# StandardScaler expects 2-D input, so every window is flattened to one row of
# SEQ_SIZE * n_columns values, scaled column-wise, and folded back to 3-D.
# NOTE(review): the scaler is fitted on every window before the
# train/validation split, so validation statistics leak into the scaling.
n_windows, n_steps, n_feats = X.shape
flat_windows = X.reshape(n_windows, n_steps * n_feats)
scaler = StandardScaler()
X = scaler.fit_transform(flat_windows).reshape(n_windows, SEQ_SIZE, data.shape[1])

# Apply the already-fitted scaling to the inference window.
n_test = X_test.shape[0]
flat_test = X_test.reshape(n_test, X_test.shape[1] * X_test.shape[2])
X_test = scaler.transform(flat_test).reshape(n_test, SEQ_SIZE, data.shape[1])

print("X size : ", X.shape)
print("X_test size : ", X_test.shape)

### Split train-validation dataset

In [None]:
# to DataLoader
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Chronological hold-out: the last 20% of the windows become the validation set.
# shuffle=False matters here — consecutive windows overlap in all but one row,
# so a random split would place near-duplicates of validation samples in the
# training set and make the validation loss look better than it is.
# NOTE(review): windows right at the boundary still share rows with the first
# validation windows; drop the last SEQ_SIZE + PRED_SIZE - 1 training windows
# if a fully disjoint split is required.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = 0.2, shuffle = False)

# to tensor (float32 is what the model parameters use)
X_train = torch.as_tensor(X_train, dtype = torch.float32)
X_valid = torch.as_tensor(X_valid, dtype = torch.float32)
y_train = torch.as_tensor(y_train, dtype = torch.float32)
y_valid = torch.as_tensor(y_valid, dtype = torch.float32)

# to DataLoader: shuffle training batches each epoch, keep validation order fixed
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size = BATCH_SIZE, shuffle = True)
val_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size = BATCH_SIZE, shuffle = False)

### Modeling

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.nn import Parameter
from torch import Tensor
from torch.optim.lr_scheduler import ReduceLROnPlateau

import copy
class EarlyStopping:
    """Track validation loss and signal when training should stop.

    Call the instance once per epoch with the model and that epoch's
    validation loss. It returns ``True`` once ``patience`` consecutive calls
    have failed to lower the best loss by at least ``min_delta``; otherwise it
    returns ``False``. A copy of the model's ``state_dict`` is kept for the best
    loss seen and, if ``restore_best_weights`` is set, loaded back into the
    model at the moment stopping is triggered.

    Attributes:
        best_loss: lowest validation loss recorded so far (``None`` before the
            first call).
        best_model: deep copy of the ``state_dict`` taken at ``best_loss``.
        counter: number of consecutive calls without improvement.
        status: human-readable description of the most recent outcome.
    """

    def __init__(self, patience = 5, min_delta = 0, restore_best_weights = True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_model = None
        self.best_loss = None
        self.counter = 0
        self.status = ""

    def __call__(self, model, val_loss):
        """Record ``val_loss``; return True when training should stop."""
        # Very first observation: just remember it as the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            self.best_model = copy.deepcopy(model.state_dict())
            return False

        # Improvement of at least min_delta: snapshot weights, reset patience.
        gain = self.best_loss - val_loss
        if gain >= self.min_delta:
            self.best_model = copy.deepcopy(model.state_dict())
            self.best_loss = val_loss
            self.counter = 0
            self.status = f"Improvement found, counter reset to {self.counter}"
            return False

        # No improvement this time.
        self.counter += 1
        if self.counter < self.patience:
            self.status = f"No improvement in the last {self.counter} epochs"
            return False

        self.status = f"Early stopping triggered after {self.counter} epochs."
        if self.restore_best_weights:
            model.load_state_dict(self.best_model)
        return True

class GRUModel(nn.Module):
    """Single-layer GRU encoder followed by an MLP forecasting head.

    The GRU reads a batch-first sequence; only the output at the last time
    step is kept, passed through dropout (p=0.2) and then through five linear
    layers (ReLU between them) that emit one value per forecast step.

    Args:
        input_dim: number of features per time step.
        hidden_size: GRU hidden width; defaults to the module-level
            ``HIDDEN_SIZE`` when omitted.
        pred_size: number of output values; defaults to the module-level
            ``PRED_SIZE`` when omitted.
    """

    def __init__(self, input_dim, hidden_size = None, pred_size = None):
        super().__init__()
        # Fall back to the notebook-level hyperparameters only when needed, so
        # the class can also be built without those globals.
        hidden_size = HIDDEN_SIZE if hidden_size is None else hidden_size
        pred_size = PRED_SIZE if pred_size is None else pred_size

        self.gru = nn.GRU(input_dim, hidden_size = hidden_size, batch_first = True)
        self.dropout = nn.Dropout(0.2)
        self.seq = nn.Sequential(nn.Linear(hidden_size, 32),
                                nn.ReLU(),
                                nn.Linear(32, 32),
                                nn.ReLU(),
                                nn.Linear(32, 32),
                                nn.ReLU(),
                                nn.Linear(32, 32),
                                nn.ReLU(),
                                nn.Linear(32, pred_size)
                                )

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, pred_size)."""
        x, _ = self.gru(x)
        # Summarise the sequence by the GRU output at the final time step.
        x = self.dropout(x[:, -1, :])
        x = self.seq(x)
        return x

class RMSELoss(nn.Module):
    """Root-mean-squared-error loss built on ``nn.MSELoss``.

    Args:
        eps: small constant added to the MSE before the square root. The
            derivative of ``sqrt`` is infinite at 0, so without it a perfectly
            fitted batch yields NaN gradients. As a consequence the loss never
            drops below ``sqrt(eps)``.
    """

    def __init__(self, eps = 1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return ``sqrt(MSE(yhat, y) + eps)`` as a scalar tensor."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

In [None]:
# Build the network with one input feature per column of the training windows.
model = GRUModel(X_train.shape[2])
criterion = RMSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
# Multiply the learning rate by 0.1 once the monitored value ('min' mode) has
# not improved for 20 scheduler steps.
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch
# releases — confirm against the installed version.
scheduler = ReduceLROnPlateau(optimizer, 'min', factor = 0.1, patience = 20, verbose = True)

### RUN!!

In [None]:
# Train until early stopping fires or EPOCHS is reached, recording the mean
# per-batch loss of every epoch for both the training and validation loaders.
epoch_counter = 0
patience = 30
best_loss = float('inf')
# Initialised here so later reporting cannot raise NameError when the
# validation loss never improves (for example when it is NaN from the start).
best_train_loss = float('inf')
done = False
es = EarlyStopping(patience=patience)
tr_losses_fp, val_losses_fp = [], []

while not done and epoch_counter < EPOCHS:
    epoch_counter += 1

    # train
    model.train()
    train_losses = []
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(x_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
    train_loss = np.mean(train_losses)
    tr_losses_fp.append(train_loss)

    # validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            output = model(x_batch)
            loss = criterion(output, y_batch)
            val_losses.append(loss.item())
    val_loss = np.mean(val_losses)
    val_losses_fp.append(val_loss)
    # The plateau scheduler lowers the learning rate based on validation loss.
    scheduler.step(val_loss)

    # EarlyStopping snapshots the best weights and restores them when it fires.
    if es(model, val_loss):
        done = True

    # Remember the training loss of the epoch with the best validation loss.
    if val_loss < best_loss:
        best_loss = val_loss
        best_train_loss = train_loss

    print(f"Epoch {epoch_counter}/{EPOCHS}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

# When the loop ended because EPOCHS ran out, EarlyStopping never restored its
# snapshot, so the model would keep the last epoch's weights instead of the
# best ones. Restore them here.
if not done and es.restore_best_weights and es.best_model is not None:
    model.load_state_dict(es.best_model)

In [None]:
# Lowest epoch-level validation loss observed during training.
print(f"Best validation loss : {best_loss}")

In [None]:
# Training loss of the epoch that achieved the best validation loss (not the
# minimum training loss overall).
print(f"Best train loss : {best_train_loss}")

### Visualize train-validation loss

In [None]:
import matplotlib.pyplot as plt
# Draw the per-epoch training and validation loss curves on one set of axes.
loss_curves = (
    (tr_losses_fp, 'blue', 'train_loss'),
    (val_losses_fp, 'red', 'val_loss'),
)
for history, colour, tag in loss_curves:
    plt.plot(range(len(history)), history, color=colour, label=tag)
plt.legend()
plt.show()

### Prediction

In [None]:
# evaluation
# Run the trained model on the single inference window.
model.eval()
with torch.no_grad():
    # as_tensor accepts both an ndarray and an existing Tensor, so this cell can
    # be re-executed after X_test has already been converted (calling .astype
    # on a Tensor would raise AttributeError).
    X_test = torch.as_tensor(X_test, dtype = torch.float32)
    pred = model(X_test)

# Back to NumPy for the post-processing and plotting below.
pred = pred.detach().cpu().numpy()
print(pred)

In [None]:
# When the model predicts fractional changes, turn them back into price levels
# by compounding each predicted change onto the previous price, starting from
# the last observed close ('종가'). For any other TARGET, pred is left as is.
if TARGET == "diff_close":
    endPrice = data['종가'].iloc[-1]
    pred_close = []
    for step_return in pred[0]:
        endPrice += endPrice * step_return
        pred_close.append(endPrice)

    pred = np.array(pred_close).reshape(1, PRED_SIZE)

### Plotting

In [None]:
# Hard-coded dates that the forecast steps are plotted against, one per step.
# NOTE(review): the list has 20 entries, so it only lines up with PRED_SIZE = 20.
dates = [
    "2024-08-26", "2024-08-27", "2024-08-28", "2024-08-29", "2024-08-30",
    "2024-09-02", "2024-09-03", "2024-09-04", "2024-09-05", "2024-09-06",
    "2024-09-09", "2024-09-10", "2024-09-11", "2024-09-12", "2024-09-13",
    "2024-09-19", "2024-09-20", "2024-09-23", "2024-09-24", "2024-09-25",
]

# Datetime values give Matplotlib a proper date axis; flatten pred to 1-D.
date_indices = pd.to_datetime(dates)
pred = np.array(pred).reshape(PRED_SIZE)

# Plot the forecast using the object-oriented Matplotlib interface.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(date_indices, pred, color='red', alpha=0.6, label='Prediction')
ax.legend()
fig.autofmt_xdate()  # slant the date labels so they do not overlap
ax.set_title("Prediction Over Specified Dates")
ax.set_xlabel("Date")
ax.set_ylabel("Prediction Value")
ax.grid(True)
plt.show()

In [None]:
# Show the forecast values as a one-column table, one row per forecast step.
pd.DataFrame(pred, columns = ['Predicted_Close'])