In [1]:
from ML_functions import make_dataframe

# Build the working DataFrame; the first value returned by make_dataframe is discarded.
# NOTE(review): the meaning of the arguments (60, 20) is defined in ML_functions — confirm there.
_, df = make_dataframe(60,20)

In [9]:
import pandas as pd
# Number of consecutive rows of `df` that make up one labelled segment, and
# how many such segments are extracted.
ROWS_PER_SEGMENT = 60436
N_SEGMENTS = 11

# Create module-level frames df_1 ... df_11: segment i holds rows
# [ROWS_PER_SEGMENT*(i-1), ROWS_PER_SEGMENT*i) of `df` with a fresh 0-based
# index, the 'TIME' column removed, and a constant integer 'label' column
# equal to i appended as the last column.
for i in range(1, N_SEGMENTS + 1):
    segment = df[ROWS_PER_SEGMENT * (i - 1):ROWS_PER_SEGMENT * i]
    # assign() sizes the label column to the segment itself, so a short final
    # slice is never padded with NaN rows (which a fixed-length label frame
    # concatenated along axis=1 would produce).
    globals()['df_' + str(i)] = (
        segment
        .reset_index(drop=True)
        .drop(columns='TIME')
        .assign(label=i)
    )

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler

## Model

In [8]:
class moving_avg(nn.Module):
    """Moving average over the time axis with edge-replication padding.

    The input is expected as (batch, time, channel): the first and last time
    steps are repeated so that, with ``stride=1``, the output has the same
    number of time steps as the input for both odd and even ``kernel_size``.

    Args:
        kernel_size: Width of the averaging window.
        stride: Stride of the underlying ``nn.AvgPool1d``.
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride = stride)

    def forward(self, x):
        """Return the smoothed series, shaped (batch, time, channel)."""
        # Total padding must be kernel_size - 1 to keep the length. For odd
        # kernels both sides get (k - 1) / 2; for even kernels the extra step
        # goes to the end (symmetric padding would drop one time step).
        pad_front = (self.kernel_size - 1) // 2
        pad_end = self.kernel_size - 1 - pad_front
        front = x[:, 0:1, :].repeat(1, pad_front, 1)
        end = x[:, -1:, :].repeat(1, pad_end, 1)
        x = torch.cat([front, x, end], dim = 1)
        # AvgPool1d pools over the last dimension, so move time there and back.
        x = self.avg(x.permute(0,2,1))
        x = x.permute(0,2,1)
        return x
    
class series_decomp(nn.Module):
    """Split a series into a moving-average component and the remainder.

    Args:
        kernel_size: Window width handed to the stride-1 ``moving_avg`` layer.
    """

    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride = 1)

    def forward(self, x):
        """Return ``(x - trend, trend)`` where ``trend = self.moving_avg(x)``."""
        trend = self.moving_avg(x)
        return x - trend, trend
    

class DLinear(nn.Module):
    """D-Linear: decompose the input, then forecast each part with a linear layer.

    The input window is split by ``series_decomp`` into a seasonal (residual)
    part and a trend (moving-average) part. Each part is mapped from
    ``window_size`` to ``forecast_size`` time steps by a linear layer and the
    two forecasts are summed.

    Args:
        window_size: Number of input time steps.
        forecast_size: Number of predicted time steps.
        feature_size: Number of channels; used to build one layer pair per
            channel when ``individual`` is true.
        kernel_size: Moving-average window used by the decomposition.
        individual: If true, every channel gets its own pair of linear
            layers; otherwise one pair is shared by all channels.
    """

    def __init__(self, window_size, forecast_size, feature_size, kernel_size = 25, individual = False):
        super(DLinear, self).__init__()

        self.seq_len = window_size
        self.pred_len = forecast_size
        self.channels = feature_size
        self.decomposition = series_decomp(kernel_size)
        self.individual = individual

        if self.individual:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()

            for i in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                # Fixed: the attribute is `seq_len`; `seqlen` raised AttributeError.
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))

        else:
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        """Forecast from ``x`` of shape [Batch_size, Input Length, Channel].

        Returns:
            Tensor of shape [Batch_size, Output Length, Channel].
        """
        seasonal_init, trend_init = self.decomposition(x)
        # The linear layers act on the last dimension, so move time there.
        seasonal_init, trend_init = seasonal_init.permute(0,2,1), trend_init.permute(0,2,1)

        if self.individual:
            # Fixed: use `.size(1)`; calling the tensor itself raised TypeError.
            seasonal_output = torch.zeros(
                [seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
                dtype = seasonal_init.dtype, device = seasonal_init.device)
            trend_output = torch.zeros(
                [trend_init.size(0), trend_init.size(1), self.pred_len],
                dtype = trend_init.dtype, device = trend_init.device)

            for i in range(self.channels):
                seasonal_output[:, i, :] = self.Linear_Seasonal[i](seasonal_init[:, i, :])
                trend_output[:, i, :] = self.Linear_Trend[i](trend_init[:, i, :])

        else:
            seasonal_output = self.Linear_Seasonal(seasonal_init)
            trend_output = self.Linear_Trend(trend_init)

        x = seasonal_output + trend_output
        return x.permute(0,2,1) # to [Batch_size, Output Length, Channel]
    

class NLinear(nn.Module):
    """Linear forecaster normalised by the last observed value.

    The last time step of the window is subtracted from the input, a linear
    layer maps ``window_size`` steps to ``forecast_size`` steps, and the
    subtracted value is added back to the forecast.

    Args:
        window_size: Number of input time steps.
        forecast_size: Number of predicted time steps.
        feature_size: Number of channels (one layer per channel when
            ``individual`` is true).
        individual: Use a separate linear layer per channel instead of a
            single shared one.
    """

    def __init__(self, window_size, forecast_size, feature_size, individual = False):
        super().__init__()
        self.window_size = window_size
        self.forecast_size = forecast_size
        self.channels = feature_size
        self.individual = individual

        if not self.individual:
            self.Linear = nn.Linear(self.window_size, self.forecast_size)
        else:
            self.Linear = nn.ModuleList(
                [nn.Linear(self.window_size, self.forecast_size) for _ in range(self.channels)]
            )

    def forward(self, x):
        """Map ``x`` (batch, window, channel) to (batch, forecast, channel)."""
        # Anchor on the last observation; detached so no gradient flows through it.
        anchor = x[:, -1:, :].detach()
        centered = x - anchor

        if self.individual:
            forecast = torch.zeros(
                [centered.size(0), self.forecast_size, centered.size(2)],
                dtype = centered.dtype,
                device = centered.device,
            )
            for ch in range(self.channels):
                forecast[:, :, ch] = self.Linear[ch](centered[:, :, ch])
        else:
            # The layer acts on the last dimension: move time there and back.
            forecast = self.Linear(centered.transpose(1, 2)).transpose(1, 2)

        return forecast + anchor

In [9]:
# Module-level scaler shared by transform(): a forward call refits it via
# fit_transform, an inverse call reuses whatever was fitted last.
scaler = MinMaxScaler()

def targetParsing(data, target, index = False):
    """Pull one column out of ``data`` as index labels plus values.

    Args:
        data: Object supporting ``.loc`` / ``.iloc`` column selection.
        target: Column label (when ``index`` is False) or column position
            (otherwise).
        index: Select by position with ``iloc`` instead of by label with ``loc``.

    Returns:
        Tuple ``(labels, values)``: the selection's index as a list and its
        values as a NumPy array.
    """
    column = data.loc[:, target] if index == False else data.iloc[:, target]
    labels = list(column.index)
    values = column.to_numpy()
    return labels, values

def transform(raw, check_inverse=False):
    """Scale ``raw`` with the module-level ``scaler``, or undo that scaling.

    Args:
        raw: Array reshaped internally to a single column.
        check_inverse: When False the scaler is (re)fitted on ``raw`` and the
            scaled column is returned with shape (n, 1). Otherwise the
            previously fitted scaling is inverted and a 1-D array is returned.
    """
    column = raw.reshape(-1,1)
    if check_inverse == False:
        # Forward direction: every call refits the shared scaler.
        return scaler.fit_transform(column)
    restored = scaler.inverse_transform(column)
    return restored[:, 0]
    
class WindowDataset(Dataset):
    """Sliding-window view of a 1-D series as (input, target) pairs.

    Sample ``k`` uses ``y[stride*k : stride*k + input_window]`` as input and
    the following ``output_window`` values as target.

    Attributes:
        X: float array of shape (num_samples, input_window, 1).
        Y: float array of shape (num_samples, output_window, 1).
    """

    def __init__(self,y, input_window, output_window, stride = 1):
        series_len = y.shape[0]

        # Total number of samples produced when the window slides `stride` steps at a time.
        num_samples = (series_len - input_window - output_window) // stride + 1

        inputs = np.zeros([num_samples, input_window, 1])
        targets = np.zeros([num_samples, output_window, 1])

        for k in range(num_samples):
            window_start = stride * k
            split = window_start + input_window
            inputs[k, :, 0] = y[window_start : split]
            targets[k, :, 0] = y[split : split + output_window]

        self.X, self.Y = inputs, targets

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        sample_input = self.X[idx]
        sample_target = self.Y[idx]
        return sample_input, sample_target
    
def customDataLoader(data, window_size:int, forecast_size : int, batch_size:int):
    """Build a DataLoader of sliding windows over the scaled training series.

    ``data`` is scaled with ``transform`` and its last ``window_size`` values
    are left out before the windows are cut by ``WindowDataset``.

    Args:
        data: Raw series passed to ``transform``.
        window_size: Input window length (also the number of trailing values held out).
        forecast_size: Target window length.
        batch_size: Batch size of the returned loader.
    """
    scaled = transform(data)
    train_series = scaled[:-window_size, 0]
    windows = WindowDataset(train_series, window_size, forecast_size)
    return DataLoader(windows, batch_size = batch_size)

class trainer():
    """Train a DLinear or NLinear model on a series and forecast from it.

    Args:
        data: Raw series; scaled with ``transform`` (which refits the shared
            scaler), with the last ``window_size`` values held out.
        dataloader: Iterable of ``(inputs, targets)`` batches used for training.
        window_size: Number of input time steps.
        forecast_size: Number of predicted time steps.
        name: ``'DLinear'`` selects DLinear; any other value selects NLinear.
        feature_size: Number of channels passed to the model.
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, data, dataloader, window_size, forecast_size, name='DLinear',feature_size=1 ,lr = 0.001):
        self.device = torch.device('cuda') if torch.cuda.is_available() else 'cpu'
        self.data = data
        self.trains = transform(data)[:-window_size, 0]
        self.dataloader = dataloader
        self.window_size = window_size
        self.forecast_size = forecast_size
        self.feature_size = feature_size
        self.name = name
        # Kept so implement() can rebuild a trainer with the same settings.
        self.lr = lr

        # Both models require feature_size as their third positional argument.
        if name == 'DLinear':
            self.model = DLinear(window_size, forecast_size, feature_size).to(self.device)
        else:
            self.model = NLinear(window_size, forecast_size, feature_size).to(self.device)

        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(params = self.model.parameters(), lr = lr)

    def train(self, epoch = 50):
        """Run ``epoch`` passes over the dataloader and plot the per-epoch summed loss."""
        self.model.train()
        progress = tqdm(range(epoch))
        loss_list = []
        # Guard the displayed mean against an empty dataloader.
        n_batches = max(len(self.dataloader), 1)

        for _ in progress:
            batch_loss = 0.0

            for (inputs, outputs) in self.dataloader:
                self.optimizer.zero_grad()
                output = self.model(inputs.float().to(self.device))
                loss = self.criterion(output, outputs.float().to(self.device))
                loss.backward()
                self.optimizer.step()
                # loss.item() is already a Python float on the host.
                batch_loss += loss.item()

            loss_list.append(batch_loss)
            progress.set_description("loss : {:0.6f}".format(batch_loss / n_batches))
        plt.plot(loss_list)

    def evaluate(self):
        """Forecast from the last ``window_size`` scaled training values.

        Returns:
            NumPy array with the model output for a single-sample batch
            (still in the scaled domain).
        """
        window = torch.tensor(self.trains[-self.window_size:]).reshape(1,-1,1).float().to(self.device)
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(window)
        return predictions.detach().cpu().numpy()

    def implement(self):
        """Train a fresh trainer with this one's settings and return its
        forecast mapped back to the original scale via ``transform``."""
        # Keyword arguments: the constructor order is (name, feature_size, lr),
        # so passing feature_size and name positionally swapped them.
        process = trainer(
            self.data, self.dataloader, self.window_size, self.forecast_size,
            name = self.name, feature_size = self.feature_size, lr = self.lr,
        )
        process.train()
        evaluate = process.evaluate()
        result = transform(evaluate, check_inverse=True)
        return result
    
    
def figureplot(date,data,pred,window_size,forecast_size):
    """Plot the last ``window_size`` observed values together with a forecast.

    Args:
        date: Dates converted with ``mdates.date2num`` for the x axis.
        data: Observed series; its last ``window_size`` values are drawn.
        pred: Forecast values, drawn over the last ``forecast_size`` dates.
        window_size: Number of trailing observations to show.
        forecast_size: Number of trailing dates the forecast is drawn on.
    """
    date_nums = mdates.date2num(date)
    n_points = data.shape[0]

    fig, ax = plt.subplots(figsize=(20,5))
    ax.plot(date_nums[n_points - window_size:n_points], data[n_points - window_size:], label="Real")
    # NOTE(review): the forecast is placed on the final `forecast_size` dates —
    # confirm this matches the period the forecast actually covers.
    ax.plot(date_nums[n_points - forecast_size:n_points], pred, label="LSTM-linear")

    tick_locator = mdates.AutoDateLocator()
    tick_formatter = mdates.AutoDateFormatter(tick_locator)
    ax.xaxis.set_major_locator(tick_locator)
    ax.xaxis.set_major_formatter(tick_formatter)

    plt.legend()
    plt.show()

In [10]:
# Load the CP949-encoded CSV, use the column named in set_index() as the row
# index (presumably a timestamp — confirm) and drop the 'Unnamed: 0' column.
raw=pd.read_csv('./서인천IC-부평IC 평균속도.csv',encoding='CP949').set_index('집계일시').drop('Unnamed: 0',axis=1)

In [11]:
# Take the first column by position: `date` is the list of index labels,
# `data` the column values as a NumPy array.
date, data = targetParsing(raw, 0, index = True)

In [12]:
# Number of past time steps in each input window.
window_size = 60
# Number of future time steps in each target window.
forecast_size = 5

In [13]:
# Scale the series and wrap its sliding (input, target) windows in a DataLoader with batches of 4.
dataloader = customDataLoader(data, window_size, forecast_size, batch_size = 4)

In [17]:
# Display the raw (unscaled) values of the selected column.
data

array([ 98.63, 100.53,  99.86,  99.34,  93.64,  93.92,  91.78,  90.57,
        87.03,  81.75,  77.13,  76.01,  76.09,  46.5 ,  56.69,  54.91,
        67.57,  69.82,  79.98,  82.14,  81.02,  82.58,  85.55,  91.8 ,
        91.87,  93.9 ,  95.61,  96.73,  94.15,  99.36, 100.24,  97.64,
        94.25,  90.01,  86.25,  86.81,  88.37,  85.6 ,  84.18,  83.35,
        81.26,  77.43,  84.89,  84.12,  81.19,  85.15,  93.34,  98.84,
       100.22, 100.22,  98.65,  97.45,  95.32,  89.67,  75.37,  48.91,
        48.06,  58.29,  70.01,  74.31,  76.01,  74.91,  69.59,  64.77,
        78.63,  64.41,  51.38,  59.73,  77.89,  85.24,  90.58,  97.63,
        99.36,  98.24,  96.13,  93.54,  92.1 ,  90.03,  77.64,  61.32,
        59.25,  59.08,  58.55,  56.2 ,  68.7 ,  63.62,  55.86,  57.99,
        61.51,  62.23,  56.89,  48.77,  65.57,  75.84,  78.58,  89.09,
        97.29,  96.91,  96.52,  95.49,  93.89,  94.44,  92.  ,  91.81,
        90.23,  86.25,  84.58,  81.55,  84.2 ,  84.33,  82.21,  77.65,
      

In [16]:
# Print only the first (inputs, targets) batch as a sanity check, then stop iterating.
for a,b in dataloader:
    print(a,b)
    break

tensor([[[0.9278],
         [0.9511],
         [0.9429],
         [0.9365],
         [0.8667],
         [0.8701],
         [0.8439],
         [0.8291],
         [0.7857],
         [0.7211],
         [0.6645],
         [0.6507],
         [0.6517],
         [0.2892],
         [0.4141],
         [0.3923],
         [0.5473],
         [0.5749],
         [0.6994],
         [0.7258],
         [0.7121],
         [0.7312],
         [0.7676],
         [0.8442],
         [0.8450],
         [0.8699],
         [0.8908],
         [0.9046],
         [0.8730],
         [0.9368],
         [0.9476],
         [0.9157],
         [0.8742],
         [0.8222],
         [0.7762],
         [0.7830],
         [0.8022],
         [0.7682],
         [0.7508],
         [0.7407],
         [0.7151],
         [0.6681],
         [0.7595],
         [0.7501],
         [0.7142],
         [0.7627],
         [0.8630],
         [0.9304],
         [0.9473],
         [0.9473],
         [0.9281],
         [0.9134],
         [0.