# Library import

In [None]:
""" os """
import os

""" torch """
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import Compose, Resize, ToTensor
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler
from torch.utils.data.sampler import SubsetRandomSampler
from torch import Tensor

"""glob"""
from glob import glob

""" tqdm """
import time
from tqdm import tqdm

"""Pandas"""
import pandas as pd

""" numpy """
import numpy as np
from numpy import argmax
from PIL import Image

"""JSON"""
import json

"""sklearn"""
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler

"""seaborn"""
import seaborn as sns

"""scipy"""
from scipy import io
from scipy import signal
from scipy.fft import fft, ifft,fftfreq
from scipy import stats

"""time"""
import time

"""PIL"""
from PIL import Image

import re
import shutil
import random
import matplotlib.pyplot as plt
import scipy

from IPython.display import Image

import imageio
import easydict

In [None]:
import gc
# Run a full garbage-collection pass first so unreachable Python objects
# are freed before the CUDA cache is cleared.
gc.collect()
# Ask PyTorch to release cached, currently unused GPU memory.
torch.cuda.empty_cache()

# Path init

In [None]:
import sys
# Show the interpreter version this notebook is running on.
print(sys.version)
# Make the project's helper modules importable.
# NOTE(review): '/Function' is an absolute path at the filesystem root -- confirm this is intended.
sys.path.append('/Function')
# Project-local helpers used by the experiment loop below.
from preprocessing import get_x_y_pairs, preprocessing 

In [None]:
work_path = "" # Our data paths, it is not opened
# os.chdir("") raises FileNotFoundError, so only change directory once a real
# path has been filled in; with the empty placeholder the notebook stays in
# the current working directory.
if work_path:
    os.chdir(work_path)
# Display the entries of the (possibly new) working directory.
os.listdir()

In [None]:
# Number of entries in the current working directory.
len(os.listdir())

# Hyper parameters

In [None]:
# Free-text description of the run. These values are not used for any
# computation in this notebook except Model_name and basic_path, which are
# combined into the result folder path by the experiment loop.
experiment_info = {
    "Task": "Time series forecasting",
    "Dataset": "GSD patients data", 
    "Patient": 'more then 14 days patients',
    "Data_Balanced": "Unknown",
    "Filtering": "None",
    "Normalization": "RobustScaler",
    "Loss": "mse loss",
    "Preprocess": "",
    "Model_name": "TSMixer",
    "basic_path": "",  # result save path
}

# Numeric / boolean settings read by the training and evaluation code.
hyper_params = {
    "seed": 1,
    "lr": 0.005,
    "batch_size": 128,  # 4096
    "test_batch_size": 4096,
    "window_size": 48,  # 12 hours
    "forcast_size": 4,  # 1 hour
    "epochs": 100,
    "no_cuda": False,
    "log_interval": 100,
}

# Single attribute-style namespace holding both groups, in the order above.
args = easydict.EasyDict({**experiment_info, **hyper_params})

# Set the seed and GPU

In [None]:
torch.manual_seed(args.seed)
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")


kwargs = {'num_workers':0,'pin_memory':True} if use_cuda else {}

!nvcc --version
#https://pytorch.org/get-started/previous-versions/
print('--------------------------------------')
print('현재 torch 버전:',torch.__version__)
print('학습을 진행하는 기기:',device)
print('cuda index:', torch.cuda.current_device())
print('gpu 개수:', torch.cuda.device_count())
print('graphic name:', torch.cuda.get_device_name())

# Model load

In [None]:
import sys
# Make the project's model definitions importable.
# NOTE(review): '/Model' is an absolute path at the filesystem root -- confirm this is intended.
sys.path.append('/Model')
# Forecasting model classes from the project's Model directory.
from N_Linear import LTSF_NLinear
from D_Linear import LTSF_DLinear
from TSMixer import TSMixer
from PatchTST import PatchTST

# Metric helper used after validation and testing in the experiment loop.
from Model_performance import model_performance_forecasting

# 1. TS Mixer

In [None]:
# Shape smoke test for TSMixer on random data: 256 samples, 7 channels.
# The tensor is named dummy_input so the builtin input() is not shadowed.
dummy_input = torch.randn(256, args.window_size, 7)
target = torch.randn(256, args.forcast_size, 7)

model = TSMixer(sequence_length=args.window_size, prediction_length=args.forcast_size, input_channels=7, output_channels=7, num_blocks=8)

output = model(dummy_input)
# Displayed by the notebook; compare against target.shape.
output.shape

# 2. NLinear

In [None]:
# Shape smoke test for LTSF_NLinear on random data: 256 samples, 7 channels.
# The tensor is named dummy_input so the builtin input() is not shadowed.
dummy_input = torch.randn(256, args.window_size, 7)
target = torch.randn(256, args.forcast_size, 7)


model = LTSF_NLinear(window_size=args.window_size, forcast_size=args.forcast_size, individual=False, feature_size=7)



output = model(dummy_input)
# Displayed by the notebook; compare against target.shape.
output.shape

# 3. PatchTST

In [None]:
# Shape smoke test for PatchTST on random data: 256 samples, 7 channels.
# The tensor is named dummy_input so the builtin input() is not shadowed.
dummy_input = torch.randn(256, args.window_size, 7)
target = torch.randn(256, args.forcast_size, 7)

model = PatchTST(channel = 7, window_size=args.window_size, forcast_size=args.forcast_size)



output = model(dummy_input)
# Displayed by the notebook; compare against target.shape.
output.shape

# Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Sliding-window dataset that yields (history window, forecast window) pairs.

    Each item is built from one start position: the ``window_size`` rows
    beginning there form the input, and the ``forcast_size`` rows that
    immediately follow form the label. Both are returned as NumPy arrays with
    one column per entry of ``FEATURE_COLUMNS``.

    Args:
        data_indexs: Sequence of row positions at which a window starts.
        data_df: DataFrame containing every column in ``FEATURE_COLUMNS``.
        transform: Stored on the instance but not applied by ``__getitem__``.
        window_size: Number of input rows. When ``None`` (default) the global
            ``args.window_size`` is read at access time, as before.
        forcast_size: Number of label rows. When ``None`` (default) the global
            ``args.forcast_size`` is read at access time, as before.
    """

    # Column order defines the feature axis of both returned arrays.
    FEATURE_COLUMNS = (
        'Year',
        'Month',
        'Day',
        'Minutes',
        'Minutes_sin',
        'Minutes_cos',
        'Glucose_level_original',
    )

    def __init__(self, data_indexs, data_df, transform=None, window_size=None, forcast_size=None):
        self.data_indexs = data_indexs
        self.data_df     = data_df
        self.transform = transform
        self.window_size = window_size
        self.forcast_size = forcast_size

    def _to_feature_array(self, frame):
        """Stack the feature columns of ``frame`` into a (rows, features) array."""
        return np.stack([np.array(frame[column]) for column in self.FEATURE_COLUMNS], axis=1)

    def __getitem__(self, idx):
        """Return ``(data, label)`` arrays for the window starting at ``data_indexs[idx]``."""
        # Fall back to the notebook-level settings only when no explicit size was given.
        window = self.window_size if self.window_size is not None else args.window_size
        horizon = self.forcast_size if self.forcast_size is not None else args.forcast_size

        start = self.data_indexs[idx]
        split = start + window

        # .iloc makes the positional row slicing explicit.
        data = self._to_feature_array(self.data_df.iloc[start:split])             # window
        label = self._to_feature_array(self.data_df.iloc[split:split + horizon])  # forecast

        return data, label

    def __len__(self):
        """Return the number of window start positions."""
        return len(self.data_indexs)


# Data paths

In [None]:
# os.listdir() returns entries in arbitrary order. Sorting keeps the
# experiment index assigned to each file the same from run to run.
data_paths = sorted(os.listdir())
data_paths
len(data_paths)

# Patient selection (at least 14 days of data)

In [None]:
# Keep only the files whose recording length, expressed in days, reaches
# the 14-day threshold.
# NOTE(review): 4 * 24 rows per day presumably means one sample every
# 15 minutes -- confirm against the data.
rows_per_day = 4 * 24
new_data_paths = []

for data_path in data_paths:
    df = pd.read_csv(data_path, engine='python', encoding='utf-8')

    # Length in days, rounded to one decimal before the comparison.
    day = np.round(len(df) / rows_per_day, 1)
    if day >= 14:
        new_data_paths.append(data_path)

In [None]:
# Number of files that passed the length filter.
len(new_data_paths)
# From here on, data_paths refers only to the selected files.
data_paths = new_data_paths
len(data_paths)

# Experiment

In [None]:
def _summed_channel_loss(criterion, output, target):
    """Sum ``criterion`` over the last axis, one channel at a time.

    Args:
        criterion: Callable taking ``(prediction, actual)`` and returning a
            scalar loss (``nn.MSELoss()`` in this notebook).
        output: Model prediction, indexed as ``[:, :, channel]``.
        target: Ground truth, indexed the same way as ``output``.

    Returns:
        The sum of the per-channel losses (still attached to the autograd
        graph when ``output`` is).
    """
    total = 0
    for channel in range(output.shape[-1]):
        actual = target[:, :, channel]
        total += criterion(output[:, :, channel].view_as(actual), actual)
    return total


def _metric(result_df, variable, metric):
    """Return ``metric`` of ``variable`` from a ``model_performance_forecasting`` table."""
    return result_df[result_df['Varitare'] == variable][metric].values[0]


# One full experiment (split, train, validate, test, save) per data file.
for k, data_path in enumerate(data_paths):


    """init"""
    args.experiment_num = k
    df                  = pd.read_csv(data_path)

    """make folder"""
    # NOTE(review): the folder name comes from args.Model_name while the model
    # instantiated below is hard-coded -- keep the two in sync when switching.
    result_save_path = args.basic_path + args.Model_name +'/'+str(args.experiment_num)

    folder_path = result_save_path
    # exist_ok=True replaces the isdir check and the errno.EEXIST test; the
    # old handler referenced `errno`, which this notebook never imports.
    try:
        os.makedirs(folder_path, exist_ok=True)
    except OSError:
        print("Failed to create directory!!!!!")
        raise


    """---------------------------------------------Dataset split---------------------------------------------"""
    train_df, valid_df, test_df = preprocessing(df, RobustScaler())
    print(len(train_df), len(valid_df), len(test_df))

    """---------------------------------------------Make pair---------------------------------------------"""
    train_indexs   = get_x_y_pairs(train_df, args.window_size, args.forcast_size)
    valid_indexs   = get_x_y_pairs(valid_df, args.window_size, args.forcast_size)
    test_indexs  = get_x_y_pairs(test_df, args.window_size, args.forcast_size) # entire time range

    print(len(train_indexs),len(valid_indexs),len(test_indexs))

    """---------------------------------------------Custom dataset---------------------------------------------"""
    train_dataset      = CustomDataset(train_indexs, train_df, transforms.Compose([transforms.ToTensor()]))
    validation_dataset = CustomDataset(valid_indexs, valid_df, transforms.Compose([transforms.ToTensor()]))
    test_dataset       = CustomDataset(test_indexs, test_df, transforms.Compose([transforms.ToTensor()])) # entire time range

    print(len(train_dataset),len(validation_dataset), len(test_dataset))

    """---------------------------------------------Data Loader---------------------------------------------"""
    """Train"""
    args.train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size = args.batch_size,
        shuffle = False,
        **kwargs
    )

    """Validation"""
    args.validation_loader = torch.utils.data.DataLoader(
        dataset=validation_dataset,
        batch_size = args.batch_size,
        shuffle = False,
        **kwargs
    )

    """Test"""
    args.test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size = args.test_batch_size,
        shuffle = False,
        **kwargs
    )

    print("Length of the train_loader:", len(args.train_loader))
    print("Length of the val_loader:", len(args.validation_loader))
    print("Length of the test_loader:", len(args.test_loader))

    """---------------------------------------------optimizer---------------------------------------------"""
    args.device = device

    # Alternative models; uncomment exactly one assignment to args.model.
    #args.model = TSMixer(sequence_length=args.window_size, prediction_length=args.forcast_size, input_channels=7, output_channels=7, num_blocks=8).to(device)
    #args.model = LTSF_NLinear(window_size=args.window_size, forcast_size=args.forcast_size, individual=False, feature_size=7).to(device)
    args.model= PatchTST(channel = 7, window_size=args.window_size, forcast_size=args.forcast_size).to(device)


    args.optimizer = optim.Adam(args.model.parameters(), lr=args.lr)
    args.criterion = nn.MSELoss()


    """---------------------------------------------Train---------------------------------------------"""
    path = folder_path

    train_loader      = args.train_loader
    validation_loader = args.validation_loader

    model     = args.model
    optimizer = args.optimizer
    criterion = args.criterion

    device = args.device

    # Only the avg_* lists are filled in below; the remaining containers are
    # kept because other cells may still reference them.
    train_losses        = []
    avg_train_losses    = []
    Train_baths_ACC     = []
    Train_ACC           = []
    Train_AUROC         = []


    """Validaion"""
    valid_losses        = []
    avg_valid_losses    = []
    Validation_ACC      = []
    Valid_ACC_per_Class = []
    Validation_AUROC    = []

    best_loss  = 100000000000
    #best_MAE   = 100000
    #best_MSE   = 100000
    best_MAPE  = 100000000000
    best_epoch = 0
    best_model_save_path = path +'/'+ 'best model of experiment ' + str(args.experiment_num)

    start = time.time()

    for epoch in range(1, args.epochs + 1):

        """Train"""
        model.train()
        train_loss = 0.0

        for batch_idx, (data,target) in enumerate(train_loader):
            data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)
            optimizer.zero_grad()

            output = model(data).to(device)

            """loss"""
            loss = _summed_channel_loss(criterion, output, target)

            """update and save loss"""
            loss.backward()
            optimizer.step()

            # Accumulate a plain float. Adding the tensor itself keeps every
            # batch's autograd graph alive and leaves a running total that
            # requires grad, which cannot be converted to NumPy on CPU.
            batch_loss = loss.item()
            train_loss += batch_loss

            if batch_idx % args.log_interval == 0:

                #1.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), batch_loss))


        """Validation"""
        model.eval()

        valid_loss = 0.0
        total = len(validation_loader.dataset)

        true_labels = []
        pred_labels = []

        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(validation_loader):
                data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)

                output = model(data).to(device)

                """loss"""
                loss = _summed_channel_loss(criterion, output, target)
                valid_loss += loss.item()

                true_labels.append(target.detach().cpu().numpy())
                pred_labels.append(output.detach().cpu().numpy())

        """caclulate performance"""
        # Stored under its own name so the validation split `valid_df` is not overwritten.
        valid_result_df = model_performance_forecasting(pred_labels, true_labels)


        """Loss and ACC """
        train_loss /= len(train_loader)
        valid_loss /= len(validation_loader)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)

        glu_MAPE = _metric(valid_result_df, 'glu', 'MAPE')

        print('------------------------------------------')
        print('Valid set: Average loss: {:.4f}'.format(valid_loss))
        print('------------------------------------------')
        for title, variable in (('Year', 'year'), ('Month', 'month'), ('Day', 'day'),
                                ('Minutes', 'minutes'), ('Min_sin', 'min_sin'), ('Min_cos', 'min_cos')):
            print('Valid set: {} MAPE: {:.4f}'.format(title, _metric(valid_result_df, variable, 'MAPE')))
        print('-------------------------------------------')
        print('Valid set: Glucose MAE: {:.4f}'.format(_metric(valid_result_df, 'glu', 'MAE')))
        print('Valid set: Glucose MSE: {:.4f}'.format(_metric(valid_result_df, 'glu', 'MSE')))
        print('Valid set: Glucose MAPE: {:.4f}'.format(glu_MAPE))
        print('-------------------------------------------')

        """Save best model"""
        # Model selection is driven by the glucose MAPE on the validation set.
        if glu_MAPE < best_MAPE:
            torch.save(model, best_model_save_path)
            print("best model was saved.")
            print('-------------------------------------------')
            best_loss = valid_loss
            best_epoch = epoch
            best_MAPE = glu_MAPE
            #best_MAE   = MAE
            #best_MSE   = MSE
            #best_MAPE  = MAPE

            valid_result_df['best_epoch'] = best_epoch
            valid_result_df.to_excel(folder_path +'/Valid Result.xlsx',index=True)

        print('----------------------------------------------------------------')


    """figure save"""
    fig = plt.figure(figsize=(10,8))
    plt.plot(range(1,len(avg_train_losses)+1),avg_train_losses, label='Training Loss')
    plt.plot(range(1,len(avg_valid_losses)+1),avg_valid_losses, label='Validation Loss')

    plt.xlabel('epochs')
    plt.ylabel('loss')

    plt.xlim(0, len(avg_train_losses)+1)

    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig(path +'/Loss.png', dpi = 300)
    plt.cla()
    plt.clf()
    plt.close()

    """---------------------------------------------Test---------------------------------------------"""
    test_loader  = args.test_loader

    device = args.device
    criterion = args.criterion

    best_model_save_path = path +'/'+ 'best model of experiment ' + str(args.experiment_num)
    # NOTE(review): this loads a fully pickled model object. Recent PyTorch
    # releases changed the default of torch.load's `weights_only` argument,
    # which can reject such files -- confirm against the installed version.
    model = torch.load(best_model_save_path)
    model.to(device)

    model.eval()

    test_loss = 0.0
    true_labels = []
    pred_labels = []

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):

            data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)

            output = model(data).to(device)

            """loss"""
            loss = _summed_channel_loss(criterion, output, target)
            test_loss += loss.item()


            true_labels.append(target.detach().cpu().numpy())
            pred_labels.append(output.detach().cpu().numpy())


    test_loss /= len(test_loader)
    print('Test set: Average loss(MSE): {:.4f}'.format(test_loss))
    test_result_df = model_performance_forecasting(pred_labels, true_labels)
    test_result_df.to_excel(folder_path +'/Test Result.xlsx',index=True)




    """---------------------------------------------Save---------------------------------------------"""
    # Write the run settings next to the results, leaving out the loaders and
    # the model object.
    args_copy = args.copy()
    for key in ('train_loader','validation_loader','test_loader','model'):
        del args_copy[key]

    settings_df = pd.DataFrame(args_copy,index = [0]).T
    settings_df.to_excel(folder_path +'/Settings.xlsx',index=True)

    print('-------------------------------[{}/{}]-------------------------------'.format(k, len(data_paths)))