In [1]:
from google.colab import drive
drive.mount("/content/gdrvie/")


Mounted at /content/gdrvie/


In [2]:
import os
os.chdir("/content/gdrvie/MyDrive/SKT")
os.getcwd()

'/content/gdrvie/MyDrive/SKT'

In [3]:
#오류가 어디서 났는지 자세히 보여주게끔 하는 코드
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
#패키지들
import os
import sys
import argparse
import json

import pandas as pd
from time import time

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from utils.utils import *
from utils.torchUtils import *
from layers.models import *
from layers.graphLearningLayers import *
from layers.nriLayers import *
from utils.dataloader import *

In [5]:
!nvidia-smi

Tue Oct 25 13:08:22 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      Off  | 00000000:00:04.0 Off |                    0 |
| N/A   30C    P0    43W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
#args
args = argparse.Namespace(
    # data path
    data_type='skt',
    data_path='./data/skt',
    pred_steps=3,
    tr=0.7,
    val=0.2,
    standardize=True, #action='store_true'
    exclude_TA=True,  #action='store_true'
    lag=7,
    cache_file='./data/cache.pickle',
    # training options
    batch_size=2,
    fine_tunning_every=12, #nri multistep에 처음생김
    epoch=30,
    epoch_online=30, #nri multistep에 처음생김
    lr=0.001,
    kl_loss_penalty=0.01,
    patience=5,
    delta=0.01,
    print_log_option=10,
    verbose=True,  #action='store_true'
    train_ar = True, #action='store_true'이고 아마 이게 True
    train_online = False, #action='store_true'이고 아마 이게 False
    # reg_loss_penalty=1e-2,
    # kl_weight=0.1,
    # gradient_max_norm=5,

    # model options
    model_path='./data/skt/multi_NRI',
    num_blocks=3,
    k=2,
    top_k=4,
    embedding_dim=256,
    #alpha=3,
    beta=0.5,
    tau=0.1,
    model_name='latest_checkpoint.pth.tar',
    n_hid_encoder=256,
    msg_hid=256,
    msg_out=256,
    n_hid_decoder=256,
    save_result = True #action='store_true'
    #hard=True,
    # To test
    #test=False,   #학습하고 싶을땐 True로 바꿔
    #model_file='latest_checkpoint.pth.tar',
    #model_type='proto',
    #num_folds=1

)

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# make a path to save a model
if not os.path.exists(args.model_path):
    print("Making a path to save the model...")
    os.makedirs(args.model_path, exist_ok=True)
else:
    print("The path already exists, skip making the path...")

print(f'saving the commandline arguments in the path: {args.model_path}...')
args_file = os.path.join(args.model_path, 'commandline_args.txt')
with open(args_file, 'w') as f:
    json.dump(args.__dict__, f, indent=2)

The path already exists, skip making the path...
saving the commandline arguments in the path: ./data/skt/multi_NRI...


In [9]:
def main(args):
    # read data
    print("Loading data...")
    if args.data_type == 'skt':
        # load gestures-data
        data = load_skt(args) if not args.exclude_TA else load_skt_without_TA(args)
    else:
        print("Unkown data type, data type should be \"skt\"")
        sys.exit()

    # define training, validation, test datasets and their dataloaders respectively
    train_data, valid_data, test_data \
        = TimeSeriesDataset(*data['train'], lag=args.lag, pred_steps=args.pred_steps), \
          TimeSeriesDataset(*data['valid'], lag=args.lag, pred_steps=args.pred_steps), \
          TimeSeriesDataset(*data['test'], lag=args.lag, pred_steps=args.pred_steps)
    train_loader, valid_loader, test_loader \
        = DataLoader(train_data, batch_size=args.batch_size, shuffle=False), \
          DataLoader(valid_data, batch_size=args.batch_size, shuffle=False), \
          DataLoader(test_data, batch_size=args.fine_tunning_every, shuffle=False)

    print("Loading data done!")

    model = NRIMulti(
        num_heteros=args.num_heteros,
        num_time_series=args.num_ts,
        time_lags=args.lag,
        #device=device,
        tau=args.tau,
        n_hid_encoder=args.n_hid_encoder,
        msg_hid=args.msg_hid,
        msg_out=args.msg_out,
        n_hid_decoder=args.n_hid_decoder,
        pred_steps=args.pred_steps,
        device=device
    ).to(device)

    # setting training args...
    criterion = nn.MSELoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), args.lr)
    # optimizer.load_state_dict(ckpt['optimizer'])
    early_stopping = EarlyStopping(
        patience= args.patience,
        verbose= args.verbose,
        delta = args.delta,
        path= args.model_path,
        model_name= args.model_name
    )

    # train the multi-step heads using the training data
    if args.train_ar:
        train(args, model, train_loader, valid_loader, optimizer, criterion, early_stopping, device)
    else:
        print('skip training auto-regressives predictions...')
    # test the multi-step HeteroNRI (model) using test data
    # fine tune the model every 'args.fine_tunning_every'
    # test the fine-tuned model using the next batch of the test dataset.

    if args.train_online:
        print('start online-learning...')
    else:
        print('start evaluating...')
    # record time elapsed of the fine-tunning
    # the time should be less than 5 minutes...
  
  
  
    #test  
    te_mse = [[] for _ in range(args.pred_steps)]; te_r2 = [[] for _ in range(args.pred_steps)]; 
    te_mae= [[] for _ in range(args.pred_steps)]
    weights = []
    time_ellapsed = []

    criterion_mask = nn.BCELoss()
    # test_loader_iter = iter(test_loader)

    predictions = []
    labels = []
    graphs = []

    for batch_idx, x in enumerate(test_loader): 

        x['input'], x['mask'], x['label'], x['label_mask'] \
        = x['input'].to(device), x['mask'].to(device), x['label'].to(device), x['label_mask'].to(device)
        
        # test
        model.eval() 
        with torch.no_grad():
            out = model(x, args.beta)
            preds = out['preds'].detach().cpu().numpy()
            label = x['label'].detach().cpu().numpy()

            for t in range(args.pred_steps):
                te_mse[t].append(mean_squared_error(label[...,t,:].flatten(), preds[...,t,:].flatten()))
                te_mae[t].append(mean_absolute_error(label[...,t,:].flatten(), preds[...,t,:].flatten()))
                te_r2[t].append(r2_score(label[...,t,:].flatten(), preds[...,t,:].flatten()))
            weights.append(len(out['preds']))

            # record labels and predictions 
            predictions.append(out['preds'].detach().cpu()) # bs, c, t, n
            labels.append(x['label'].detach().cpu()) # bs, c, t, n
            if out['adj_mat'] is not None: 
                graphs.append(out['adj_mat'].detach().cpu()) # bs, c, n, n or bs, n, n
        
        if args.train_online:
            model.train()
            ts = time()
            # feed forward
            print(f'[Batch: {batch_idx+1} / {len(test_loader)}] online learning...')
            for epoch in range(args.epoch_online):
                with torch.set_grad_enabled(True):
                    out = model(x, args.beta)
                    mse_loss = criterion(out['outs_label'], x['label'])
                    if out['outs_mask'] is not None: 
                        bce_loss = criterion_mask(out['outs_mask'], x['label_mask'])
                        loss = mse_loss + bce_loss
                    else: 
                        loss = mse_loss
                    if out['kl_loss'] is not None: 
                        loss += args.kl_loss_penalty * out['kl_loss']
                    # if out['regularization_loss'] is not None: 
                    #     loss += args.reg_loss_penalty * out['regularization_loss']
                # backward 
                model.zero_grad()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            tf = time()
            time_ellapsed.append(tf-ts)
            print(f'[Batch: {batch_idx+1} / {len(test_loader)}] online learning done in {tf-ts:4f} sec')

    te_mse = np.array(te_mse)
    te_mae = np.array(te_mae)
    te_r2 = np.array(te_r2)
    time_ellapsed = np.array(time_ellapsed) if args.train_online else float('nan')
   
    te_mse_mean = np.average(te_mse, weights= weights, axis= 1)
    te_r2_mean  = np.average(te_r2, weights= weights, axis= 1)
    te_mae_mean  = np.average(te_mae, weights=weights, axis= 1)
    time_ellapsed_mean = np.average(time_ellapsed, weights=weights) if args.train_online else float('nan')

    te_mse_std = np.average((te_mse-te_mse_mean[:, np.newaxis])**2, weights= weights, axis= 1)
    te_r2_std = np.average((te_r2-te_r2_mean[:, np.newaxis])**2, weights= weights, axis= 1)
    te_mae_std = np.average((te_mae-te_mae_mean[:, np.newaxis])**2, weights= weights, axis= 1)
    time_ellapsed_std = np.average((time_ellapsed-time_ellapsed_mean)**2, weights=weights) if args.train_online else float('nan')
    
    perf = {}
    for t in range(args.pred_steps):
        perf[f'r2_{t}'] = [te_r2_mean[t]]
        perf[f'mae_{t}'] = [te_mae_mean[t]]
        perf[f'mse_{t}'] = [te_mse_mean[t]]
        perf[f'r2_std_{t}'] = [te_r2_std[t]]
        perf[f'mae_std_{t}'] = [te_mae_std[t]]
        perf[f'te_mse_std_{t}'] = [te_mse_std[t]]
    perf['mean_fine_tunning_time'] = [time_ellapsed_mean]
    perf['std_fine_tunning_time'] = [time_ellapsed_std]

    print(perf)

    if args.save_result: 
        
        print('saving the predictions...')

        predictions = torch.concat(predictions, dim=0) # num_obs, num_cells, preds_steps, num_time_series
        labels = torch.concat(labels, dim=0) # num_obs, num_cells, preds_steps, num_time_series   

        #예측값 저장 및 figure 저장

        for t in range(args.pred_steps):
            p = torch.permute(predictions[:,:,t, :], (1, 0, 2)) # num_cells, num_obs, num_time_series 
            p = p.numpy()
            if args.cache is not None: 
                # preds = inv_min_max_scaler(preds, args.cache, args.columns)
                p = inv_min_max_scaler_ver2(p, args.cache, args.columns)

            l = torch.permute(labels[:,:,t, :], (1, 0, 2)) # num_cells, num_obs, num_time_series
            l = l.numpy()
            num_cells = l.shape[0]
            if args.cache is not None: 
                # labels = inv_min_max_scaler(labels, args.cache, args.columns)
                l = inv_min_max_scaler_ver2(l, args.cache, args.columns)
        
            # saving figures: predictions vs labels
            for i in tqdm(range(num_cells), total= num_cells):
                enb_id = args.decoder.get(i)
                write_csv(args, f'test/predictions_{t}_step', f'predictions_{enb_id}.csv', p[i, ...], args.columns)
                write_csv(args, f'test/labels_{t}_step', f'labels_{enb_id}.csv', l[i, ...], args.columns)   
                
                fig, axes = plt.subplots(len(args.columns), 1, figsize= (10,3*len(args.columns)))

                for j in range(len(args.columns)):
                    col_name = args.columns[j]
                    fig.axes[j].set_title(f'time-seris plot: {col_name}')
                    fig.axes[j].plot(p[i,:,j], label= 'prediction')
                    fig.axes[j].plot(l[i,:,j], label= 'label')
                    fig.axes[j].legend()
                
                fig.suptitle(f"Prediction and True label plot of {enb_id}", fontsize=20, position= (0.5, 1.0+0.05))
                fig.tight_layout()
                # make a path to save a figures 
                fig_path = os.path.join(args.model_path, f'test/figures/{t}_step')
                if not os.path.exists(fig_path):
                    # print("Making a path to save figures...")
                    print(f"{fig_path}")
                    os.makedirs(fig_path, exist_ok= True)
                # else:
                #     print("The path to save figures already exists, skip making the path...")
                fig_file = os.path.join(fig_path, f'figure_{enb_id}.png')
                fig.savefig(fig_file)
                plt.close('all')

    return perf 







In [10]:
# #NRI hyper parameters
# embedding_dims = [64,128,256,512]
# lags = [7,12,24,36,48]
# taus = [0.01,0.1,0.5,1.0,5.0 ]

In [11]:
#single step의 optimal hyper parameter로 결과 내기
print(f'saving the commandline arguments in the path: {args.model_path}...')
args_file = os.path.join(args.model_path, 'commandline_args.txt')
with open(args_file, 'w') as f:
    json.dump(args.__dict__, f, indent=2)

saving the commandline arguments in the path: ./data/skt/multi_NRI...


In [12]:
perf = main(args)
print("Test done!")

Loading data...


100%|██████████| 306/306 [00:07<00:00, 39.57it/s] 


the shape of X       : (306, 2293, 9)
Loading data done!


  nn.init.xavier_normal(m.weight.data)
  nn.init.xavier_normal(m.weight.data)


Start training...


  soft_max_1d = F.softmax(trans_input)


Epoch [1/30] Batch [10/798]:                     loss = 306.7317810058594
Epoch [1/30] Batch [20/798]:                     loss = 216.354736328125
Epoch [1/30] Batch [30/798]:                     loss = 197.12939453125
Epoch [1/30] Batch [40/798]:                     loss = 163.13299560546875
Epoch [1/30] Batch [50/798]:                     loss = 239.72499084472656
Epoch [1/30] Batch [60/798]:                     loss = 364.12115478515625
Epoch [1/30] Batch [70/798]:                     loss = 774.9683837890625
Epoch [1/30] Batch [80/798]:                     loss = 585.0399169921875
Epoch [1/30] Batch [90/798]:                     loss = 513.418701171875
Epoch [1/30] Batch [100/798]:                     loss = 757.2833862304688
Epoch [1/30] Batch [110/798]:                     loss = 566.9803466796875
Epoch [1/30] Batch [120/798]:                     loss = 477.3011169433594
Epoch [1/30] Batch [130/798]:                     loss = 272.2059631347656
Epoch [1/30] Batch [140/798]:      

  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)
  soft_max_1d = F.softmax(trans_input)


{'r2_0': [0.8882805588878341], 'mae_0': [6.428371996901154], 'mse_0': [312.2692759932436], 'r2_std_0': [0.0002818646246640451], 'mae_std_0': [0.44523016580936353], 'te_mse_std_0': [10509.925725986517], 'r2_1': [0.8877035878865028], 'mae_1': [6.893153812011443], 'mse_1': [311.41612561256096], 'r2_std_1': [0.0003531381098438325], 'mae_std_1': [0.38104827377095807], 'te_mse_std_1': [9386.002321093332], 'r2_2': [0.886171665701003], 'mae_2': [7.352809549996216], 'mse_2': [318.09875453759105], 'r2_std_2': [0.00031670012511800987], 'mae_std_2': [0.38987253000845945], 'te_mse_std_2': [10649.577121642009], 'mean_fine_tunning_time': [nan], 'std_fine_tunning_time': [nan]}
saving the predictions...


100%|██████████| 306/306 [07:58<00:00,  1.56s/it]
100%|██████████| 306/306 [07:51<00:00,  1.54s/it]
100%|██████████| 306/306 [08:01<00:00,  1.57s/it]

Test done!





In [13]:
#최종 r2,mse,mae 저장
for k, v in perf.items(): 
    print(f'{k}: {v[0]:.4f}')
csv_file = os.path.join(args.model_path, 'perf.csv')
pd.DataFrame(perf).to_csv(csv_file, index= False)

r2_0: 0.8883
mae_0: 6.4284
mse_0: 312.2693
r2_std_0: 0.0003
mae_std_0: 0.4452
te_mse_std_0: 10509.9257
r2_1: 0.8877
mae_1: 6.8932
mse_1: 311.4161
r2_std_1: 0.0004
mae_std_1: 0.3810
te_mse_std_1: 9386.0023
r2_2: 0.8862
mae_2: 7.3528
mse_2: 318.0988
r2_std_2: 0.0003
mae_std_2: 0.3899
te_mse_std_2: 10649.5771
mean_fine_tunning_time: nan
std_fine_tunning_time: nan
