In [5]:
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from models import Informer, Autoformer, Transformer, DLinear, Linear, NLinear, PatchTST
from utils.tools import EarlyStopping, adjust_learning_rate, visual, test_params_flop
from utils.metrics import metric

import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.optim import lr_scheduler 

import os
import time

import warnings
import matplotlib.pyplot as plt
import random
import pandas
import matplotlib.dates as mdates


warnings.filterwarnings('ignore')

class Exp_Main(Exp_Basic):
    def __init__(self, args):
        super(Exp_Main, self).__init__(args)

    def _build_model(self):
        model_dict = {
            'Autoformer': Autoformer,
            'Transformer': Transformer,
            'Informer': Informer,
            'DLinear': DLinear,
            'NLinear': NLinear,
            'Linear': Linear,
            'PatchTST': PatchTST,
        }
        model = model_dict[self.args.model].Model(self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        print('get data')
        data_set, data_loader = data_provider(self.args, flag)
        print('gotten data')
        return data_set, data_loader

    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        criterion = nn.MSELoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                        else:
                            if self.args.output_attention:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if 'Linear' in self.args.model or 'TST' in self.args.model:
                        outputs = self.model(batch_x)
                    else:
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

                pred = outputs.detach().cpu()
                true = batch_y.detach().cpu()

                loss = criterion(pred, true)

                total_loss.append(loss)
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        if self.args.use_amp:
            scaler = torch.cuda.amp.GradScaler()
            
        scheduler = lr_scheduler.OneCycleLR(optimizer = model_optim,
                                            steps_per_epoch = train_steps,
                                            pct_start = self.args.pct_start,
                                            epochs = self.args.train_epochs,
                                            max_lr = self.args.learning_rate)

        param_count = sum(p.numel() for p in self.model.parameters())

        print('parameter_count_test: ', param_count)
        
        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x = batch_x.float().to(self.device)

                batch_y = batch_y.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                        else:
                            if self.args.output_attention:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                        f_dim = -1 if self.args.features == 'MS' else 0
                        outputs = outputs[:, -self.args.pred_len:, f_dim:]
                        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                        loss = criterion(outputs, batch_y)
                        train_loss.append(loss.item())
                else:
                    if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                    else:
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark, batch_y)
                    # print(outputs.shape,batch_y.shape)
                    f_dim = -1 if self.args.features == 'MS' else 0
                    outputs = outputs[:, -self.args.pred_len:, f_dim:]
                    batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                    loss = criterion(outputs, batch_y)
                    train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if self.args.use_amp:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()
                    
                if self.args.lradj == 'TST':
                    adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
                    scheduler.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            if self.args.lradj != 'TST':
                adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args)
            else:
                print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def saveData(self, y, y_pred, start_time, end_time, hrs):  
        print(len(y), len(y_pred))
        time_index = pandas.date_range(start=start_time, end=end_time, freq='H')
        time_index = time_index[:len(y)]
        # if len(y) != len(time_index):
        #     raise ValueError(f"Length of y ({len(y)}) does not match number of time steps ({len(time_index)}).")
        # if len(y_pred) != len(time_index):
        #     raise ValueError(f"Length of y_pred ({len(y_pred)}) does not match number of time steps ({len(time_index)}).")
        df = pandas.DataFrame({
            'y': y,
            'y_pred': y_pred
        }, index=time_index)
        
        df = df.reset_index().rename(columns={'index': 'DATE'})
        df.to_csv(f'data_results/patchtst_predictions_bigpatch_bigmodel_{hrs}.csv')


    
    def test(self, setting, test=0):
        save_data = True
        pred_len = 1
        test_data, test_loader = self._get_data(flag='test')
        
        if test:
            print('loading model')
            self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))

        preds = []
        trues = []
        inputx = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        
        self.model.eval()
        param_count = sum(p.numel() for p in self.model.parameters())

        print('parameter_count_test: ', param_count)
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                        else:
                            if self.args.output_attention:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                    else:
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]

                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                f_dim = -1 if self.args.features == 'MS' else 0
                # print(outputs.shape,batch_y.shape)
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                outputs = outputs.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()

                pred = outputs  # outputs.detach().cpu().numpy()  # .squeeze()
                true = batch_y  # batch_y.detach().cpu().numpy()  # .squeeze()
                print(pred.shape)
                preds.append(pred)
                trues.append(true)
                inputx.append(batch_x.detach().cpu().numpy())
                if i % 15 == 0:
                    input = batch_x.detach().cpu().numpy()
                    gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
                    pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))

        if self.args.test_flop:
            test_params_flop((batch_x.shape[1],batch_x.shape[2]))
            exit()
            
        folder_path = './results/' + setting + '/'
        # result save
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
            
        preds = np.array(preds)
        trues = np.array(trues)

        predss = preds[:,:,-1,-1].flatten()
        truess = trues[:,:,-1,-1].flatten()

        start_date = '2023-01-01 00:00'
        end_date = '2023-12-31 23:00'
        
        if save_data:
            start_date = '2020-01-11 13:00:00' 
            end_date = '2024-05-28 03:00:00'
        
        time_index = pandas.date_range(start=start_date, end=end_date, freq='H')

        # Adjust the time index to match the length of the data
        min_len = min(len(time_index), len(predss))
        time_index = time_index[:min_len]
        #truess = np.roll(truess, pred_len)
        
        predss = predss[:min_len]
        truess = truess[:min_len]

        mae, mse, rmse, mape, mspe, rse, corr, wape, nse, pfe, tpe, rfactor, pfactor95 = metric(preds, trues)
        
        if save_data:
            self.saveData(truess, predss, start_date, end_date, pred_len)

        
        else:
            #gtt = np.concatenate((time_index, truess), axis = 0)
            #pdt = np.concatenate((time_index, predss), axis = 0)
            
            #visual(gtt, pdt, 'some_file.pdf')
            
            # Plotting
            # Set the font to Times New Roman
            plt.rcParams['font.family'] = 'serif'
            plt.rcParams['font.size'] = 14  # Increase base font size
            plt.rcParams['font.weight'] = 'normal'
    
            fig, ax = plt.subplots(figsize=(12, 6))
            
            ax.plot(time_index, predss, label='PatchTST Predictions', color='blue')
            ax.plot(time_index, truess, label='Observed height', color='#BB0000')
            ax.plot([], [], ' ', label=f"NSE: {round(nse,4)}")
            ax.plot([], [], ' ', label=f"WAPE: {round(wape,2)}")
            
            ax.set_xlabel('Time', fontsize=16)
            ax.set_ylabel('River Level', fontsize=16)
            
            ax.set_ylim(0, 2)
            ax.legend(loc='upper left', fontsize=16)
            ax.tick_params(axis='both', which='major', labelsize=16)
            
            axins1 = inset_axes(ax, width="60%", height="40%", loc='upper right')
            axins1.plot(time_index, predss, color='blue')
            axins1.plot(time_index, truess, color='#BB0000')
            axins1.set_xlabel('Time', fontsize=14)
            axins1.set_ylabel('River Level', fontsize=14)
            
            axins1.xaxis.set_major_locator(mdates.DayLocator(interval=5))
            axins1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    
            #may have to change it
            axins1.set_ylim(0,1)
            start_date = pandas.Timestamp('2023-05-01 07:00')  # Adjust date to match your dataset
            end_date = pandas.Timestamp('2023-06-02 06:00')  # Adjust date to match your dataset
            axins1.set_xlim(start_date, end_date)
        
            
            # Apply mark_inset to show the bounds of the inset on the main plot
            mark_inset(ax, axins1, loc1=3, loc2=4, fc="none", ec="0.25")  # Try adjusting loc1 and loc2 here        

            plt.tight_layout()

            # Save the figure
    
            plt.savefig('PatchTSTfig2.png', dpi=300)  # Update the path as needed
            plt.close(fig)
    
        print('mse:{}, mae:{}, rse:{}, rmse:{}, wape:{}, nse:{}, pfe:{}, tpe:{}, rfactor:{}, pfactor95:{}'.format(mse, mae, rse, rmse, wape, nse, pfe, tpe, rfactor, pfactor95))
        f = open("result.txt", 'a')
        f.write(setting + "  \n")
        f.write('mse:{}, mae:{}, rse:{}, rmse:{}, wape:{}, nse:{}, pfe:{}, tpe:{}, rfactor:{}, pfactor95:{}'.format(mse, mae, rse, rmse, wape, nse, pfe, tpe, rfactor, pfactor95))
        f.write('\n')
        f.write('\n')
        f.close()

        # np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr]))
        np.save(folder_path + 'pred.npy', preds)
        # np.save(folder_path + 'true.npy', trues)
        # np.save(folder_path + 'x.npy', inputx)
        return

    def predict(self, setting, load=False):
        pred_data, pred_loader = self._get_data(flag='pred')

        if load:
            path = os.path.join(self.args.checkpoints, setting)
            best_model_path = path + '/' + 'checkpoint.pth'
            self.model.load_state_dict(torch.load(best_model_path))

        preds = []

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input
                dec_inp = torch.zeros([batch_y.shape[0], self.args.pred_len, batch_y.shape[2]]).float().to(batch_y.device)
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                        else:
                            if self.args.output_attention:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if 'Linear' in self.args.model or 'TST' in self.args.model:
                        outputs = self.model(batch_x)
                    else:
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                pred = outputs.detach().cpu().numpy()  # .squeeze()
                preds.append(pred)

        preds = np.array(preds)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        np.save(folder_path + 'real_prediction.npy', preds)

        return


In [6]:
krishna = 20

In [7]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset

class Args:
    def __init__(self):
        self.random_seed = 2021
        self.is_training = 0
        self.model_id = 'gaze_height_full'
        self.model = 'PatchTST'
        self.data = 'gaze_height_full'
        self.root_path = './dataset/'
        self.data_path = 'chattahoochee_1hr_02336490.csv'
        self.features = 'M'
        self.target = 'gaze_height'
        self.freq = 'h'
        self.checkpoints = './checkpoints/'
        self.seq_len = 13
        self.label_len = 0
        self.pred_len = 1 #should be set using loop
        self.enc_in = 7
        self.e_layers = 16
        self.n_heads = 8
        self.d_model = 16
        self.d_ff = 512
        self.dropout = 0.3
        self.fc_dropout = 0.3
        self.head_dropout = 0
        self.patch_len = 16
        self.stride = 8
        self.des = 'Exp'
        self.train_epochs = 30
        self.embed = 'fixed'
        self.itr = 1 
        self.batch_size = 512 
        self.learning_rate = 0.002
        #fc_dropout = 0.05
        #head_dropout = 0.0
        #patch_len = 16
        #stride = 8
        self.padding_patch = 'end'
        self.revin = True
        self.affine = False
        self.subtract_last = 0
        self.decomposition = 0
        self.kernel_size = 25
        self.individual = 0
        self.embed_type = 0
        #enc_in = 7
        #e_layers = 2
        self.dec_in = 7
        self.c_out = 7
        #d_model = 512
        #n_heads = 8
        self.d_layers = 1
        #d_ff = 2048
        self.moving_avg = 25
        self.factor = 1
        self.distil = True
        self.dropout = 0.05
        self.embed = 'fixed'
        self.activation = 'gelu'
        self.output_attention = False
        self.do_predict = False
        self.num_workers = 10
        #itr = 2
        #train_epochs = 100
        #batch_size = 128
        self.patience = 6
        #learning_rate = 0.0001
        #des = 'test'
        self.loss = 'mse'
        self.lradj = 'type3'
        self.pct_start = 0.3
        self.use_amp = False
        self.use_gpu = torch.cuda.is_available()
        self.gpu = 0
        self.use_multi_gpu = False
        self.devices = '0,1,2,3'
        self.test_flop = False
        self.tempfeat = -3

args = Args()

# Set random seed for reproducibility
random.seed(args.random_seed)
torch.manual_seed(args.random_seed)
np.random.seed(args.random_seed)

# GPU configuration
if args.use_gpu and args.use_multi_gpu:
    device_ids = [int(id_) for id_ in args.devices.split(',')]
    gpu = device_ids[0]

# Print out the experiment settings
print('Experiment Configuration:')
print('Model ID:', args.model_id)
print('Model:', args.model)
print('Dataset:', args.data)

# Example Experiment Code
Exp = Exp_Main  # Assuming Exp_Main is defined elsewhere

if args.is_training:
    for ii in range(args.itr):
        setting = f'{args.model_id}_{args.model}_{args.data}_ft{args.features}_sl{args.seq_len}_ll{args.label_len}_pl{args.pred_len}_dm{args.d_model}_nh{args.n_heads}_el{args.e_layers}_dl{args.d_layers}_df{args.d_ff}_fc{args.factor}_eb{args.embed}_dt{args.distil}_{args.des}_{ii}'
        exp = Exp(args)  # Assuming Exp is instantiated with all required settings
        print(f'>>>>>>> Start training : {setting} >>>>>>>>>>>>>>>>>>>>>>>>>>')
        trained_model = exp.train(setting)
        print(f'>>>>>>> Testing : {setting} <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
        exp.test(setting)

        if args.do_predict:
            print(f'>>>>>>> Predicting : {setting} <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
            exp.predict(setting, True)

        torch.cuda.empty_cache()
else:
    ii = 0
    setting = f'{args.seq_len}_{args.pred_len}_{args.model}_{args.data}_ft{args.features}_sl{args.seq_len}_ll{args.label_len}_pl{args.pred_len}_dm{args.d_model}_nh{args.n_heads}_el{args.e_layers}_dl{args.d_layers}_df{args.d_ff}_fc{args.factor}_eb{args.embed}_dt{args.distil}_{args.des}_{ii}'
    exp = Exp(args)  # set experiments
    print(f'>>>>>>> Testing : {setting} <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
    exp.test(setting, test=1)
    torch.cuda.empty_cache()


Experiment Configuration:
Model ID: gaze_height_full
Model: PatchTST
Dataset: gaze_height_full
Use GPU: cuda:0
>>>>>>> Testing : 13_1_PatchTST_gaze_height_full_ftM_sl13_ll0_pl1_dm16_nh8_el16_dl1_df512_fc1_ebfixed_dtTrue_Exp_0 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
get data
woring..............
working.................
2023-04-26 09:00:00 2024-05-28 03:00:00
test 9418
gotten data
loading model
parameter_count_test:  289345
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
(512, 1, 5)
9216 9216
mse:0.0002809483848977834, mae:0.009862545877695084, rse:0.05633654072880745, rmse:0.01676151528954506, wape:2.481062090353372, nse:0.9968261942267418, pfe:0.0050705671310424805, tpe:74820, rfactor:0.05633654072880745, pfactor95:2432.0


In [4]:


test_data, test_loader = exp._get_data(flag='test')

train_data, train_loader = exp._get_data(flag='train')

get data
woring..............
working.................
2023-04-26 09:00:00 2024-05-28 03:00:00
test 9418
gotten data
get data
woring..............
working.................
2010-01-01 00:00:00 2022-12-17 00:00:00
train 113003
gotten data


In [18]:
dir(test_data)
#dir(test_loader)

['__add__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__read_data__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_is_protocol',
 'data_path',
 'data_stamp',
 'data_x',
 'data_y',
 'features',
 'freq',
 'inverse_transform',
 'label_len',
 'pred_len',
 'root_path',
 'scale',
 'scaler',
 'seq_len',
 'set_type',
 'target',
 'timeenc']

In [5]:
someval1 = next(iter(train_loader))
someval2 = next(iter(test_loader))

In [6]:
device = torch.device("cuda:0")

background = someval1[0].float().to(device)
tests = someval2[0].float().to(device)

In [7]:
someval2[1].float()

tensor([[[-0.4120,  0.0218, -0.1749,  0.0952],
         [-0.4120,  0.0218, -0.1749, -0.0277],
         [-0.4120, -0.0440, -0.1749, -0.1041],
         [-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038]],

        [[-0.4120,  0.0218, -0.1749, -0.0277],
         [-0.4120, -0.0440, -0.1749, -0.1041],
         [-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038],
         [-0.4120, -0.3729, -0.1749, -0.2270]],

        [[-0.4120, -0.0440, -0.1749, -0.1041],
         [-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038],
         [-0.4120, -0.3729, -0.1749, -0.2270],
         [-0.4120, -0.4387, -0.1749, -0.2470]],

        ...,

        [[ 0.2013, -1.2941,  0.6544, -0.3965],
         [ 0.2627, -1.2283,  0.6544, -0.4928],
         [ 0.2627, -1.1625,  0.6544, -0.

In [8]:
ss1 = someval2[0][:5,:,:].float().to(device)
out1 = exp.model(ss1)

In [9]:
someval2[1][:5,:,:]

tensor([[[-0.4120,  0.0218, -0.1749,  0.0952],
         [-0.4120,  0.0218, -0.1749, -0.0277],
         [-0.4120, -0.0440, -0.1749, -0.1041],
         [-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038]],

        [[-0.4120,  0.0218, -0.1749, -0.0277],
         [-0.4120, -0.0440, -0.1749, -0.1041],
         [-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038],
         [-0.4120, -0.3729, -0.1749, -0.2270]],

        [[-0.4120, -0.0440, -0.1749, -0.1041],
         [-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038],
         [-0.4120, -0.3729, -0.1749, -0.2270],
         [-0.4120, -0.4387, -0.1749, -0.2470]],

        [[-0.3507,  0.0218, -0.1749, -0.1506],
         [-0.4120, -0.3729, -0.1749, -0.1772],
         [-0.4120, -0.4387, -0.1749, -0.2038],
       

In [10]:
out1

tensor([[[-0.4007,  0.1089, -0.1788,  0.2372],
         [-0.4042,  0.1331, -0.1856,  0.1686],
         [-0.4126,  0.1409, -0.1951,  0.1468],
         [-0.4267,  0.1347, -0.2096,  0.1169],
         [-0.4495,  0.1300, -0.2234,  0.1247],
         [-0.4766,  0.1213, -0.2308,  0.1102]],

        [[-0.4148,  0.0238, -0.1861,  0.0812],
         [-0.4189,  0.0158, -0.1958, -0.0048],
         [-0.4354, -0.0082, -0.2156, -0.0261],
         [-0.4536, -0.0234, -0.2262, -0.0785],
         [-0.4820, -0.0291, -0.2364, -0.0816],
         [-0.5046, -0.0343, -0.2300, -0.0931]],

        [[-0.4115, -0.0038, -0.1768, -0.0035],
         [-0.4196, -0.0197, -0.1833, -0.0793],
         [-0.4433, -0.0409, -0.1996, -0.1009],
         [-0.4652, -0.0509, -0.2129, -0.1516],
         [-0.4833, -0.0492, -0.2207, -0.1584],
         [-0.4960, -0.0416, -0.2241, -0.1635]],

        [[-0.4251, -0.0630, -0.1795, -0.0767],
         [-0.4523, -0.1046, -0.1922, -0.1330],
         [-0.4913, -0.1346, -0.2015, -0.1568],
       

In [11]:
background.dtype

torch.float32

In [12]:
someval1[1].shape

torch.Size([512, 6, 4])

In [13]:
someval1[0].shape

torch.Size([512, 24, 4])

In [14]:
tests[:5,:,:]

tensor([[[ 0.0173,  0.0218, -0.1749,  0.8760],
         [-0.0440,  0.0218, -0.2441,  0.9025],
         [-0.1054,  0.0218, -0.2441,  0.9225],
         [-0.1054,  0.0876, -0.2441,  0.9258],
         [-0.1667, -0.0440, -0.3132,  0.9125],
         [-0.1667,  0.0218, -0.3132,  0.8859],
         [-0.3507, -0.0440, -0.3132,  0.8394],
         [-0.3507,  0.0218, -0.3132,  0.7896],
         [-0.4120,  0.0218, -0.3132,  0.7597],
         [-0.3507,  0.1534, -0.3132,  0.7530],
         [-0.3507,  0.1534, -0.2441,  0.7796],
         [-0.2894,  0.1534, -0.2441,  0.8294],
         [-0.2280,  0.2192, -0.1058,  0.8992],
         [-0.2894,  0.2192, -0.1749,  0.9557],
         [-0.1667,  0.2192, -0.0367,  0.9956],
         [-0.2894,  0.1534, -0.1058,  1.0188],
         [-0.3507,  0.1534, -0.1749,  1.0122],
         [-0.3507,  0.0876, -0.1058,  0.9956],
         [-0.4120,  0.0218, -0.1749,  0.9524],
         [-0.3507, -0.0440, -0.1749,  0.8859],
         [-0.3507, -0.0440, -0.1749,  0.8162],
         [-0.

In [15]:
out = exp.model(background)

In [16]:
out.shape

torch.Size([512, 6, 4])

In [17]:
background.shape

torch.Size([512, 24, 4])

In [18]:
def f(x):
    x = torch.from_numpy().to(device)
    out = exp.model(x)
    return out[:, 5, 2].numpy()


def time_series_masker(data, mask):
    # Replace masked elements with the mean of the series (simplified approach)
    masked_data = data.copy()
    masked_data[mask] = np.mean(data, axis=0)
    return masked_data

# Convert background to a NumPy array if it's not already
background_np = background.cpu().detach().numpy()
test_np = tests.cpu().detach().numpy()
# Create a masker that uses the background data for replacing "masked" features
masker = shap.maskers.Independent(data=background_np)
#masker = shap.maskers.Tabular(background_np, hclustering='correlation')


# Create the SHAP explainer using the model function and the masker
explainer = shap.Explainer(f, background_np)
shap_values = explainer(test_np)

NameError: name 'shap' is not defined

In [None]:
background_np.shape

In [None]:
test_np.shape

In [None]:
start_date = '2023-01-01 00:00'
end_date = '2023-12-31 23:00'
        
time_index = pd.date_range(start=start_date, end=end_date, freq='H')

In [None]:
def plotResults(predss, truess, model_name, start_date='2023-01-01 00:00', end_date = '2023-12-31 23:00'):
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset

    time_index = pandas.date_range(start=start_date, end=end_date, freq='H')

    # Adjust the time index to match the length of the data
    min_len = min(len(time_index), len(predss))
    time_index = time_index[:min_len]
    truess = np.roll(truess, 6)
    
    predss = predss[:min_len]
    truess = truess[:min_len]

    
    #gtt = np.concatenate((time_index, truess), axis = 0)
    #pdt = np.concatenate((time_index, predss), axis = 0)
    
    #visual(gtt, pdt, 'some_file.pdf')
    
    # Plotting

    fig, ax = plt.subplots(figsize=(12, 6))
    plt.title('{0} Model Predictions with a 6-Hour Forecast Horizon for year 2023'.format(model_name))
    
    ax.plot(time_index, predss, label='{0} Predictions'.format(model_name), color='blue')
    ax.plot(time_index, truess, label='Actual height', color='#BB0000')
    ax.set_xlabel('Time')
    ax.set_ylabel('Gaze height')
    
    ax.set_ylim(-1, 10)
    ax.legend(loc='upper left')
    
    axins1 = inset_axes(ax, width="60%", height="40%", loc='upper right')
    axins1.plot(time_index, predss, color='blue')
    axins1.plot(time_index, truess, color='#BB0000')
    axins1.set_xlabel('time')
    axins1.set_ylabel('Gaze Height')

    axins1.xaxis.set_major_locator(mdates.DayLocator(interval=5))
    axins1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))

    #may have to change it
    axins1.set_ylim(-1,6)
    start_date = pandas.Timestamp('2023-05-01')  # Adjust date to match the dataset
    end_date = pandas.Timestamp('2023-06-01')  # Adjust date to match the dataset
    axins1.set_xlim(start_date, end_date)

    
    # Apply mark_inset to show the bounds of the inset on the main plot
    mark_inset(ax, axins1, loc1=3, loc2=4, fc="none", ec="0.25")      
    
    plt.tight_layout()

    # Save the figure
    
    plt.savefig('Results.png') 
    plt.close(fig)


In [None]:
plotResults(out1.detach().cpu().numpy(), ss1.detach().cpu().numpy(), 'PatchTST', start_date='2023-01-01 00:00', end_date = '2023-12-31 23:00')