In [None]:
from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import torch
import pandas as pd
import glob

In [None]:
# Glob patterns for the per-split CSV folders (the test split is disabled).
# TRAIN_PATH and VALID_PATH are not referenced by the other visible cells.
TRAIN_PATH, VALID_PATH = 'flood_data/train/*.csv', 'flood_data/valid/*.csv'
# TEST_PATH = 'flood_data/test/*.csv'

# Master CSV that is passed to load_csv() to build the dataset.
PASS = 'data/masterFileForLean_2017_2019.csv'

# Allow up to 999 columns when a wide frame is displayed.
pd.options.display.max_columns = 999


def load_csv(filepath):
    """Load every CSV matching ``filepath`` into one date-indexed DataFrame.

    Parameters
    ----------
    filepath : str
        A path or glob pattern (for example ``'data/*.csv'``). Every matching
        file is read and the results are stacked row-wise.

    Returns
    -------
    pandas.DataFrame
        The selected columns of all matching files. The first CSV column,
        parsed with the format ``%Y%m%d%H``, becomes a ``DatetimeIndex``
        named ``'date'``; the remaining 105 selected columns are the data.

    Raises
    ------
    FileNotFoundError
        If no file matches ``filepath``.
    """
    # Positional columns to keep: column 0 is the timestamp, the other
    # 105 entries are the feature columns.
    usecols = [
        0, 5, 6, 7, 8, 9, 10, 11, 12, 13,
        14, 15, 16, 17, 18, 20, 21, 22, 23, 24,
        26, 27, 28, 29, 30, 32, 33, 34, 35, 36,
        38, 39, 40, 41, 42, 44, 45, 46, 47, 48,
        50, 51, 52, 53, 54, 56, 57, 58, 59, 60,
        62, 63, 64, 65, 66, 68, 69, 70, 71, 72,
        106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
        126, 127, 128, 129, 130, 131, 132, 133, 134, 135,
        136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 150, 186
    ]

    # glob returns files in filesystem order; sort so that multi-file
    # patterns are always concatenated in the same order.
    files = sorted(glob.glob(filepath))
    if not files:
        raise FileNotFoundError('No CSV file matches: {}'.format(filepath))

    frames = []
    for filename in files:
        frame = pd.read_csv(filename, index_col=0, usecols=usecols)
        # Parse the timestamp after reading instead of via `date_parser`,
        # which is deprecated in pandas 2.x. Going through str makes the
        # explicit format apply even when the column was read as integers.
        frame.index = pd.to_datetime(frame.index.astype(str), format='%Y%m%d%H')
        frames.append(frame)

    # Concatenate once, after the loop, rather than on every iteration.
    df = pd.concat(frames, axis=0)
    df.index.name = 'date'
    return df

# Load the master CSV named by PASS into a date-indexed frame.
data = load_csv(PASS)
print(data.shape)
# Last expression of the cell: preview the first rows.
data.head()

In [None]:
# Write the frame to data/dataset.csv, i.e. the root_path 'data/' + data_path
# 'dataset.csv' that the experiment configuration points at.
data.to_csv('data/dataset.csv')

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
from pylab import rcParams

# One histogram per column. The 30x30 inch canvas is passed to this figure
# only: setting it through the global rcParams would also enlarge every
# figure drawn (and saved) by the later cells of the notebook.
data.hist(bins=20, figsize=(30, 30))
plt.tight_layout()
# Save before show(), while the figure is still the current one.
plt.savefig('data_hist.png', dpi=200)
plt.show()

In [None]:
# Load the 2020 CSV through the same loader (same column selection and
# date index) as the master file.
df = load_csv('data/2020.csv')
print(df.shape)
# Last expression of the cell: preview the first rows.
df.head()

In [None]:
# Write the 2020 frame to data/test.csv. The file is not read back by any
# of the visible cells.
df.to_csv('data/test.csv')

In [None]:
# Experiment configuration handed to Exp_Informer.
# NOTE: data_path, target, enc_in, dec_in and c_out are overwritten from the
# `data_parser` table in a later cell whenever args.data is one of its keys.
args = dotdict()

args.model = 'informer' # model of experiment, options: [informer, informerstack, informerlight(TBD)]

args.data = 'custom' # data
args.root_path = 'data/' # root path of data file
args.data_path = 'dataset.csv' # data file
args.features = 'MS' # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate
args.target = 'A_temp' # target feature in S or MS task
args.freq = 'h' # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h
args.checkpoints = 'checkpoints' # location of model checkpoints

args.seq_len = 12 #96 # input sequence length of Informer encoder
args.label_len = 6 #48 # start token length of Informer decoder
args.pred_len = 3 #24 # prediction sequence length
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]

args.enc_in = 7 # encoder input size
args.dec_in = 7 # decoder input size
args.c_out = 7 # output size
args.factor = 5 # probsparse attn factor
args.d_model = 512 # dimension of model
args.n_heads = 8 # num of heads
args.e_layers = 2 # num of encoder layers
args.d_layers = 1 # num of decoder layers
args.d_ff = 2048 # dimension of fcn in model
args.dropout = 0.05 # dropout
args.attn = 'prob' # attention used in encoder, options:[prob, full]
args.embed = 'timeF' # time features encoding, options:[timeF, fixed, learned]
args.activation = 'relu' #'gelu' # activation
# This used to be `args.padding == 0`, a comparison whose result was thrown
# away, so the option was never set. Presumably the decoder padding type;
# confirm against Exp_Informer.
args.padding = 0
args.distil = False #True # whether to use distilling in encoder
args.output_attention = False # whether to output attention in encoder

args.batch_size = 16 #32 
args.learning_rate = 0.01 #0.0001
args.loss = 'mse'
args.lradj = 'type1'
args.use_amp = False # whether to use automatic mixed precision training

args.num_workers = 0
args.itr = 1
args.train_epochs = 6
args.patience = 10
args.des = 'exp'

# torch.cuda.is_available() already returns a bool.
args.use_gpu = torch.cuda.is_available()
args.gpu = 0

args.use_multi_gpu = False
args.devices = '0,1,2,3'

In [None]:
# Use the GPU only when it was requested *and* CUDA is actually available.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

if args.use_gpu and args.use_multi_gpu:
    # Strip blanks from the comma-separated id list before parsing it.
    # (The original stored the stripped string in a misspelled `args.dvices`
    # attribute, so `args.devices` itself was never normalised.)
    args.devices = args.devices.replace(' ', '')
    args.device_ids = [int(id_) for id_ in args.devices.split(',')]
    # The first listed device becomes the primary one.
    args.gpu = args.device_ids[0]

In [None]:
# Set arguments by using data name.
# Each entry maps a dataset key to its CSV file ('data'), its target column
# ('T') and, per forecasting mode (M / S / MS), [enc_in, dec_in, c_out].
# The four ETT datasets share the same layout.
data_parser = {
    name: {'data': name + '.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]}
    for name in ('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2')
}
# 105 is the number of non-index columns that load_csv() selects.
data_parser['custom'] = {
    'data': 'dataset.csv',
    'T': 'A_temp',
    'M': [105, 105, 105],
    'S': [1, 1, 1],
    'MS': [105, 105, 1],
}

if args.data in data_parser:
    data_info = data_parser[args.data]
    args.data_path, args.target = data_info['data'], data_info['T']
    args.enc_in, args.dec_in, args.c_out = data_info[args.features]

In [None]:
# Keep the full frequency string (e.g. '15min') in detail_freq and reduce
# freq to its last character. Re-running this cell copies the already
# shortened value into detail_freq, so run it once per configuration.
args.detail_freq = args.freq
args.freq = args.freq[-1:]

In [None]:
# Dump the final configuration so the run can be reproduced from the output.
print('Args in experiment:')
print(args)

In [None]:
# Alias for the experiment class instantiated by the following cells.
Exp = Exp_Informer

In [None]:
# Train and test the model args.itr times; each run gets its own `setting`
# identifier built from the hyper-parameters and the run index.
for ii in range(args.itr):
    # setting record of experiments
    setting = '{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_{}_{}'.format(
        args.model, args.data, args.features,
        args.seq_len, args.label_len, args.pred_len,
        args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff,
        args.attn, args.factor, args.embed, args.distil, args.des, ii)

    # set experiments
    exp = Exp(args)

    # train
    print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
    exp.train(setting)

    # test
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    exp.test(setting)

    torch.cuda.empty_cache()

# `setting` deliberately keeps the value of the last run. The previous
# hard-coded override (sl96 / ll48 / pl24 / distil=True) did not match the
# configured args (12 / 6 / 3 / False), so the later cells would have read
# and written result files belonging to a different experiment.
# To analyse an earlier run instead, assign its setting string here and
# configure `args` to match it.

In [None]:
import numpy as np
import os

In [None]:
# Build a new experiment object from the current args; this replaces the
# `exp` left over from the training loop.
# NOTE(review): presumably the new object's model is untrained until a
# checkpoint is loaded - confirm against Exp_Informer.
exp = Exp(args)
#exp.predict(setting, True)

In [None]:
# here is the detailed code of function predict

def predict(exp, setting, load=False):
    """Run the experiment's model over its 'pred' loader and save the outputs.

    Parameters
    ----------
    exp : object
        Experiment exposing ``_get_data(flag=...)``, ``model``, ``device``
        and ``args`` (``checkpoints``, ``pred_len``, ``label_len``,
        ``use_amp`` and ``output_attention`` are read).
    setting : str
        Run identifier; names the checkpoint sub-folder and the results folder.
    load : bool, default False
        When True, ``<args.checkpoints>/<setting>/checkpoint.pth`` is loaded
        into ``exp.model`` first. When False the model's current weights are
        used as they are.

    Returns
    -------
    numpy.ndarray
        The model outputs of all batches stacked along the first axis and
        reshaped to three dimensions. The same array is written to
        ``./results/<setting>/real_prediction.npy``.

    Raises
    ------
    ValueError
        If the prediction loader yields no batch.
    """
    _, pred_loader = exp._get_data(flag='pred')

    if load:
        best_model_path = os.path.join(exp.args.checkpoints, setting, 'checkpoint.pth')
        # map_location lets a checkpoint written on a GPU be restored on
        # whatever device this experiment runs on.
        state_dict = torch.load(best_model_path, map_location=exp.device)
        exp.model.load_state_dict(state_dict)

    exp.model.eval()

    preds = []

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
            batch_x = batch_x.float().to(exp.device)
            batch_y = batch_y.float()
            batch_x_mark = batch_x_mark.float().to(exp.device)
            batch_y_mark = batch_y_mark.float().to(exp.device)

            # decoder input: the first label_len steps of batch_y followed by
            # zeros for the pred_len steps to be predicted
            dec_inp = torch.zeros_like(batch_y[:, -exp.args.pred_len:, :]).float()
            dec_inp = torch.cat([batch_y[:, :exp.args.label_len, :], dec_inp], dim=1).float().to(exp.device)

            # encoder - decoder
            if exp.args.use_amp:
                with torch.cuda.amp.autocast():
                    outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            # With output_attention the model returns a sequence whose first
            # item is the prediction.
            if exp.args.output_attention:
                outputs = outputs[0]

            preds.append(outputs.detach().cpu().numpy())

    if not preds:
        raise ValueError('prediction loader returned no batches')

    # Concatenate along the batch axis; unlike np.array(preds) this also
    # works when the last batch is smaller than the others.
    preds = np.concatenate(preds, axis=0)
    preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])

    # result save
    folder_path = './results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    np.save(folder_path + 'real_prediction.npy', preds)

    return preds


In [None]:
# you can also use this prediction function to get result
# NOTE(review): load=False means no checkpoint is loaded, so the forecast
# comes from whatever weights `exp.model` holds right now. `exp` was just
# re-created with Exp(args) - confirm whether load=True was intended.
prediction = predict(exp, setting, False)

In [None]:
import matplotlib.pyplot as plt

# Plot the last output channel of the first predicted window.
fig, ax = plt.subplots()
ax.plot(prediction[0, :, -1])
plt.show()

In [None]:
from data.data_loader import Dataset_Pred
from torch.utils.data import DataLoader

In [None]:
# Build the prediction dataset and its loader by hand, from the same args
# the experiment uses.
Data = Dataset_Pred
# Time features are encoded (1) only for the 'timeF' embedding.
timeenc = 1 if args.embed == 'timeF' else 0
flag = 'pred'
shuffle_flag = False
drop_last = False
batch_size = 1
freq = args.detail_freq

dataset_kwargs = dict(
    root_path=args.root_path,
    data_path=args.data_path,
    flag=flag,
    size=[args.seq_len, args.label_len, args.pred_len],
    features=args.features,
    target=args.target,
    timeenc=timeenc,
    freq=freq,
)
data_set = Data(**dataset_kwargs)

loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=shuffle_flag,
    num_workers=args.num_workers,
    drop_last=drop_last,
)
data_loader = DataLoader(data_set, **loader_kwargs)

In [None]:
# Number of samples in the prediction dataset and number of batches in its loader.
len(data_set), len(data_loader)

### 視覚化

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# After exp.train(setting) and exp.test(setting) have finished there is a
# trained model plus the results of the test experiment.
# Those results are stored in ./results/{setting}/pred.npy (prediction of the
# test dataset) and ./results/{setting}/true.npy (ground truth of the test
# dataset); real_prediction.npy is the file written by predict().
results_dir = './results/' + setting + '/'

preds = np.load(results_dir + 'pred.npy')
trues = np.load(results_dir + 'true.npy')
reals = np.load(results_dir + 'real_prediction.npy')

# [samples, pred_len, dimensions]
preds.shape, trues.shape, reals.shape

In [None]:
from utils.tools import StandardScaler

In [None]:
# NOTE(review): this scaler is created here and never fitted in the visible
# cells, so inverse_transform runs with whatever initial state
# utils.tools.StandardScaler has - presumably not the statistics used to
# scale the training data. Confirm before interpreting new_pred.
scaler = StandardScaler()
new_pred = scaler.inverse_transform(preds)

In [None]:
# Shape check of the inverse-transformed predictions.
print(new_pred.shape)

In [None]:
# Plot the last channel of new_pred; a 2-D slice draws one line per column.
plt.plot(new_pred[:,:,-1])

In [None]:
# Output folder and file name for the comparison figure, both derived from
# the run identifier.
SAVE_PASS = 'result_img/{}'.format(setting)
os.makedirs(SAVE_PASS, exist_ok=True)

filename = os.path.join(SAVE_PASS, '{}.png'.format(setting))
print(filename)

In [None]:
# Draw ground truth against prediction for the last channel and save the
# figure under `filename`.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(trues[:, :, -1], label='GroundTruth')
ax.plot(preds[:, :, -1], label='Prediction')
ax.legend()
fig.savefig(filename, dpi=200)
plt.show()

In [None]:
# Plot the A_temp column of `df` (the frame loaded from data/2020.csv) over its date index.
df['A_temp'].plot()

In [None]:
# Last rows of the A_temp column of `df`.
df['A_temp'].tail()