In [5]:
"""
実行時に設定するパラメータ
"""

# Informerの学習パラメータ
# Informerを学習するときに使用したパラメータの文字列をここで代入する
#ARG_STR = "--model informer --data ETTh1 --attn prob --freq h --checkpoints 'checkpoints/ETTh1_sample30_window30' --features S  --e_layers 1  --d_layers 1 --dropout 0.3 --learning_rate 0.0001 --embed timeF"
ARG_STR = "--model informer --data NaturalGas --root_path './Informer2020/data/NaturalGas/' --data_path combined_data.csv --features S --attn prob --freq h --checkpoints 'checkpoints/NaturalGas_sample30_window30' --e_layers 1  --d_layers 1 --dropout 0.3 --learning_rate 0.0001 --embed timeF"

# 学習済みのInformerモデルが保存されているパス
INFORMER_CKPT_PATH = "checkpoints/informer-small.pth"

In [6]:
import sys
sys.path.append("Informer2020")

import argparse
import random
import tqdm
import numpy as np
import torch
import pandas as pd
import os

import dataset
from model.informer_model import InformerModel
from model.model import Model
from model.moment_model import MomentModel
from propose import ProposedModelWithMoe, ProposedModel
from evaluation import evaluate_mse, evaluate_nll

from main_informer import parse_args

In [7]:
def set_seed(seed: int) -> None:
    # random
    random.seed(seed)

    # numpy
    np.random.seed(seed)

    # pytorch
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.mps.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def load_moment_model(args: argparse.Namespace) -> Model:
    return MomentModel(param="AutonLab/MOMENT-1-large", pred_len=args.pred_len)


def load_moment_model_finetuned(
    args: argparse.Namespace,
    train_dataset: torch.utils.data.Dataset,
    valid_dataset: torch.utils.data.Dataset,
) -> Model:
    model: MomentModel = load_moment_model(args=args)
    model.fine_tuning(train_dataset=train_dataset, valid_dataset=valid_dataset, args=args)
    return model


def load_informer_model(args: argparse.Namespace) -> Model:
    return InformerModel(args, checkpoint_path=INFORMER_CKPT_PATH)


def load_proposed_model(moment_model: Model, informer_model: Model) -> Model:
    model = ProposedModel(moment_model=moment_model, informer_model=informer_model)
    return model


def load_proposed_model_with_moe(
    moment_model: Model,
    informer_model: Model,
    input_size: int,
    train_dataset: torch.utils.data.Dataset,
    valid_dataset: torch.utils.data.Dataset,
    args: argparse.Namespace,
) -> Model:
    model = ProposedModelWithMoe(
        moment_model=moment_model,
        informer_model=informer_model,
        input_size=input_size,
    )
    model.train(train_dataset=train_dataset, valid_dataset=valid_dataset, args=args)
    return model

In [8]:
set_seed(0)

args = parse_args(ARG_STR)
save_file_name = args.checkpoints
if not os.path.exists(save_file_name):
    os.mkdir(save_file_name)
print("args:", args)

train_dataset, valid_dataset, test_dataset = dataset.load_dataset(args=args)
input_size = args.seq_len

moment_model = load_moment_model(args=args)
informer_model = load_informer_model(args=args)
proposed_model = load_proposed_model(moment_model, informer_model,input_size,train_dataset,args)

args: Namespace(model='informer', data='NaturalGas', root_path='./Informer2020/data/NaturalGas/', data_path='combined_data.csv', features='S', target='actual_wdl_gj', freq='h', checkpoints='checkpoints/NaturalGas_sample30_window30', seq_len=96, label_len=48, pred_len=24, enc_in=1, dec_in=1, c_out=1, d_model=512, n_heads=8, e_layers=1, d_layers=1, s_layers=[3, 2, 1], d_ff=2048, factor=5, padding=0, distil=True, dropout=0.3, attn='prob', embed='timeF', activation='gelu', output_attention=False, do_predict=False, mix=True, cols=None, num_workers=0, itr=2, train_epochs=6, batch_size=32, patience=3, learning_rate=0.0001, des='test', loss='mse', lradj='type1', use_amp=False, inverse=False, use_gpu=False, gpu=0, use_multi_gpu=False, devices='0,1,2,3', detail_freq='h')
[test] self.target actual_wdl_gj
[test] cols ['date', 'schedule_interval', 'transmission_id', 'sched_inj_gj', 'sched_wdl_gj', 'price_value', 'administered_price', 'actual_wdl_gj', 'actual_inj_gj']
train 6275
[test] self.target a

RuntimeError: Error(s) in loading state_dict for Informer:
	size mismatch for enc_embedding.value_embedding.tokenConv.weight: copying a param with shape torch.Size([16, 1, 3]) from checkpoint, the shape in current model is torch.Size([512, 1, 3]).
	size mismatch for enc_embedding.value_embedding.tokenConv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for enc_embedding.position_embedding.pe: copying a param with shape torch.Size([1, 5000, 16]) from checkpoint, the shape in current model is torch.Size([1, 5000, 512]).
	size mismatch for enc_embedding.temporal_embedding.embed.weight: copying a param with shape torch.Size([16, 4]) from checkpoint, the shape in current model is torch.Size([512, 4]).
	size mismatch for enc_embedding.temporal_embedding.embed.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for dec_embedding.value_embedding.tokenConv.weight: copying a param with shape torch.Size([16, 1, 3]) from checkpoint, the shape in current model is torch.Size([512, 1, 3]).
	size mismatch for dec_embedding.value_embedding.tokenConv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for dec_embedding.position_embedding.pe: copying a param with shape torch.Size([1, 5000, 16]) from checkpoint, the shape in current model is torch.Size([1, 5000, 512]).
	size mismatch for dec_embedding.temporal_embedding.embed.weight: copying a param with shape torch.Size([16, 4]) from checkpoint, the shape in current model is torch.Size([512, 4]).
	size mismatch for dec_embedding.temporal_embedding.embed.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.attention.query_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for encoder.attn_layers.0.attention.query_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.attention.key_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for encoder.attn_layers.0.attention.key_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.attention.value_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for encoder.attn_layers.0.attention.value_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.attention.out_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for encoder.attn_layers.0.attention.out_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.conv1.weight: copying a param with shape torch.Size([32, 16, 1]) from checkpoint, the shape in current model is torch.Size([2048, 512, 1]).
	size mismatch for encoder.attn_layers.0.conv1.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for encoder.attn_layers.0.conv2.weight: copying a param with shape torch.Size([16, 32, 1]) from checkpoint, the shape in current model is torch.Size([512, 2048, 1]).
	size mismatch for encoder.attn_layers.0.conv2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.norm1.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.norm1.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.norm2.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.attn_layers.0.norm2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.norm.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for encoder.norm.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.self_attention.query_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.self_attention.query_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.self_attention.key_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.self_attention.key_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.self_attention.value_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.self_attention.value_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.self_attention.out_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.self_attention.out_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.cross_attention.query_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.cross_attention.query_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.cross_attention.key_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.cross_attention.key_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.cross_attention.value_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.cross_attention.value_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.cross_attention.out_projection.weight: copying a param with shape torch.Size([16, 16]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for decoder.layers.0.cross_attention.out_projection.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.conv1.weight: copying a param with shape torch.Size([32, 16, 1]) from checkpoint, the shape in current model is torch.Size([2048, 512, 1]).
	size mismatch for decoder.layers.0.conv1.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for decoder.layers.0.conv2.weight: copying a param with shape torch.Size([16, 32, 1]) from checkpoint, the shape in current model is torch.Size([512, 2048, 1]).
	size mismatch for decoder.layers.0.conv2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.norm1.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.norm1.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.norm2.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.norm2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.norm3.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.layers.0.norm3.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.norm.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for decoder.norm.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for projection.weight: copying a param with shape torch.Size([1, 16]) from checkpoint, the shape in current model is torch.Size([1, 512]).

In [5]:
proposed_model_with_moe = load_proposed_model_with_moe(moment_model, informer_model,input_size,train_dataset,args)

100%|██████████| 196/196 [08:43<00:00,  2.67s/it]

Epoch [1/1], Loss: 0.2007





In [6]:
# 予測結果の評価
print("args:", args)
results = {}

for method, model in {
    "informer": informer_model,
    "moment": moment_model,
    "proposed": proposed_model,
    "proposed+moe": proposed_model_with_moe,
}.items():
    print(f"testing: {method}")
    test_dataloader = dataset.to_dataloader(test_dataset, args, "test")

    y_pred = []
    y_true = []
    for index, batch in tqdm.tqdm(test_dataloader):
        y_pred.append(model.predict_distr(batch).detach().tolist())
        y_true.append(batch[1][:, -1].squeeze().detach().tolist())

    y_pred, y_true = np.array(y_pred).reshape(-1, 2), np.array(y_true).flatten()
    results[method] = {
        "mse": evaluate_mse(y_pred, y_true),
        "nll": evaluate_nll(y_pred, y_true),
    }
    print(results[method])

    np.save(f"checkpoints/{args.data}/y_pred_{method}.npy", y_pred)

results

args: Namespace(model='informer', data='NaturalGas', root_path='./Informer2020/data/NaturalGas/', data_path='combined_data.csv', features='S', target='actual_wdl_gj', freq='h', checkpoints='checkpoints/NaturalGas_sample30_window30', seq_len=96, label_len=48, pred_len=24, enc_in=1, dec_in=1, c_out=1, d_model=512, n_heads=8, e_layers=1, d_layers=1, s_layers=[3, 2, 1], d_ff=2048, factor=5, padding=0, distil=True, dropout=0.3, attn='prob', embed='timeF', activation='gelu', output_attention=False, do_predict=False, mix=True, cols=None, num_workers=0, itr=2, train_epochs=6, batch_size=32, patience=3, learning_rate=0.0001, des='test', loss='mse', lradj='type1', use_amp=False, inverse=False, use_gpu=False, gpu=0, use_multi_gpu=False, devices='0,1,2,3', detail_freq='h')
testing: informer


100%|██████████| 56/56 [03:51<00:00,  4.14s/it]


{'mse': 0.2771660422917495, 'nll': 211.86042112751724}
testing: moment


100%|██████████| 56/56 [1:14:28<00:00, 79.79s/it]


{'mse': 0.2898083649419951, 'nll': 237.83403569447142}
testing: proposed


100%|██████████| 56/56 [1:18:16<00:00, 83.86s/it]


{'mse': 0.26338581409654455, 'nll': 175.2089096087653}
testing: proposed+moe


100%|██████████| 56/56 [1:18:21<00:00, 83.96s/it]

{'mse': 0.2735648845773251, 'nll': 440.9727333241699}





{'informer': {'mse': 0.2771660422917495, 'nll': 211.86042112751724},
 'moment': {'mse': 0.2898083649419951, 'nll': 237.83403569447142},
 'proposed': {'mse': 0.26338581409654455, 'nll': 175.2089096087653},
 'proposed+moe': {'mse': 0.2735648845773251, 'nll': 440.9727333241699}}

In [7]:
pd.DataFrame(results).to_csv("results.csv", index=False)