Прогнозирование Trans-MAF нерегулярных временных рядов.

## Imports

In [1]:
import copy
import json
import os
import pickle
import sys
import warnings

sys.path.append('../pytorch-ts')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import seaborn as sns
import torch
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.repository.datasets import dataset_recipes, get_dataset
from gluonts.dataset.split import split
from gluonts.dataset.util import to_pandas
from gluonts.evaluation import MultivariateEvaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from pts import Trainer
from pts.model.transformer_tempflow import TransformerTempFlowEstimator

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Read Datasets

In [12]:
exchange = get_dataset("exchange_rate", regenerate=False)

solar = get_dataset("solar_nips", regenerate=False)

electricity = get_dataset("electricity_nips", regenerate=False)

In [13]:
# electricity
electricity_train_grouper = MultivariateGrouper(
    max_target_dim=min(2000, int(electricity.metadata.feat_static_cat[0].cardinality))
    )

electricity_test_grouper = MultivariateGrouper(
    num_test_dates=int(len(electricity.test) / len(electricity.train)), 
    max_target_dim=min(2000, int(electricity.metadata.feat_static_cat[0].cardinality))
    )

# solar
solar_train_grouper = MultivariateGrouper(
    max_target_dim=min(2000, int(solar.metadata.feat_static_cat[0].cardinality))
    )

solar_test_grouper = MultivariateGrouper(
    num_test_dates=int(len(solar.test) / len(solar.train)), 
    max_target_dim=min(2000, int(solar.metadata.feat_static_cat[0].cardinality))
    )

# exchange
exchange_train_grouper = MultivariateGrouper(
    max_target_dim=min(2000, int(exchange.metadata.feat_static_cat[0].cardinality))
    )

exchange_test_grouper = MultivariateGrouper(
    num_test_dates=int(len(exchange.test) / len(exchange.train)), 
    max_target_dim=min(2000, int(exchange.metadata.feat_static_cat[0].cardinality))
    )

In [14]:
# solar
solar_dataset_train = solar_train_grouper(solar.train)
solar_dataset_test = solar_test_grouper(solar.test)

# electricity
electricity_dataset_train = electricity_train_grouper(electricity.train)
electricity_dataset_test = electricity_test_grouper(electricity.test)

# exchange
exchange_dataset_train = exchange_train_grouper(exchange.train)
exchange_dataset_test = exchange_test_grouper(exchange.test)

## Train models

In [18]:
RUN_FOLDER = './reports/maf/run_02'

MODELS_FOLDER = os.path.join(RUN_FOLDER, 'models')

PREDICTIONS_FOLDER = os.path.join(RUN_FOLDER, 'predictions')

for folder in [RUN_FOLDER, MODELS_FOLDER, PREDICTIONS_FOLDER]:
    if not os.path.exists(folder):
        os.mkdir(folder)

### Solar

#### Train

In [63]:
%%time

train_size = solar_dataset_train[0]['target'].shape[1]
n_series = solar_dataset_train[0]['target'].shape[0]

for i in np.linspace(0.1, 0.8, 8):
    
    print(f"\n### NaNs probability = {i} ###\n")
    
    train = copy.copy(solar_dataset_train)
    test = copy.copy(solar_dataset_test)

    nans_prob = np.round(i, 1)

    for j in range(n_series):

        # take random indices
        idxs = np.random.choice(range(0, train_size), size=int(nans_prob * train_size), replace=False)

        # nan train values
        train[0]['target'][j, idxs] = np.nan

        # nan test values
        for k in range(len(test)):
            test[k]['target'][j, idxs] = np.nan
    
    train = list(train) * 100

    # train
    estimator = TransformerTempFlowEstimator(
        input_size=278,
        target_dim=int(solar.metadata.feat_static_cat[0].cardinality),
        prediction_length=solar.metadata.prediction_length,
        context_length=solar.metadata.prediction_length * 4,
        flow_type='MAF',
        dequantize=True,
        freq=solar.metadata.freq,
        trainer=Trainer(
            device='cpu',
            epochs=14,
            learning_rate=1e-3,
            num_batches_per_epoch=100,
            batch_size=64,
        )
    )

    predictor = estimator.train(train, num_workers=4)

    # save model
    torch.save(predictor, os.path.join(MODELS_FOLDER, f"estimator__solar_{nans_prob}.ckpt"))

    solar_dataset_train = solar_train_grouper(solar.train)
    solar_dataset_test = solar_test_grouper(solar.test)


### NaNs probability = 0.5 ###



100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=1/14, avg_loss=58.3]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=2/14, avg_loss=-117] 
100%|██████████| 99/99 [01:11<00:00,  1.39it/s, epoch=3/14, avg_loss=-136]
100%|██████████| 99/99 [01:11<00:00,  1.39it/s, epoch=4/14, avg_loss=-140]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=5/14, avg_loss=-144]
100%|██████████| 99/99 [01:09<00:00,  1.41it/s, epoch=6/14, avg_loss=-147]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=7/14, avg_loss=-149]
100%|██████████| 99/99 [01:11<00:00,  1.39it/s, epoch=8/14, avg_loss=-150]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=9/14, avg_loss=-152]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=10/14, avg_loss=-153]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=11/14, avg_loss=-153]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=12/14, avg_loss=-154]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=13/14, avg_loss=-154]
100%|██████████| 99/


### NaNs probability = 0.6 ###



100%|██████████| 99/99 [01:08<00:00,  1.44it/s, epoch=1/14, avg_loss=41.3]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=2/14, avg_loss=-126]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=3/14, avg_loss=-133]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=4/14, avg_loss=-139]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=5/14, avg_loss=-143]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=6/14, avg_loss=-146]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=7/14, avg_loss=-148]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=8/14, avg_loss=-149]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=9/14, avg_loss=-151]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=10/14, avg_loss=-152]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=11/14, avg_loss=-153]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=12/14, avg_loss=-154]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=13/14, avg_loss=-154]
100%|██████████| 99/9


### NaNs probability = 0.7000000000000001 ###



100%|██████████| 99/99 [01:08<00:00,  1.45it/s, epoch=1/14, avg_loss=39.1]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=2/14, avg_loss=-125]
100%|██████████| 99/99 [01:09<00:00,  1.42it/s, epoch=3/14, avg_loss=-136]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=4/14, avg_loss=-105] 
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=5/14, avg_loss=-128]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=6/14, avg_loss=-135]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=7/14, avg_loss=-140]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=8/14, avg_loss=-142]
100%|██████████| 99/99 [01:13<00:00,  1.35it/s, epoch=9/14, avg_loss=-143]
100%|██████████| 99/99 [01:11<00:00,  1.38it/s, epoch=10/14, avg_loss=-145]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=11/14, avg_loss=-145]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=12/14, avg_loss=-146]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=13/14, avg_loss=-147]
100%|██████████| 99/


### NaNs probability = 0.8 ###



100%|██████████| 99/99 [01:08<00:00,  1.45it/s, epoch=1/14, avg_loss=49.4]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=2/14, avg_loss=-105] 
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=3/14, avg_loss=-120]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=4/14, avg_loss=-126]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=5/14, avg_loss=-132]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=6/14, avg_loss=-139]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=7/14, avg_loss=-143]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=8/14, avg_loss=-147]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=9/14, avg_loss=-151]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=10/14, avg_loss=-155]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=11/14, avg_loss=-155]
100%|██████████| 99/99 [01:10<00:00,  1.41it/s, epoch=12/14, avg_loss=-157]
100%|██████████| 99/99 [01:10<00:00,  1.40it/s, epoch=13/14, avg_loss=-159]
100%|██████████| 99/

CPU times: user 36min 44s, sys: 16min 7s, total: 52min 52s
Wall time: 1h 6min 9s


#### Predict

In [29]:
n_series = solar_dataset_train[0]['target'].shape[0]
train_size = solar_dataset_train[0]['target'].shape[1]

for i in np.linspace(0.1, 0.8, 8):
    print(f"\n### NaNs probability = {i} ###\n")

    nans_prob = np.round(i, 1)

    predictor = torch.load(os.path.join(MODELS_FOLDER, f"estimator__solar_{nans_prob}.ckpt"))

    test = copy.copy(solar_dataset_test)
    
    for j in range(n_series):

        # take random indices
        idxs = np.random.choice(range(0, train_size), size=int(nans_prob * train_size), replace=False)

        # nan test values
        for k in range(len(test)):
            test[k]['target'][j, idxs] = np.nan

    success = 0
    num_samples = 8

    while success != 1 and num_samples != 100:
        try:
            # evaluate model
            evaluator = MultivariateEvaluator(
                quantiles=(np.arange(20)/20.0)[1:],
                target_agg_funcs={'sum': np.sum}
            )

            forecast_it, ts_it = make_evaluation_predictions(
                dataset=test,
                predictor=predictor,
                num_samples=num_samples
            )

            forecasts = list(forecast_it)
            targets = list(ts_it)

            agg_metric, _ = evaluator(targets, forecasts, num_series=len(test))

            # save metrics
            with open(os.path.join(PREDICTIONS_FOLDER, f"agg_metrics__solar_{nans_prob}.json"), 'w') as fp:
                json.dump(agg_metric, fp)
            
            # save forecasts
            with open(os.path.join(PREDICTIONS_FOLDER, f"targets__solar_{nans_prob}.json"), 'wb') as handle:
                pickle.dump(targets, handle, protocol=pickle.HIGHEST_PROTOCOL)
                
            with open(os.path.join(PREDICTIONS_FOLDER, f"forecasts__solar_{nans_prob}.json"), 'wb') as handle:
                pickle.dump(forecasts, handle, protocol=pickle.HIGHEST_PROTOCOL)

            success = 1

        except:
            num_samples += 1

    if num_samples == 100 and success == 0:
        print(print(f"\n### NaNs probability = {i} NOT SUCCESS ###\n"))
        
    solar_dataset_test = solar_test_grouper(solar.test)


### NaNs probability = 0.4 ###



Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [


### NaNs probability = 0.4 NOT SUCCESS ###

None

### NaNs probability = 0.5 ###



Running evaluation: 7it [00:00, 229.10it/s]
Running evaluation: 7it [00:00, 246.74it/s]
Running evaluation: 7it [00:00, 253.88it/s]
Running evaluation: 7it [00:00, 250.05it/s]
Running evaluation: 7it [00:00, 253.30it/s]
Running evaluation: 7it [00:00, 257.99it/s]
Running evaluation: 7it [00:00, 256.46it/s]
Running evaluation: 7it [00:00, 260.55it/s]
Running evaluation: 7it [00:00, 253.89it/s]
Running evaluation: 7it [00:00, 246.99it/s]
Running evaluation: 7it [00:00, 252.28it/s]
Running evaluation: 7it [00:00, 248.26it/s]
Running evaluation: 7it [00:00, 251.75it/s]
Running evaluation: 7it [00:00, 248.71it/s]
Running evaluation: 7it [00:00, 245.36it/s]
Running evaluation: 7it [00:00, 259.98it/s]
Running evaluation: 7it [00:00, 266.64it/s]
Running evaluation: 7it [00:00, 246.58it/s]
Running evaluation: 7it [00:00, 252.78it/s]
Running evaluation: 7it [00:00, 260.10it/s]
Running evaluation: 7it [00:00, 253.68it/s]
Running evaluation: 7it [00:00, 249.92it/s]
Running evaluation: 7it [00:00, 


### NaNs probability = 0.6000000000000001 ###



Running evaluation: 7it [00:00, 258.46it/s]
Running evaluation: 7it [00:00, 219.17it/s]
Running evaluation: 7it [00:00, 256.36it/s]
Running evaluation: 7it [00:00, 245.79it/s]
Running evaluation: 7it [00:00, 253.29it/s]
Running evaluation: 7it [00:00, 258.71it/s]
Running evaluation: 7it [00:00, 241.76it/s]
Running evaluation: 7it [00:00, 248.58it/s]
Running evaluation: 7it [00:00, 234.70it/s]
Running evaluation: 7it [00:00, 249.55it/s]
Running evaluation: 7it [00:00, 245.26it/s]
Running evaluation: 7it [00:00, 247.98it/s]
Running evaluation: 7it [00:00, 243.89it/s]
Running evaluation: 7it [00:00, 251.30it/s]
Running evaluation: 7it [00:00, 251.83it/s]
Running evaluation: 7it [00:00, 254.66it/s]
Running evaluation: 7it [00:00, 254.73it/s]
Running evaluation: 7it [00:00, 249.29it/s]
Running evaluation: 7it [00:00, 249.69it/s]
Running evaluation: 7it [00:00, 258.02it/s]
Running evaluation: 7it [00:00, 251.10it/s]
Running evaluation: 7it [00:00, 258.96it/s]
Running evaluation: 7it [00:00, 


### NaNs probability = 0.7000000000000001 ###



Running evaluation: 2it [00:00, 201.55it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 4it [00:00, 243.16it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 7it [00:00, 265.01it/s]
Running evaluation: 7it [00:00, 236.34it/s]
Running evaluation: 7it [00:00, 256.17it/s]
Running evaluation: 7it [00:00, 256.42it/s]
Running evaluation: 7it [00:00, 254.40it/s]
Running evaluation: 7it [00:00, 254.46it/s]
Running evaluation: 7it [00:00, 264.76it/s]
Running evaluation: 7it [00:00, 247.58it/s]
Running evaluation: 7it [00:00, 241.65it/s]
Running evaluation: 7it [00:00, 231.96it/s]
Running evaluation: 7it [00:00, 251.17it/s]
Running evaluation: 7it [00:00, 250.82it/s]
Running evaluation: 7it [00:00, 255.18it/s]
Running evaluation: 7it [00:00, 255.53it/s]
Running evaluation: 7it [00:00, 250.53it/s]
Running evaluation: 7it [00:00, 246.44it/s]
Running evaluation: 7it [00:00, 250.59it/s]
Running evaluation: 7it [00:00, 250.04it/s]
Running evaluation: 7it [00:00, 239.16it/s


### NaNs probability = 0.8 ###



Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [


### NaNs probability = 0.8 NOT SUCCESS ###

None


### Electricity

#### Train

In [90]:
%%time

train_size = electricity_dataset_train[0]['target'].shape[1]
test_size = electricity_dataset_test[0]['target'].shape[1]

n_series = electricity_dataset_train[0]['target'].shape[0]

for i in np.linspace(0.1, 0.8, 8):
    print(f"\n### NaNs probability = {i} ###\n")
    
    train = copy.copy(electricity_dataset_train)
    test = copy.copy(electricity_dataset_test)

    nans_prob = np.round(i, 1)

    for j in range(n_series):

        # take random indices
        idxs = np.random.choice(range(0, train_size), size=int(nans_prob * train_size), replace=False)

        # nan train values
        train[0]['target'][j, idxs] = np.nan

        # nan test values
        for k in range(len(test)):
            idxs = np.random.choice(range(0, test_size), size=int(nans_prob * test_size), replace=False)

            test[k]['target'][j, idxs] = np.nan
    
    train = list(train) * 100

    # train
    estimator = TransformerTempFlowEstimator(
        input_size=744,
        target_dim=int(electricity.metadata.feat_static_cat[0].cardinality),
        prediction_length=electricity.metadata.prediction_length,
        context_length=electricity.metadata.prediction_length * 4,
        flow_type='MAF',
        dequantize=True,
        freq=electricity.metadata.freq,
        trainer=Trainer(
            device='cpu',
            epochs=14,
            learning_rate=1e-3,
            num_batches_per_epoch=100,
            batch_size=64,
        )
    )

    predictor = estimator.train(train, num_workers=4)

    # save model
    torch.save(predictor, os.path.join(MODELS_FOLDER, f"estimator__electricity_{nans_prob}.ckpt"))

    electricity_dataset_train = electricity_train_grouper(electricity.train)
    electricity_dataset_test = electricity_test_grouper(electricity.test)


### NaNs probability = 0.1 ###



100%|██████████| 99/99 [01:13<00:00,  1.35it/s, epoch=1/14, avg_loss=265]
100%|██████████| 99/99 [01:17<00:00,  1.29it/s, epoch=2/14, avg_loss=215]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=3/14, avg_loss=208]
100%|██████████| 99/99 [01:15<00:00,  1.31it/s, epoch=4/14, avg_loss=200]
100%|██████████| 99/99 [01:15<00:00,  1.31it/s, epoch=5/14, avg_loss=192]
100%|██████████| 99/99 [01:14<00:00,  1.33it/s, epoch=6/14, avg_loss=184]
100%|██████████| 99/99 [01:14<00:00,  1.32it/s, epoch=7/14, avg_loss=178]
100%|██████████| 99/99 [01:16<00:00,  1.30it/s, epoch=8/14, avg_loss=172]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=9/14, avg_loss=166]
100%|██████████| 99/99 [01:17<00:00,  1.27it/s, epoch=10/14, avg_loss=161]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=11/14, avg_loss=156]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=12/14, avg_loss=152]
100%|██████████| 99/99 [01:16<00:00,  1.30it/s, epoch=13/14, avg_loss=150]
100%|██████████| 99/99 [01:17<00:0


### NaNs probability = 0.2 ###



100%|██████████| 99/99 [01:14<00:00,  1.33it/s, epoch=1/14, avg_loss=265]
100%|██████████| 99/99 [01:15<00:00,  1.31it/s, epoch=2/14, avg_loss=221]
100%|██████████| 99/99 [01:15<00:00,  1.30it/s, epoch=3/14, avg_loss=213]
100%|██████████| 99/99 [01:14<00:00,  1.32it/s, epoch=4/14, avg_loss=206]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=5/14, avg_loss=198]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=6/14, avg_loss=191]
100%|██████████| 99/99 [01:18<00:00,  1.27it/s, epoch=7/14, avg_loss=186]
100%|██████████| 99/99 [01:17<00:00,  1.27it/s, epoch=8/14, avg_loss=179]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=9/14, avg_loss=174]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=10/14, avg_loss=169]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=11/14, avg_loss=165]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=12/14, avg_loss=161]
100%|██████████| 99/99 [01:16<00:00,  1.30it/s, epoch=13/14, avg_loss=159]
100%|██████████| 99/99 [01:18<00:0


### NaNs probability = 0.30000000000000004 ###



100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=1/14, avg_loss=301]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=2/14, avg_loss=263]
100%|██████████| 99/99 [01:15<00:00,  1.31it/s, epoch=3/14, avg_loss=258]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=4/14, avg_loss=251]
100%|██████████| 99/99 [01:17<00:00,  1.27it/s, epoch=5/14, avg_loss=245]
100%|██████████| 99/99 [01:18<00:00,  1.27it/s, epoch=6/14, avg_loss=240]
100%|██████████| 99/99 [01:17<00:00,  1.28it/s, epoch=7/14, avg_loss=236]
100%|██████████| 99/99 [01:16<00:00,  1.29it/s, epoch=8/14, avg_loss=232]
100%|██████████| 99/99 [01:18<00:00,  1.27it/s, epoch=9/14, avg_loss=228]
100%|██████████| 99/99 [01:15<00:00,  1.30it/s, epoch=10/14, avg_loss=225]
100%|██████████| 99/99 [01:19<00:00,  1.25it/s, epoch=11/14, avg_loss=223]
100%|██████████| 99/99 [01:21<00:00,  1.22it/s, epoch=12/14, avg_loss=221]
100%|██████████| 99/99 [01:19<00:00,  1.25it/s, epoch=13/14, avg_loss=219]
100%|██████████| 99/99 [01:18<00:0


### NaNs probability = 0.4 ###



100%|██████████| 99/99 [01:20<00:00,  1.23it/s, epoch=1/14, avg_loss=313]
100%|██████████| 99/99 [01:21<00:00,  1.22it/s, epoch=2/14, avg_loss=283]
100%|██████████| 99/99 [01:21<00:00,  1.22it/s, epoch=3/14, avg_loss=278]
100%|██████████| 99/99 [01:21<00:00,  1.22it/s, epoch=4/14, avg_loss=274]
100%|██████████| 99/99 [01:21<00:00,  1.22it/s, epoch=5/14, avg_loss=267]
100%|██████████| 99/99 [01:21<00:00,  1.22it/s, epoch=6/14, avg_loss=265]
100%|██████████| 99/99 [01:23<00:00,  1.19it/s, epoch=7/14, avg_loss=259]
100%|██████████| 99/99 [01:23<00:00,  1.19it/s, epoch=8/14, avg_loss=256]
100%|██████████| 99/99 [01:23<00:00,  1.19it/s, epoch=9/14, avg_loss=253]
100%|██████████| 99/99 [01:23<00:00,  1.19it/s, epoch=10/14, avg_loss=250]
100%|██████████| 99/99 [01:21<00:00,  1.21it/s, epoch=11/14, avg_loss=248]
100%|██████████| 99/99 [01:20<00:00,  1.22it/s, epoch=12/14, avg_loss=247]
100%|██████████| 99/99 [01:22<00:00,  1.21it/s, epoch=13/14, avg_loss=246]
100%|██████████| 99/99 [01:21<00:0


### NaNs probability = 0.5 ###



100%|██████████| 99/99 [01:23<00:00,  1.19it/s, epoch=1/14, avg_loss=320]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=2/14, avg_loss=290]
100%|██████████| 99/99 [01:25<00:00,  1.15it/s, epoch=3/14, avg_loss=284]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=4/14, avg_loss=278]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=5/14, avg_loss=278]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=6/14, avg_loss=270]
100%|██████████| 99/99 [01:27<00:00,  1.13it/s, epoch=7/14, avg_loss=266]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=8/14, avg_loss=263]
100%|██████████| 99/99 [01:25<00:00,  1.15it/s, epoch=9/14, avg_loss=261]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=10/14, avg_loss=258]
100%|██████████| 99/99 [01:27<00:00,  1.13it/s, epoch=11/14, avg_loss=257]
100%|██████████| 99/99 [01:27<00:00,  1.14it/s, epoch=12/14, avg_loss=256]
100%|██████████| 99/99 [01:27<00:00,  1.14it/s, epoch=13/14, avg_loss=255]
100%|██████████| 99/99 [01:25<00:0


### NaNs probability = 0.6 ###



100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=1/14, avg_loss=310]
100%|██████████| 99/99 [01:27<00:00,  1.13it/s, epoch=2/14, avg_loss=281]
100%|██████████| 99/99 [01:27<00:00,  1.13it/s, epoch=3/14, avg_loss=275]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=4/14, avg_loss=268]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=5/14, avg_loss=261]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=6/14, avg_loss=257]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=7/14, avg_loss=252]
100%|██████████| 99/99 [01:27<00:00,  1.13it/s, epoch=8/14, avg_loss=248]
100%|██████████| 99/99 [01:27<00:00,  1.14it/s, epoch=9/14, avg_loss=245]
100%|██████████| 99/99 [01:27<00:00,  1.14it/s, epoch=10/14, avg_loss=242]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=11/14, avg_loss=239]
100%|██████████| 99/99 [01:27<00:00,  1.14it/s, epoch=12/14, avg_loss=238]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=13/14, avg_loss=236]
100%|██████████| 99/99 [01:27<00:0


### NaNs probability = 0.7000000000000001 ###



100%|██████████| 99/99 [01:22<00:00,  1.21it/s, epoch=1/14, avg_loss=282]
100%|██████████| 99/99 [01:24<00:00,  1.18it/s, epoch=2/14, avg_loss=256]
100%|██████████| 99/99 [01:25<00:00,  1.15it/s, epoch=3/14, avg_loss=248]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=4/14, avg_loss=241]
100%|██████████| 99/99 [01:24<00:00,  1.17it/s, epoch=5/14, avg_loss=232]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=6/14, avg_loss=227]
100%|██████████| 99/99 [01:26<00:00,  1.15it/s, epoch=7/14, avg_loss=219]
100%|██████████| 99/99 [01:25<00:00,  1.15it/s, epoch=8/14, avg_loss=214]
100%|██████████| 99/99 [01:25<00:00,  1.15it/s, epoch=9/14, avg_loss=209]
100%|██████████| 99/99 [01:26<00:00,  1.15it/s, epoch=10/14, avg_loss=204]
100%|██████████| 99/99 [01:25<00:00,  1.15it/s, epoch=11/14, avg_loss=200]
100%|██████████| 99/99 [01:26<00:00,  1.15it/s, epoch=12/14, avg_loss=197]
100%|██████████| 99/99 [01:26<00:00,  1.15it/s, epoch=13/14, avg_loss=195]
100%|██████████| 99/99 [01:24<00:0


### NaNs probability = 0.8 ###



100%|██████████| 99/99 [01:23<00:00,  1.19it/s, epoch=1/14, avg_loss=237]
100%|██████████| 99/99 [01:24<00:00,  1.17it/s, epoch=2/14, avg_loss=207]
100%|██████████| 99/99 [01:26<00:00,  1.15it/s, epoch=3/14, avg_loss=193]
100%|██████████| 99/99 [01:24<00:00,  1.17it/s, epoch=4/14, avg_loss=179]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=5/14, avg_loss=165]
100%|██████████| 99/99 [01:24<00:00,  1.17it/s, epoch=6/14, avg_loss=150]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=7/14, avg_loss=154]
100%|██████████| 99/99 [01:27<00:00,  1.13it/s, epoch=8/14, avg_loss=139]
100%|██████████| 99/99 [01:25<00:00,  1.16it/s, epoch=9/14, avg_loss=126]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=10/14, avg_loss=116]
100%|██████████| 99/99 [01:37<00:00,  1.01it/s, epoch=11/14, avg_loss=108]
100%|██████████| 99/99 [01:29<00:00,  1.11it/s, epoch=12/14, avg_loss=101]
100%|██████████| 99/99 [01:26<00:00,  1.14it/s, epoch=13/14, avg_loss=96.1]
100%|██████████| 99/99 [01:25<00:

CPU times: user 1h 31min 51s, sys: 42min 45s, total: 2h 14min 36s
Wall time: 2h 34min 37s


#### Predict

In [30]:
# in electricity dataset test size is less than train size 
# so we will take test size - prediction length indeces range to nan values

train_size = electricity_dataset_train[0]['target'].shape[1]
test_size = electricity_dataset_test[0]['target'].shape[1]
test_size = test_size - electricity.metadata.prediction_length

n_series = electricity_dataset_train[0]['target'].shape[0]

for i in np.linspace(0.1, 0.8, 8):
    print(f"\n### NaNs probability = {i} ###\n")

    nans_prob = np.round(i, 1)

    predictor = torch.load(os.path.join(MODELS_FOLDER, f"estimator__electricity_{nans_prob}.ckpt"))

    test = copy.copy(electricity_dataset_test)
    
    for j in range(n_series):

        # nan test values
        for k in range(len(test)):
            idxs = np.random.choice(range(0, test_size), size=int(nans_prob * test_size), replace=False)

            test[k]['target'][j, idxs] = np.nan
    
    success = 0
    num_samples = 8

    while success != 1 and num_samples != 100:
        try:
            # evaluate model
            evaluator = MultivariateEvaluator(
                quantiles=(np.arange(20)/20.0)[1:],
                target_agg_funcs={'sum': np.sum}
            )

            forecast_it, ts_it = make_evaluation_predictions(
                dataset=test,
                predictor=predictor,
                num_samples=num_samples
            )

            forecasts = list(forecast_it)
            targets = list(ts_it)

            agg_metric, _ = evaluator(targets, forecasts, num_series=len(test))

            # save metrics
            with open(os.path.join(PREDICTIONS_FOLDER, f"agg_metrics__electricity_{nans_prob}.json"), 'w') as fp:
                json.dump(agg_metric, fp)
            
            # save forecasts
            with open(os.path.join(PREDICTIONS_FOLDER, f"targets__electricity_{nans_prob}.json"), 'wb') as handle:
                pickle.dump(targets, handle, protocol=pickle.HIGHEST_PROTOCOL)
                
            with open(os.path.join(PREDICTIONS_FOLDER, f"forecasts__electricity_{nans_prob}.json"), 'wb') as handle:
                pickle.dump(forecasts, handle, protocol=pickle.HIGHEST_PROTOCOL)

            success = 1

        except:
            num_samples += 1

    if num_samples == 100 and success == 0:
        print(print(f"\n### NaNs probability = {i} NOT SUCCESS ###\n"))
        
    electricity_dataset_test = electricity_test_grouper(electricity.test)


### NaNs probability = 0.3 ###



Running evaluation: 6it [00:00, 251.81it/s]
Running evaluation: 5it [00:00, 255.12it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 7it [00:00, 258.15it/s]
Running evaluation: 7it [00:00, 260.99it/s]
Running evaluation: 7it [00:00, 255.94it/s]
Running evaluation: 7it [00:00, 260.12it/s]
Running evaluation: 7it [00:00, 266.13it/s]
Running evaluation: 7it [00:00, 259.24it/s]
Running evaluation: 7it [00:00, 264.90it/s]
Running evaluation: 7it [00:00, 272.58it/s]
Running evaluation: 7it [00:00, 258.20it/s]
Running evaluation: 7it [00:00, 239.85it/s]
Running evaluation: 7it [00:00, 254.47it/s]
Running evaluation: 7it [00:00, 260.20it/s]
Running evaluation: 7it [00:00, 265.48it/s]
Running evaluation: 7it [00:00, 268.64it/s]
Running evaluation: 7it [00:00, 269.07it/s]
Running evaluation: 7it [00:00, 265.37it/s]
Running evaluation: 7it [00:00, 268.58it/s]
Running evaluation: 7it [00:00, 266.43it/s]
Running evaluation: 7it [00:00, 267.49it/s]
Running evaluation: 7it [00:00, 268.2


### NaNs probability = 0.4 ###



Running evaluation: 4it [00:00, 239.21it/s]
Running evaluation: 7it [00:00, 263.57it/s]
Running evaluation: 7it [00:00, 236.48it/s]
Running evaluation: 7it [00:00, 236.48it/s]
Running evaluation: 7it [00:00, 227.71it/s]
Running evaluation: 7it [00:00, 241.51it/s]
Running evaluation: 7it [00:00, 249.08it/s]
Running evaluation: 7it [00:00, 239.82it/s]
Running evaluation: 7it [00:00, 246.34it/s]
Running evaluation: 7it [00:00, 216.93it/s]
Running evaluation: 7it [00:00, 222.38it/s]
Running evaluation: 7it [00:00, 228.60it/s]
Running evaluation: 7it [00:00, 232.38it/s]
Running evaluation: 7it [00:00, 233.11it/s]
Running evaluation: 7it [00:00, 232.99it/s]
Running evaluation: 7it [00:00, 243.54it/s]
Running evaluation: 7it [00:00, 238.15it/s]
Running evaluation: 7it [00:00, 240.92it/s]
Running evaluation: 7it [00:00, 234.66it/s]
Running evaluation: 7it [00:00, 253.82it/s]
Running evaluation: 7it [00:00, 245.36it/s]
Running evaluation: 7it [00:00, 250.98it/s]
Running evaluation: 7it [00:00, 


### NaNs probability = 0.5 ###



Running evaluation: 7it [00:00, 264.91it/s]
Running evaluation: 7it [00:00, 236.18it/s]
Running evaluation: 7it [00:00, 249.63it/s]
Running evaluation: 7it [00:00, 257.12it/s]
Running evaluation: 7it [00:00, 266.00it/s]
Running evaluation: 7it [00:00, 251.59it/s]
Running evaluation: 7it [00:00, 247.75it/s]
Running evaluation: 7it [00:00, 261.63it/s]
Running evaluation: 7it [00:00, 239.09it/s]
Running evaluation: 7it [00:00, 246.06it/s]
Running evaluation: 7it [00:00, 251.83it/s]
Running evaluation: 7it [00:00, 253.54it/s]
Running evaluation: 7it [00:00, 252.79it/s]
Running evaluation: 7it [00:00, 238.85it/s]
Running evaluation: 7it [00:00, 254.53it/s]
Running evaluation: 7it [00:00, 259.48it/s]
Running evaluation: 7it [00:00, 261.03it/s]
Running evaluation: 7it [00:00, 246.71it/s]
Running evaluation: 7it [00:00, 262.38it/s]
Running evaluation: 7it [00:00, 253.10it/s]
Running evaluation: 7it [00:00, 248.66it/s]
Running evaluation: 7it [00:00, 267.88it/s]
Running evaluation: 7it [00:00, 


### NaNs probability = 0.6000000000000001 ###



Running evaluation: 2it [00:00, 234.27it/s]
Running evaluation: 7it [00:00, 259.79it/s]
Running evaluation: 7it [00:00, 255.29it/s]
Running evaluation: 7it [00:00, 251.85it/s]
Running evaluation: 7it [00:00, 257.18it/s]
Running evaluation: 7it [00:00, 249.90it/s]
Running evaluation: 7it [00:00, 253.65it/s]
Running evaluation: 7it [00:00, 248.52it/s]
Running evaluation: 7it [00:00, 259.70it/s]
Running evaluation: 7it [00:00, 232.03it/s]
Running evaluation: 7it [00:00, 203.60it/s]
Running evaluation: 7it [00:00, 190.61it/s]
Running evaluation: 7it [00:00, 245.23it/s]
Running evaluation: 7it [00:00, 249.27it/s]
Running evaluation: 7it [00:00, 260.38it/s]
Running evaluation: 7it [00:00, 262.01it/s]
Running evaluation: 7it [00:00, 253.54it/s]
Running evaluation: 7it [00:00, 251.18it/s]
Running evaluation: 7it [00:00, 258.37it/s]
Running evaluation: 7it [00:00, 253.93it/s]
Running evaluation: 7it [00:00, 264.41it/s]
Running evaluation: 7it [00:00, 261.33it/s]
Running evaluation: 7it [00:00, 


### NaNs probability = 0.7 ###



Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 1it [00:00, 221.63it/s]
Running evaluation: 1it [00:00, 205.04it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 1it [00:00, 211.42it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 1it [00:00, 198.89it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Runni


### NaNs probability = 0.7 NOT SUCCESS ###

None

### NaNs probability = 0.8 ###



Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [00:00, ?it/s]
Running evaluation: 0it [


### NaNs probability = 0.8 NOT SUCCESS ###

None


### Echange

In [91]:
exchange_freq = 'B'

#### Train

In [94]:
%%time

train_size = exchange_dataset_train[0]['target'].shape[1]
n_series = exchange_dataset_train[0]['target'].shape[0]

for i in np.linspace(0.1, 0.8, 8):
    print(f"\n### NaNs probability = {i} ###\n")
    
    train = copy.copy(exchange_dataset_train)

    nans_prob = np.round(i, 1)

    for j in range(n_series):

        # take random indices
        idxs = np.random.choice(range(0, train_size), size=int(nans_prob * train_size), replace=False)

        # nan train values
        train[0]['target'][j, idxs] = np.nan
    
    train = list(train) * 100

    # train
    estimator = TransformerTempFlowEstimator(
        input_size=28,
        target_dim=int(exchange.metadata.feat_static_cat[0].cardinality),
        prediction_length=exchange.metadata.prediction_length,
        context_length=exchange.metadata.prediction_length * 8,
        flow_type='MAF',
        dequantize=True,
        freq=exchange_freq,
        scaling=False,
        trainer=Trainer(
            device='cpu',
            epochs=14, 
            learning_rate=1e-3,
            num_batches_per_epoch=100,
            batch_size=64,
        )
    )

    predictor = estimator.train(train, num_workers=4)

    # save model
    torch.save(predictor, os.path.join(MODELS_FOLDER, f"estimator__exchange_{nans_prob}.ckpt"))

    exchange_dataset_train = exchange_train_grouper(exchange.train)


### NaNs probability = 0.1 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.2 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.30000000000000004 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.4 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.5 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.6 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.7000000000000001 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red


### NaNs probability = 0.8 ###



  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  entry[self.start_field] + idx + self.lead_time
  index = pd.period_range(start, periods=length, freq=start.freq)
  index = pd.period_range(start, periods=length, freq=start.freq)
  entry[self.start_field] + idx + self.lead_time
  self = reduction.pickle.load(from_parent)
  self = reduction.pickle.load(from_parent)
  self = red

CPU times: user 4h 11min 28s, sys: 1h 11min 3s, total: 5h 22min 31s
Wall time: 4h 3min 56s


#### Predict

In [30]:
n_series = exchange_dataset_train[0]['target'].shape[0]
train_size = exchange_dataset_train[0]['target'].shape[1]

for i in np.linspace(0.1, 0.8, 8):
    print(f"\n### NaNs probability = {i} ###\n")

    nans_prob = np.round(i, 1)

    predictor = torch.load(os.path.join(MODELS_FOLDER, f"estimator__exchange_{nans_prob}.ckpt"))

    test = copy.copy(exchange_dataset_test)
    
    for j in range(n_series):

        # take random indices
        idxs = np.random.choice(range(0, train_size), size=int(nans_prob * train_size), replace=False)

        # nan test values
        for k in range(len(test)):
            test[k]['target'][j, idxs] = np.nan
    
    # evaluate model
    evaluator = MultivariateEvaluator(
        quantiles=(np.arange(20)/20.0)[1:],
        target_agg_funcs={'sum': np.sum}
    )

    # for num_preds in range(10, 101)[::-1]:
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test,
        predictor=predictor,
        num_samples=20
    )

    forecasts = list(forecast_it)
    targets = list(ts_it)

    agg_metric, _ = evaluator(targets, forecasts, num_series=len(test))

    # save metrics
    with open(os.path.join(PREDICTIONS_FOLDER, f"agg_metrics__exchange_{nans_prob}.json"), 'w') as fp:
        json.dump(agg_metric, fp)
    
    # save forecasts
    with open(os.path.join(PREDICTIONS_FOLDER, f"targets__exchange_{nans_prob}.json"), 'wb') as handle:
        pickle.dump(targets, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(os.path.join(PREDICTIONS_FOLDER, f"forecasts__exchange_{nans_prob}.json"), 'wb') as handle:
        pickle.dump(forecasts, handle, protocol=pickle.HIGHEST_PROTOCOL)

    exchange_dataset_test = exchange_test_grouper(exchange.test)


### NaNs probability = 0.1 ###



Running evaluation: 5it [00:00, 205.86it/s]
Running evaluation: 5it [00:00, 236.76it/s]
Running evaluation: 5it [00:00, 207.34it/s]
Running evaluation: 5it [00:00, 221.94it/s]
Running evaluation: 5it [00:00, 215.38it/s]
Running evaluation: 5it [00:00, 219.60it/s]
Running evaluation: 5it [00:00, 237.35it/s]
Running evaluation: 5it [00:00, 208.28it/s]
Running evaluation: 5it [00:00, 173.32it/s]



### NaNs probability = 0.2 ###



Running evaluation: 5it [00:00, 239.27it/s]
Running evaluation: 5it [00:00, 216.36it/s]
Running evaluation: 5it [00:00, 232.15it/s]
Running evaluation: 5it [00:00, 227.77it/s]
Running evaluation: 5it [00:00, 239.40it/s]
Running evaluation: 5it [00:00, 216.23it/s]
Running evaluation: 5it [00:00, 228.46it/s]
Running evaluation: 5it [00:00, 221.73it/s]
Running evaluation: 5it [00:00, 213.03it/s]



### NaNs probability = 0.30000000000000004 ###



Running evaluation: 5it [00:00, 248.94it/s]
Running evaluation: 5it [00:00, 221.96it/s]
Running evaluation: 5it [00:00, 240.63it/s]
Running evaluation: 5it [00:00, 229.58it/s]
Running evaluation: 5it [00:00, 243.09it/s]
Running evaluation: 5it [00:00, 232.98it/s]
Running evaluation: 5it [00:00, 223.28it/s]
Running evaluation: 5it [00:00, 244.59it/s]
Running evaluation: 5it [00:00, 216.39it/s]



### NaNs probability = 0.4 ###



Running evaluation: 5it [00:00, 255.06it/s]
Running evaluation: 5it [00:00, 213.20it/s]
Running evaluation: 5it [00:00, 229.69it/s]
Running evaluation: 5it [00:00, 214.28it/s]
Running evaluation: 5it [00:00, 184.18it/s]
Running evaluation: 5it [00:00, 236.79it/s]
Running evaluation: 5it [00:00, 238.57it/s]
Running evaluation: 5it [00:00, 229.89it/s]
Running evaluation: 5it [00:00, 209.10it/s]



### NaNs probability = 0.5 ###



Running evaluation: 5it [00:00, 251.23it/s]
Running evaluation: 5it [00:00, 230.23it/s]
Running evaluation: 5it [00:00, 226.55it/s]
Running evaluation: 5it [00:00, 179.15it/s]
Running evaluation: 5it [00:00, 246.35it/s]
Running evaluation: 5it [00:00, 230.62it/s]
Running evaluation: 5it [00:00, 237.85it/s]
Running evaluation: 5it [00:00, 242.25it/s]
Running evaluation: 5it [00:00, 182.82it/s]



### NaNs probability = 0.6 ###



Running evaluation: 5it [00:00, 261.70it/s]
Running evaluation: 5it [00:00, 230.87it/s]
Running evaluation: 5it [00:00, 231.56it/s]
Running evaluation: 5it [00:00, 236.21it/s]
Running evaluation: 5it [00:00, 235.49it/s]
Running evaluation: 5it [00:00, 224.67it/s]
Running evaluation: 5it [00:00, 242.45it/s]
Running evaluation: 5it [00:00, 237.41it/s]
Running evaluation: 5it [00:00, 184.73it/s]



### NaNs probability = 0.7000000000000001 ###



Running evaluation: 5it [00:00, 251.70it/s]
Running evaluation: 5it [00:00, 229.07it/s]
Running evaluation: 5it [00:00, 230.94it/s]
Running evaluation: 5it [00:00, 238.53it/s]
Running evaluation: 5it [00:00, 235.99it/s]
Running evaluation: 5it [00:00, 240.03it/s]
Running evaluation: 5it [00:00, 221.16it/s]
Running evaluation: 5it [00:00, 240.25it/s]
Running evaluation: 5it [00:00, 195.75it/s]



### NaNs probability = 0.8 ###



Running evaluation: 5it [00:00, 245.83it/s]
Running evaluation: 5it [00:00, 229.19it/s]
Running evaluation: 5it [00:00, 234.69it/s]
Running evaluation: 5it [00:00, 242.84it/s]
Running evaluation: 5it [00:00, 237.74it/s]
Running evaluation: 5it [00:00, 227.10it/s]
Running evaluation: 5it [00:00, 239.12it/s]
Running evaluation: 5it [00:00, 232.10it/s]
Running evaluation: 5it [00:00, 184.26it/s]
