In [None]:
import wandb
import pandas as pd
import numpy as np
from tqdm import tqdm
import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))
from utils.load_model import load_model
import torch
from data_provider.data_factory import data_provider
from scipy.stats import iqr

In [None]:
# W&B tags used to select which runs are re-evaluated; a run matches when it
# carries at least one of these tags (see the "$in" filter in the query cell).
experiment_tags = [
    'e29_icml_constrained_newmodels',
]

In [None]:
def cvar(errors, alpha=0.05):
    """Mean of the errors lying at or above their ``1 - alpha`` quantile.

    Args:
        errors: array-like of error values (ndarray, list, tuple, ...). Inputs
            with more than one dimension are flattened before the quantile is
            taken.
        alpha: tail fraction; the threshold is the ``1 - alpha`` quantile of
            ``errors``.

    Returns:
        The mean of all entries that are greater than or equal to the
        threshold.

    Raises:
        ValueError: if ``errors`` is empty.
    """
    # Boolean-mask indexing below only works on ndarrays, so coerce first;
    # this lets plain Python sequences be passed as well.
    errors = np.asarray(errors).ravel()
    if errors.size == 0:
        raise ValueError("cvar() requires at least one error value")
    threshold = np.quantile(errors, 1 - alpha)
    return np.mean(errors[errors >= threshold])



In [None]:
def run_test(model, config, test_loader, device=None):
    """Run ``model`` over ``test_loader`` and collect targets and forecasts.

    Args:
        model: torch module to evaluate. It is switched to eval mode and is
            expected to already live on ``device``.
        config: object exposing ``pred_len``, ``label_len``, ``model`` and
            ``features``; ``output_attention`` is additionally read for models
            whose name contains neither ``'Linear'`` nor ``'TST'``.
        test_loader: iterable yielding
            ``(batch_x, batch_y, batch_x_mark, batch_y_mark)`` tensor tuples.
        device: device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used; for a model without
            parameters it falls back to CUDA if available, else CPU.

    Returns:
        ``(trues, preds)``: two numpy arrays holding, for every sample, the
        last ``pred_len`` steps of the target and of the model output. Only
        the last feature is kept when ``config.features == 'MS'``.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    if device is None:
        # Follow the model instead of assuming CUDA so the function also runs
        # on CPU-only hosts or with a model placed on another device.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Models whose name contains 'Linear' or 'TST' are called with the encoder
    # input only; every other model also receives time marks and a decoder input.
    encoder_only = 'Linear' in config.model or 'TST' in config.model
    # 'MS' keeps just the last feature; otherwise all features are kept.
    f_dim = -1 if config.features == 'MS' else 0

    preds = []
    trues = []

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y, batch_x_mark, batch_y_mark in test_loader:
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            if encoder_only:
                outputs = model(batch_x)
            else:
                batch_x_mark = batch_x_mark.float().to(device)
                batch_y_mark = batch_y_mark.float().to(device)

                # Decoder input: the first label_len steps of batch_y followed
                # by zeros standing in for the pred_len steps to forecast.
                dec_inp = torch.zeros_like(batch_y[:, -config.pred_len:, :])
                dec_inp = torch.cat([batch_y[:, :config.label_len, :], dec_inp], dim=1)

                outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                if config.output_attention:
                    # The forecast is the first element of the returned sequence.
                    outputs = outputs[0]

            outputs = outputs[:, -config.pred_len:, f_dim:]
            batch_y = batch_y[:, -config.pred_len:, f_dim:]

            preds.append(outputs.detach().cpu().numpy())
            trues.append(batch_y.detach().cpu().numpy())

    if not preds:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
    # Fold any extra leading axes into the first one; the last two axes are kept.
    preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
    trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])

    return trues, preds


In [None]:
def run_eval(run):
    """Reload the model behind a W&B run, score it on the test split and
    summarise the resulting error profile.

    Args:
        run: object exposing ``entity``, ``project`` and ``id``; these are
            passed to ``load_model`` to obtain the model and its config.

    Returns:
        dict mapping metric names to values. Squared and absolute errors are
        averaged over axes 0 and 2 of the arrays returned by ``run_test``,
        leaving one value per position along axis 1. The ``pointwise/*``
        entries describe the spread and upper tail of those squared-error
        values; the ``mae/test/*`` entries give the mean and standard
        deviation of the absolute-error values.
    """
    model, config = load_model(run.entity, run.project, run.id)
    _unused, test_loader = data_provider(config, 'test')
    model.to("cuda")

    targets, forecasts = run_test(model, config, test_loader)
    residuals = targets - forecasts
    errors = (residuals ** 2).mean(axis=(0, 2))
    mae = np.abs(residuals).mean(axis=(0, 2))

    metrics = {}
    # Upper-tail statistics at several tail fractions.
    for alpha in (0.01, 0.05, 0.1):
        level = 1 - alpha
        metrics.update({
            f'pointwise/cvar/{alpha}': cvar(errors, alpha),
            f'pointwise/quantile/{level}': np.quantile(errors, level),
        })

    # Dispersion of the squared-error profile, then the absolute-error summary.
    metrics.update({
        'pointwise/std': np.std(errors),
        'pointwise/max': np.max(errors),
        'pointwise/iqr': iqr(errors),
        'mae/test/mean': np.mean(mae),
        'mae/test/std': np.std(mae),
    })
    return metrics

In [None]:
# W&B entity and project that host the runs to re-evaluate.
workspace = "alelab"
project = "Autoformer"

# Experiment tags left over (commented out) from earlier edits of this cell,
# kept for reference:
#   e20_icml_monotonic, e20_icml_monotonic_no_resilience,
#   e21_icml_static_linear_no_resilience, e21_icml_static_linear_resilience
# (source notebook: notebooks/cvar.ipynb)

state = "Finished"

# Select runs that both (1) carry any of the experiment tags and
# (2) are in the requested state.
query_dict = {
    "$and": [
        {"tags": {"$in": experiment_tags}},
        {"state": state},
    ]
}

api = wandb.Api(timeout=10000)
runs = api.runs(f"{workspace}/{project}", query_dict)
print(f"Total runs: {len(runs)}")

In [None]:
#failed_runs = []
#for run in tqdm(runs):
    #metrics = run_eval(run)

In [None]:
# Re-evaluate every selected run and append the new metrics to that same W&B
# run. Ids of runs that could not be processed are collected in `failed_runs`.
failed_runs = []
for run in tqdm(runs):
    try:
        metrics = run_eval(run)
        wandb.init(id=run.id, entity=workspace, project=project, resume="must")
        try:
            wandb.log(metrics)
        finally:
            # Always close the resumed run, even if logging failed, so the
            # next iteration does not start with a run still open.
            wandb.finish()
    except Exception as exc:
        # `Exception` (not a bare except) keeps Ctrl-C / SystemExit able to
        # stop the loop; the reason is printed so failures can be diagnosed.
        failed_runs.append(run.id)
        print(f'Failed run {run.id}: {exc!r}')

In [None]:
# List the ids collected in `failed_runs`, one per line (prints nothing when
# the list is empty).
for run in failed_runs:
    print(run)