# Train (multibin)

This notebook explores weighting the component models using the multibin loss function.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
np.random.seed(1234)

import sys
sys.path.append("../src")
from utils import data as udata
from utils import dists as udists
from utils import misc as u
from truth import mask_truths, impute, FILL_SPACE
from predictors import make_predictor
import losses
from pymmwr import Epiweek
from tqdm import tqdm, trange
import ledge.merge as merge
import ledge.update as update
import ledge.fill as fill
from functools import partial
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import hyperopt.pyll.stochastic as st
import json
import os.path as path
import inspect

In [None]:
# Directory from which the component models and the actual data are loaded.
EXP_DIR = "../data/processed/cdc-flusight-ensemble/"
# Root directory under which results are written (per target and region).
OUTPUT_DIR = "../models/cdc-flusight-ensemble/"
# Target and region passed to every data lookup in this notebook.
TARGET = "1-ahead"
REGION = "nat"
# Truths are fetched for every lag from 0 to MAX_LAG inclusive.
MAX_LAG = 29
# Seasons 2010 through 2013 (the end of the range is exclusive).
TRAINING_SEASONS = list(range(2010, 2014))
# Log loss with the multibin option switched on; used both to build
# predictors and to score them.
LOSS_FN = partial(losses.logloss, multibin=True)
# Merge function handed to make_predictor.
MERGE_FN = merge.latest

In [None]:
# One Component per model that udata.available_models reports for EXP_DIR.
components = [udata.Component(EXP_DIR, m) for m in udata.available_models(EXP_DIR)]
# Handle used to look up truths by target, region, season and lag.
ad = udata.ActualData(EXP_DIR)

# Evaluation

In [None]:
def evaluate(predictor, loss_fn):
    """
    Score `predictor` on each season in TRAINING_SEASONS for the configured
    TARGET and REGION.

    For every season the predictor is called with the truths at lags
    0..MAX_LAG and the predictions of all components. Its first return value
    is scored with `loss_fn` against both `merge.earliest(truths)` and
    `merge.latest(truths)`.

    Returns a dict with keys "first_loss" and "final_loss": the mean, across
    seasons, of the per-season mean loss against each of those truths.
    """

    per_season = {"first": [], "final": []}

    with tqdm(total=len(TRAINING_SEASONS)) as progress:
        for season in TRAINING_SEASONS:
            lagged_truths = [
                ad.get(TARGET, REGION, season, lag=lag)
                for lag in range(MAX_LAG + 1)
            ]
            component_preds = [
                component.get(TARGET, REGION, season)
                for component in components
            ]

            # Both reference truths are derived before the predictor runs.
            references = {
                "first": merge.earliest(lagged_truths),
                "final": merge.latest(lagged_truths),
            }
            prediction, _ = predictor(lagged_truths, component_preds)

            for key in ("first", "final"):
                season_loss = loss_fn(prediction, references[key]).mean()
                per_season[key].append(float(season_loss))
            progress.update()

    return {
        "first_loss": np.mean(per_season["first"]),
        "final_loss": np.mean(per_season["final"])
    }

## Follow the leader

In [None]:
def ftlfn(cfg):
    """
    Hyperopt objective for the follow-the-leader update rule.

    Parameters
    ----------
    cfg : dict
        Sampled configuration with keys "k" and "lookback"; both are bound
        as keyword arguments of `update.ftl`.

    Returns
    -------
    dict
        "loss" (the final-truth loss that hyperopt minimises), "status",
        "first_loss" (the first-truth loss, kept for reference) and
        "config" (the configuration that was evaluated).
    """
    update_fn = partial(update.ftl, k=cfg["k"], lookback=cfg["lookback"])
    # Pass the configured update function, not the bare `update.ftl`;
    # otherwise the sampled hyperparameters never reach the predictor and
    # every trial scores the same model.
    scores = evaluate(make_predictor(LOSS_FN, MERGE_FN, update_fn), LOSS_FN)
    return {
        "loss": scores["final_loss"],
        "status": STATUS_OK,
        "first_loss": scores["first_loss"],
        "config": cfg
    }

# Records every evaluation so the best one can be read back afterwards.
trials = Trials()
# Each hyperparameter is a random integer shifted up by one, so neither can
# be zero (hp.randint's upper bound is exclusive, giving k in 1..20 and
# lookback in 1..33).
space = {
    "k": 1 + hp.randint("k", 20),
    "lookback": 1 + hp.randint("lookback", 33)
}
# Minimise the "loss" returned by ftlfn with TPE, using 20 evaluations.
best = fmin(ftlfn, space=space, algo=tpe.suggest, max_evals=20, trials=trials)
# Cell output: the result dict of the best trial.
trials.best_trial["result"]

In [None]:
# Write the best trial's result to <OUTPUT_DIR>/<TARGET>/<REGION>/ftl.json.
model_dir = u.ensure_dir(path.join(OUTPUT_DIR, TARGET, REGION))
ofile = path.join(model_dir, "ftl.json")
with open(ofile, "w") as fp:
    json.dump(trials.best_trial["result"], fp)