# Test

Test the models on the held-out seasons 2014 to 2016 (inclusive).

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1234)

import sys
sys.path.append("../src")
from utils import data as udata
from utils import dists as udists
from utils import misc as u
from tqdm import tqdm, trange
import os.path as path
import json
import stringcase
import models
import inspect

In [2]:
# Directory from which the component models and the actual data are read
EXP_DIR = "../data/processed/cdc-flusight-ensemble/"
# Directory with the saved model parameters; load_model reads
# INPUT_DIR/TARGET/<model-name>.json
INPUT_DIR = "../models/cdc-flusight-ensemble/"
# Target to evaluate; also used as the sub-directory name under INPUT_DIR
TARGET = "1-ahead"
# We use all the regions as of now
REGION = None
# We have training data from the set [2010, 2016]. From these, we use
# [2010, 2013] for training and CV. The remaining seasons 2014, 2015 and
# 2016 (the end of range() is exclusive) are the ones tested here.
TESTING_SEASONS = list(range(2014, 2017))

In [4]:
# One Component for every model that udata.available_models reports for EXP_DIR
components = [udata.Component(EXP_DIR, m) for m in udata.available_models(EXP_DIR)]
# Actual data built from the same directory; passed to udata.get_seasons_data
# together with the components when a season is evaluated
ad = udata.ActualData(EXP_DIR)

## Evaluation

We evaluate each model on every held-out test season and report the mean score across those seasons.

In [5]:
def evaluate_season(model, season):
    """
    Return score and model predictions for given season

    The model is queried row by row. After each prediction the true value
    for that row is offered to the model through `feedback`, so models that
    learn from feedback see the rows in order.

    Parameters
    ----------
    model
        Ensemble model with a `predict(index_row, component_predictions)`
        method (oracles additionally take a `truth` argument) and a
        `feedback(truth)` method that may raise NotImplementedError.
    season
        Season to evaluate; passed to udata.get_seasons_data as a
        single-element list.

    Returns
    -------
    score
        Mean of the log of udists.prediction_probabilities over all rows.
    predictions
        Array shaped like the first component's prediction matrix, holding
        the model output for every row.
    """

    yi, Xs, y = udata.get_seasons_data(ad, components, [season], TARGET, REGION)
    predictions = np.zeros_like(Xs[0])

    # HACK: Check if this is an oracle
    # This should ideally go in as a flag in the model
    # The signature of predict does not change between rows, so it is
    # inspected once here instead of on every iteration.
    is_oracle = "truth" in inspect.signature(model.predict).parameters

    for i in range(len(yi)):
        # Row i of every component's prediction matrix
        component_predictions = [X[i] for X in Xs]
        if is_oracle:
            # Oracles are additionally handed the truth for this row
            predictions[i, :] = model.predict(yi.iloc[i], component_predictions, y[i])
        else:
            predictions[i, :] = model.predict(yi.iloc[i], component_predictions)
        # Pass in feedback if model accepts it
        try:
            model.feedback(y[i])
        except NotImplementedError:
            # Model does not learn from feedback; nothing to do
            pass

    score = np.log(udists.prediction_probabilities([predictions], y, TARGET)).mean()
    return score, predictions

def evaluate(model, post_training_hook=None):
    """
    Evaluate on the testing seasons

    Every season is scored on its own deep copy of `model`, so whatever a
    model learns from feedback during one season cannot leak into the next
    one, whether or not a hook is supplied. The object passed in by the
    caller is left untouched.

    Parameters
    ----------
    model
        Model to evaluate; must be deep-copyable.
    post_training_hook
        Optional callable taking a model and returning a model. When given,
        it is applied to the fresh per-season copy before that season is
        evaluated.

    Returns
    -------
    Mean of the per-season scores returned by evaluate_season.
    """
    import copy

    scores = []
    for season in tqdm(TESTING_SEASONS):
        # Need to reset the model before every evaluation: start each season
        # from a pristine copy of the model as it was handed in.
        season_model = copy.deepcopy(model)
        if post_training_hook:
            season_model = post_training_hook(season_model)
        score, _ = evaluate_season(season_model, season)
        scores.append(score)

    return np.mean(scores)

def load_model(model):
    """
    Restore a model's saved state and hand the same model back

    The file is looked up under INPUT_DIR/TARGET and is named after the
    model's class, converted to spinal-case, with a ".json" extension.
    """

    class_name = type(model).__name__
    saved_file = path.join(
        INPUT_DIR,
        TARGET,
        stringcase.spinalcase(class_name) + ".json",
    )
    model.load(saved_file)
    return model

# Test results

## Oracle model
This is the oracle model, which is given the true value when it predicts; its score therefore sets the upper limit for the ensembles below.

In [6]:
# Constructed and evaluated directly; no saved parameters are loaded for the oracle
evaluate(models.OracleEnsemble(TARGET, len(components)))

100%|██████████| 3/3 [00:26<00:00,  8.89s/it]


-1.8314510639294843

## Mean ensemble

In [14]:
# Constructed and evaluated directly; no saved parameters are loaded for this model
evaluate(models.MeanEnsemble(TARGET, len(components)))

100%|██████████| 3/3 [00:00<00:00,  5.41it/s]


-2.782174668037595

## Degenerate EM

In [15]:
# Restore the saved parameters from INPUT_DIR/TARGET, then score on the test seasons
m = load_model(models.DemWeightEnsemble(TARGET, len(components)))
evaluate(m)

100%|██████████| 3/3 [00:00<00:00,  6.09it/s]


-2.618871198618432

## K-partition Degenerate EM

In [16]:
# The third constructor argument is left as None; presumably load() restores it
# from the saved file — TODO confirm.
# NOTE(review): the recorded score is identical to plain Degenerate EM to the
# last digit — verify that the loaded partitioning is the intended one.
m = load_model(models.KDemWeightEnsemble(TARGET, len(components), None))
evaluate(m)

100%|██████████| 3/3 [00:00<00:00,  4.88it/s]


-2.618871198618432

## Hit weight ensemble

In [17]:
# Restore the saved parameters, then score on the test seasons.
# The third constructor argument is left as None; presumably load() restores it
# from the saved file — TODO confirm.
m = load_model(models.HitWeightEnsemble(TARGET, len(components), None))
evaluate(m)

100%|██████████| 3/3 [00:00<00:00,  6.26it/s]


-2.6544628191807207

## Score weight ensemble

In [20]:
# Restore the saved parameters, then score on the test seasons.
# The third constructor argument is left as None; presumably load() restores it
# from the saved file — TODO confirm.
m = load_model(models.ScoreWeightEnsemble(TARGET, len(components), None))
evaluate(m)

100%|██████████| 3/3 [00:00<00:00,  5.15it/s]


-2.6252054613192026

## Multiplicative weight

In [24]:
# Define a post training hook
def pth(model):
    """
    Hook that starts the model from the weights saved for DEM

    The weights are taken from fit_params -> weights of the DEM file under
    INPUT_DIR/TARGET. The same model object is returned.
    """
    dem_file = path.join(INPUT_DIR, TARGET, "dem-weight-ensemble.json")
    with open(dem_file) as dem_fp:
        dem_saved = json.load(dem_fp)
    model._weights = dem_saved["fit_params"]["weights"]
    return model

def pth_reset(model):
    """
    Hook that gives every component the same weight

    Sets model._weights to a vector of length model.n_comps whose entries
    are all 1 / n_comps, and returns the same model object.
    """
    n_comps = model.n_comps
    uniform = np.full((n_comps,), 1.0)
    model._weights = uniform / n_comps
    return model

# evaluate applies the hook before every test season, so each season starts
# from the DEM weights set by pth. pth_reset (uniform weights) is defined above
# as an alternative starting point but is not used in this run.
m = load_model(models.MPWeightEnsemble(TARGET, len(components), None))
evaluate(m, pth)

100%|██████████| 3/3 [00:01<00:00,  2.36it/s]


-2.6181334017298283