# Test

Evaluate the ensemble models on the live season data and report each model's mean log score.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1234)

import sys
sys.path.append("../src")
from utils import data as udata
from utils import dists as udists
from utils import misc as u
from tqdm import tqdm, trange
import os.path as path
import json
import stringcase
import models
import inspect

In [11]:
# Directory the component model predictions and the actual data are read from
EXP_DIR = "../data/processed/cdc-flusight-ensemble-live/"
# Root directory of the saved model files; files are looked up under INPUT_DIR/TARGET
INPUT_DIR = "../models/cdc-flusight-ensemble/"
# Prediction target to evaluate; also used as the sub-directory name under INPUT_DIR
TARGET = "1-ahead"
# We currently evaluate on all regions, hence no specific region is selected
REGION = None

In [12]:
# One Component per model available in the experiment directory
components = [
    udata.Component(EXP_DIR, model_name)
    for model_name in udata.available_models(EXP_DIR)
]

# Actual data that the predictions are scored against
ad = udata.ActualData(EXP_DIR)

## Evaluation

In [17]:
def _evaluate(model):
    """
    Run `model` over the loaded data row by row and score its predictions.

    Rows are processed in order. After each prediction the matching truth
    value is handed to `model.feedback`; a model that raises
    `NotImplementedError` from `feedback` is simply given no feedback.

    Parameters
    ----------
    model
        Object exposing `predict(index, component_predictions)` (oracles take
        an extra `truth` argument) and `feedback(truth)`.

    Returns
    -------
    score
        Mean of `np.log` over the values returned by
        `udists.prediction_probabilities` for the model's predictions.
    predictions
        Array shaped like a single component's prediction matrix, holding the
        model's output for every row.
    """

    # NOTE(review): `[None]` is the seasons argument; presumably it selects all
    # available seasons -- confirm against `udata.get_seasons_data`.
    yi, Xs, y = udata.get_seasons_data(ad, components, [None], TARGET, REGION)
    predictions = np.zeros_like(Xs[0])

    # HACK: A model is treated as an oracle when its `predict` accepts a
    # `truth` argument. This should ideally go in as a flag in the model.
    # The signature does not change between rows, so inspect it only once
    # instead of on every iteration.
    is_oracle = "truth" in inspect.signature(model.predict).parameters

    for i in range(len(yi)):
        # i-th row of every component's prediction matrix
        component_rows = [X[i] for X in Xs]

        if is_oracle:
            predictions[i, :] = model.predict(yi.iloc[i], component_rows, y[i])
        else:
            predictions[i, :] = model.predict(yi.iloc[i], component_rows)

        # Pass in feedback if model accepts it
        try:
            model.feedback(y[i])
        except NotImplementedError:
            pass

    score = np.log(udists.prediction_probabilities([predictions], y, TARGET)).mean()
    return score, predictions

def evaluate(model, post_training_hook=None):
    """
    Score `model` on the testing data and return only the score.

    When `post_training_hook` is given, it is called with the model and the
    object it returns is the one that gets evaluated.
    """

    # Need to reset the model before every evaluation
    # TODO: This should be done even when not using a hook
    prepared = post_training_hook(model) if post_training_hook else model

    # `_evaluate` yields (score, predictions); the predictions are dropped here
    return _evaluate(prepared)[0]

def load_model(model):
    """
    Load the saved state for `model` from disk and return the same instance.

    The file lives under `INPUT_DIR/TARGET` and is named after the model's
    class in spinal-case with a `.json` extension (e.g. the DEM ensemble is
    read from `dem-weight-ensemble.json` elsewhere in this notebook).
    """

    class_name = type(model).__name__
    saved_file = stringcase.spinalcase(class_name) + ".json"
    model.load(path.join(INPUT_DIR, TARGET, saved_file))
    return model

# Test results

## Oracle model
The oracle ensemble is given the truth when predicting, so its score is the upper limit for what any ensemble can achieve here.

In [18]:
# No saved weights are loaded for the oracle; it is scored as constructed
evaluate(
    models.OracleEnsemble(TARGET, len(components))
)

-2.2603335204015305

## Mean ensemble

In [19]:
# No saved weights are loaded for the mean ensemble; it is scored as constructed
evaluate(
    models.MeanEnsemble(TARGET, len(components))
)

-3.326715479411095

## Degenerate EM

In [20]:
# Restore the saved DEM model state, then score it
m = load_model(
    models.DemWeightEnsemble(TARGET, len(components))
)
evaluate(m)

-3.2495550469619876

## K-partition Degenerate EM

In [21]:
# Third constructor argument is left as None; presumably it is filled in from
# the saved file by `load_model` -- TODO confirm.
# NOTE(review): the recorded score is identical to the plain DEM cell's score;
# confirm the k-partition model file really differs from the DEM one.
m = load_model(
    models.KDemWeightEnsemble(TARGET, len(components), None)
)
evaluate(m)

-3.2495550469619876

## Hit weight ensemble

In [22]:
# Third constructor argument is left as None; presumably it is filled in from
# the saved file by `load_model` -- TODO confirm.
m = load_model(
    models.HitWeightEnsemble(TARGET, len(components), None)
)
evaluate(m)

-3.23448551225504

## Score weight ensemble

In [23]:
# Third constructor argument is left as None; presumably it is filled in from
# the saved file by `load_model` -- TODO confirm.
m = load_model(
    models.ScoreWeightEnsemble(TARGET, len(components), None)
)
evaluate(m)

-3.207411559712781

## Multiplicative weight

In [26]:
# Define a post training hook
def pth(model):
    """
    Post training hook: start `model` from the saved DEM ensemble weights.

    Reads `fit_params.weights` from the DEM model file under
    `INPUT_DIR/TARGET`, stores it on `model._weights` and returns the model.
    """
    dem_file = path.join(INPUT_DIR, TARGET, "dem-weight-ensemble.json")
    with open(dem_file) as fp:
        saved_state = json.load(fp)

    # Read initial weights from DEM
    model._weights = saved_state["fit_params"]["weights"]
    return model

def pth_reset(model):
    """
    Post training hook: give every component of `model` the same weight.

    Sets `model._weights` to an array of length `model.n_comps` whose entries
    are all `1 / n_comps`, then returns the model.
    """
    n_comps = model.n_comps
    uniform = np.ones((n_comps,))
    uniform /= n_comps
    model._weights = uniform
    return model

# Load the saved model state; `pth` then replaces its weights with the DEM
# weights before scoring.
m = load_model(
    models.MPWeightEnsemble(TARGET, len(components), None)
)
evaluate(m, post_training_hook=pth)

-3.246089721420991