# Degenerate EM weights

This notebook generates weights for models using the degenerate EM method.

In [66]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
sys.path.append("../src")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import utils.data as udata
import utils.dists as udists
import utils.misc as u
import models
import os
import losses
from jrun import jin

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


We generate 5 types of weights for the models.
1. Equal weights. This assigns equal weight to each model.
2. Constant weights. This uses training data for all seasons, regions and targets to assign one weight for each model.
3. Target based weights. This uses data chunks for all seasons and regions, but a separate chunk for each target.
4. Target type weights. Similar to target based weights, but with targets grouped into seasonal and week-ahead types.
5. Target and region based weights. Similar to target based weights, but with an additional separation by region.

In [79]:
# Notebook parameters.
# NOTE(review): jin comes from jrun and is opaque here; each call presumably
# returns an externally supplied value for the named parameter and falls back
# to the second argument otherwise — confirm against jrun.

# Experiment name; it becomes the sub-directory of ../weights that receives the output files.
EXP_NAME = jin("exp", "seasons-4-to-3")
# Component models to weight; the fallback is whatever u.available_models reports for ../data.
COMPONENTS = jin("components", u.available_models("../data"))
# Rows whose first index column is strictly below this value are used as training data.
# presumably a year+week code (YYYYWW) — TODO confirm
TEST_SPLIT_THRESH = jin("splitweek", 201443)
# Directory the weight CSVs are written into.
# NOTE(review): ensure_dir presumably creates the directory and returns its path — confirm in utils.misc
OUTPUT_DIR = u.ensure_dir(f"../weights/{EXP_NAME}")

In [80]:
class Component:
    """
    Helper class pairing a component model's name with its data loader.

    Attributes
    ----------
    name
        Identifier of the component model, exactly as passed in.
    loader
        ``udata.ComponentDataLoader`` built from ``data_dir`` and ``name``.
    """

    def __init__(self, name, data_dir="../data"):
        """
        Parameters
        ----------
        name
            Identifier of the component model (presumably a string from
            ``COMPONENTS`` — confirm against ``u.available_models``).
        data_dir
            Root data directory handed to the loader. Defaults to
            ``"../data"``, the value that was previously hard-coded, so
            existing ``Component(name)`` calls behave as before.
        """
        self.name = name
        self.loader = udata.ComponentDataLoader(data_dir, name)
# One Component wrapper per configured model, kept in COMPONENTS order
components = list(map(Component, COMPONENTS))
# Loader for the actual values used alongside the component loaders below
actual_dl = udata.ActualDataLoader("../data")

## 1. Equal weights

In [81]:
# Every component receives the same share: 1 / (number of components)
model_names = [component.name for component in components]
weights = pd.DataFrame({
    "model": model_names,
    "weight": [1 / len(components) for _ in components],
})
weights.to_csv(f"{OUTPUT_DIR}/equal.csv", index=False)

## 2. Constant weights

In [82]:
def _training_scores(get_training_data, target):
    """
    Score all components on the training rows of a single target.

    Parameters
    ----------
    get_training_data
        One of the ``udata.get_*_training_data`` functions. It is called as
        ``get_training_data(target, None, actual_dl, loaders)`` and must
        return the triple ``(y, Xs, yi)``.
    target
        Target selector forwarded as the first argument (a week-ahead number
        or a seasonal target name).

    Returns
    -------
    Whatever ``udists.score_predictions`` returns for the training subset.
    """
    # NOTE(review): the second argument is presumably the region, with None
    # meaning "all regions" — confirm against utils.data
    y, Xs, yi = get_training_data(
        target, None,
        actual_dl, [c.loader for c in components]
    )
    # Use only the training data: rows whose first index column is below the split threshold
    train_indices = yi[:, 0] < TEST_SPLIT_THRESH
    return udists.score_predictions([X[train_indices] for X in Xs], y[train_indices])


# Need to collect scores for every target: week-ahead targets first, then the
# seasonal ones, in the same order as before so the concatenated rows line up.
score_chunks = [
    _training_scores(udata.get_week_ahead_training_data, week)
    for week in [1, 2, 3, 4]
]
score_chunks += [
    _training_scores(udata.get_seasonal_training_data, s_target)
    for s_target in ["peak", "peak_wk", "onset_wk"]
]

# Stack the per-target score blocks along the first axis
scores = np.concatenate(score_chunks, axis=0)

In [83]:
# One weight per component, fitted by models.dem on the pooled training scores.
# NOTE(review): the scores are exponentiated first, which suggests they are
# stored as log scores — confirm against udists.score_predictions
component_names = [component.name for component in components]
weights = pd.DataFrame({
    "model": component_names,
    "weight": models.dem(np.exp(scores)),
})
weights.to_csv(f"{OUTPUT_DIR}/constant.csv", index=False)

## 3. Target based weights

## 4. Target type weights

## 5. Target and region weights