# Initial loss stats

I noticed a weird trend in the latest TensorBoard runs: the initial loss of the model is almost always greater on the reversed dataset (labeled `backward`) than on the non-reversed one (labeled `forward`).
This looks suspicious: with randomly initialized weights, the untrained model's loss should be just as likely to be greater on `forward` as on `backward`.

The most likely explanation is that the first point on my TensorBoard learning-curve plots corresponds to the loss _after the first epoch_, i.e. after some training has already been done.
Maybe I should call the TensorBoard callback just before the training loop to add the initial point.

Anyway, I explore this trend more rigorously in this notebook.

In [1]:
# Custom code imports
from models import ThreeFullyConnectedLayers

from generate_time_series import (load_two_body_problem_time_series,
                                  load_lorenz_attractor_time_series,
                                  load_belousov_zhabotinsky_time_series)

from datasets import (prepare_time_series_for_learning,
                      time_series_to_dataset,
                      AllDataHolder)

from train_test_utils import get_mean_loss_on_test_dataset

In [2]:
# Standard modules import
import numpy as np

In [3]:
# Dataholders
def load_two_body_problem_dataholder(
    window_len: int, target_len: int
) -> AllDataHolder:
    """Load the two-body problem time series and wrap it for learning.

    Args:
        window_len: Forwarded to ``prepare_time_series_for_learning``.
        target_len: Forwarded to ``prepare_time_series_for_learning``.

    Returns:
        Whatever ``prepare_time_series_for_learning`` produces for this series.
    """
    series = load_two_body_problem_time_series()
    # NOTE(review): the test series is a copy of the training series, so the
    # "test" data here is not held out.
    # take_each_nth_chunk=1 presumably keeps every chunk -- TODO confirm.
    return prepare_time_series_for_learning(
        train_ts=series,
        test_ts=series.copy(),
        window_len=window_len,
        target_len=target_len,
        take_each_nth_chunk=1,
    )


def load_lorenz_attractor_dataholder(
    window_len: int, target_len: int
) -> AllDataHolder:
    """Load the Lorenz attractor time series and wrap it for learning.

    Args:
        window_len: Forwarded to ``prepare_time_series_for_learning``.
        target_len: Forwarded to ``prepare_time_series_for_learning``.

    Returns:
        Whatever ``prepare_time_series_for_learning`` produces for this series.
    """
    series = load_lorenz_attractor_time_series()
    # NOTE(review): the test series is a copy of the training series, so the
    # "test" data here is not held out.
    # take_each_nth_chunk=1 presumably keeps every chunk -- TODO confirm.
    return prepare_time_series_for_learning(
        train_ts=series,
        test_ts=series.copy(),
        window_len=window_len,
        target_len=target_len,
        take_each_nth_chunk=1,
    )


def load_belousov_zhabotinsky_dataholder(
    window_len: int, target_len: int
) -> AllDataHolder:
    """Load the Belousov-Zhabotinsky time series and wrap it for learning.

    Args:
        window_len: Forwarded to ``prepare_time_series_for_learning``.
        target_len: Forwarded to ``prepare_time_series_for_learning``.

    Returns:
        Whatever ``prepare_time_series_for_learning`` produces for this series.
    """
    series = load_belousov_zhabotinsky_time_series()
    # NOTE(review): the test series is a copy of the training series, so the
    # "test" data here is not held out.
    # take_each_nth_chunk=1 presumably keeps every chunk -- TODO confirm.
    return prepare_time_series_for_learning(
        train_ts=series,
        test_ts=series.copy(),
        window_len=window_len,
        target_len=target_len,
        take_each_nth_chunk=1,
    )

In [131]:
def get_loss_difference(model: ThreeFullyConnectedLayers,
                        dh: AllDataHolder) -> float:
    """Return the backward-minus-forward mean test loss of ``model``.

    Args:
        model: Model passed to ``get_mean_loss_on_test_dataset``.
        dh: Data holder exposing ``forward.test_dataset`` and
            ``backward.test_dataset``.

    Returns:
        ``loss(backward) - loss(forward)``; a positive value means the loss
        on the backward test dataset is the larger one.
    """
    # The generator is consumed lazily, so the forward loss is still computed
    # before the backward one.
    forward_loss, backward_loss = (
        get_mean_loss_on_test_dataset(model, getattr(dh, direction).test_dataset)
        for direction in ("forward", "backward")
    )
    return backward_loss - forward_loss

def get_loss_difference_kepler(
    window_len: int = 20, target_len: int = 1, size: int = 7
) -> float:
    """Loss difference of a newly constructed model on the two-body data.

    Args:
        window_len: Passed to both the data loader and the model.
        target_len: Passed to both the data loader and the model.
        size: Used for both hidden layer sizes of the model.

    Returns:
        The result of ``get_loss_difference`` for a model that is built here
        and not trained in this function.
    """
    lengths = dict(window_len=window_len, target_len=target_len)
    # Data is loaded before the model is built, as in the original cell.
    data_holder = load_two_body_problem_dataholder(**lengths)
    fresh_model = ThreeFullyConnectedLayers(
        **lengths,
        hidden_layer1_size=size,
        hidden_layer2_size=size,
        datapoint_size=2,
    )
    return get_loss_difference(fresh_model, data_holder)

def get_loss_difference_lorenz(
    window_len: int = 20, target_len: int = 1, size: int = 10
) -> float:
    """Loss difference of a newly constructed model on the Lorenz data.

    Args:
        window_len: Passed to both the data loader and the model.
        target_len: Passed to both the data loader and the model.
        size: Used for both hidden layer sizes of the model.

    Returns:
        The result of ``get_loss_difference`` for a model that is built here
        and not trained in this function.
    """
    lengths = dict(window_len=window_len, target_len=target_len)
    # Data is loaded before the model is built, as in the original cell.
    data_holder = load_lorenz_attractor_dataholder(**lengths)
    fresh_model = ThreeFullyConnectedLayers(
        **lengths,
        hidden_layer1_size=size,
        hidden_layer2_size=size,
        datapoint_size=3,
    )
    return get_loss_difference(fresh_model, data_holder)

def get_loss_difference_belousov_zhabotinsky(
    window_len: int = 20, target_len: int = 1, size: int = 10
) -> float:
    """Loss difference of a newly constructed model on the Belousov-Zhabotinsky data.

    Args:
        window_len: Passed to both the data loader and the model.
        target_len: Passed to both the data loader and the model.
        size: Used for both hidden layer sizes of the model.

    Returns:
        The result of ``get_loss_difference`` for a model that is built here
        and not trained in this function.
    """
    lengths = dict(window_len=window_len, target_len=target_len)
    # Data is loaded before the model is built, as in the original cell.
    data_holder = load_belousov_zhabotinsky_dataholder(**lengths)
    fresh_model = ThreeFullyConnectedLayers(
        **lengths,
        hidden_layer1_size=size,
        hidden_layer2_size=size,
        datapoint_size=3,
    )
    return get_loss_difference(fresh_model, data_holder)

In [116]:
%time get_loss_difference_kepler()

CPU times: user 523 ms, sys: 3.31 ms, total: 526 ms
Wall time: 487 ms


-0.000408440898611806

In [132]:
class LossCalculator:
    """Callable that reports the loss difference of a newly built model.

    The data holder for the chosen system is loaded once, in the constructor.
    Every call then constructs a new ``ThreeFullyConnectedLayers`` model and
    evaluates it against that same data holder.

    Attributes set by the constructor: ``window_len``, ``target_len``,
    ``size``, ``datapoint_size`` and ``dh``.
    """

    def __init__(self,
                 system: str,
                 window_len: int = 20,
                 target_len: int = 1,
                 size: int = 7) -> None:
        """Store the model settings and load the data holder for ``system``.

        Args:
            system: One of ``"kepler"``, ``"lorenz"``,
                ``"belousov-zhabotinsky"``.
            window_len: Passed to the data loader and, later, to the model.
            target_len: Passed to the data loader and, later, to the model.
            size: Used for both hidden layer sizes of the model.

        Raises:
            ValueError: If ``system`` is not one of the supported names.
        """
        self.window_len = window_len
        self.target_len = target_len
        self.size = size

        # Reject unknown systems before touching any loader.
        if system not in ("kepler", "lorenz", "belousov-zhabotinsky"):
            raise ValueError(f"system={system}, expected one of 'kepler', 'lorenz', 'belousov-zhabotinsky'")

        # "kepler" uses datapoint_size 2; the other two systems use 3.
        self.datapoint_size = 2 if system == "kepler" else 3

        lengths = dict(window_len=window_len, target_len=target_len)
        if system == "kepler":
            self.dh = load_two_body_problem_dataholder(**lengths)
        elif system == "lorenz":
            self.dh = load_lorenz_attractor_dataholder(**lengths)
        else:
            self.dh = load_belousov_zhabotinsky_dataholder(**lengths)

    def __call__(self):
        """Build a new model and return ``get_loss_difference`` for it."""
        model_settings = dict(window_len=self.window_len,
                              target_len=self.target_len,
                              hidden_layer1_size=self.size,
                              hidden_layer2_size=self.size,
                              datapoint_size=self.datapoint_size)
        fresh_model = ThreeFullyConnectedLayers(**model_settings)
        return get_loss_difference(fresh_model, self.dh)

In [133]:
# NOTE(review): despite its name, `loss` is a LossCalculator instance, not a
# number. The "kepler" data holder is loaded once here; each `loss()` call
# builds a new model and returns its backward-minus-forward loss difference.
loss = LossCalculator(system="kepler")