# Imports

In [None]:
import itertools
import os
import sys

import matplotlib as mpl
import numpy as np
import pandas as pd
import scipy.signal
import scipy.signal
import sklearn.isotonic
import sklearn.model_selection
import sklearn.preprocessing
from matplotlib import pyplot as plt

try:
    import batteryanalytics
except ModuleNotFoundError as ie:
    sys.path.append( os.path.join(os.path.abspath(""), "../") )
    import batteryanalytics
from batteryanalytics import utils as ba_utils
from batteryanalytics.nn import LSTM

from IPython.core.display import display, HTML, Image

# Parameters

In [None]:
# --- Input data ------------------------------------------------------------
# Directory and file name of the gzip-compressed CSV holding the measurements.
dirname, basename = "../data", "mechanical_loading_data.csv.gz"

# --- Sequence geometry handed to the LSTM ------------------------------------
window, horizon = 150, 300

# --- Model output and inputs -------------------------------------------------
# Labels looked up in the innermost level of the data's column index.
target = "Temperature [C]"
# Other feature sets kept for reference:
#   ["2000 Pounds [Pounds]", "Voltage [V]", "Temperature [C]"]
#   ["2000 Pounds [Pounds]", "Voltage [V]"]
features = [
    "2000 Pounds [Pounds]",
    "Voltage [V]",
    "2000 Pounds [Pounds] (gradient 1)",
    "Voltage [V] (gradient 1)",
]

# --- Training sweep ----------------------------------------------------------
n_models = 5  # repetitions of every configuration
n_epochs = 5  # epochs per model
hidden_dims = [2, 4, 8, 16]  # LSTM hidden sizes to try
training_filters = ["none", "savgol", "isotonic_regression"]  # target smoothing

# Function Definitions

In [None]:
def suppress_stdout(func):
    """Decorator that silences standard output while ``func`` runs.

    Each call of the returned wrapper opens ``os.devnull``, redirects
    ``sys.stdout`` to it for the duration of the call, and hands back
    whatever ``func`` returned. Arguments are forwarded unchanged and the
    wrapper keeps ``func``'s metadata (name, docstring, ...).
    """
    from contextlib import redirect_stdout
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Both context managers unwind on return, restoring stdout and
        # closing the sink even if ``func`` raises.
        with open(os.devnull, "w") as sink, redirect_stdout(sink):
            return func(*args, **kwargs)

    return wrapper

# Data Loading

In [None]:
# Read the gzip-compressed CSV. The first three header rows become a
# three-level column index and the first column becomes the row index.
filename = os.path.join(dirname, basename)
raw_df = pd.read_csv(
    filename,
    compression="gzip",
    index_col=0,
    header=[0, 1, 2],
)

# Inspect what was loaded: column/dtype listing first, then the table itself
# and its summary statistics (at most 10 rows shown, all columns shown).
raw_df.info(verbose=True)
with pd.option_context("display.max_rows", 10, "display.max_columns", None):
    for view in (raw_df, raw_df.describe()):
        display(view)

# Data Preprocessing

In [None]:
def _summarize(frames):
    """Describe the index span and the row count across ``frames``."""
    rows = [
        [frame.index[-1] - frame.index[0], len(frame.index)]
        for frame in frames
    ]
    return pd.DataFrame(rows, columns=["time span", "samples"]).describe()


data_df = raw_df.copy()

# Distinct labels of the two outer column levels, in order of first appearance.
levels_0 = list(dict.fromkeys(column[0] for column in data_df.columns))
levels_1 = list(dict.fromkeys(column[1] for column in data_df.columns))

# One frame per (level-0, level-1) pair, with every row that contains a
# missing value removed. ``dropna`` returns a new frame, so the trimming
# below never touches ``data_df``.
samples = {}
for pair in itertools.product(levels_0, levels_1):
    selected = data_df.xs(pair, axis="columns", level=(0, 1), drop_level=False)
    samples[pair] = selected.dropna()

# Statistics before trimming.
summary1_df = _summarize(samples.values())

# Trim each sample in place: with the target at its maximum on row p (counted
# from 0), keep the first 2*p + 1 rows and discard the rest.
for sample_df in samples.values():
    target_values = sample_df.loc[:, pd.IndexSlice[:, :, target]].values
    peak = np.squeeze(target_values).argmax()
    sample_df.drop(sample_df.index[2 * peak + 1:], inplace=True)
    # Disabled alternative (threshold-based cut-off), kept for reference:
    # vmin, vmax = series.min(), series.max()
    # delta = vmax - vmin
    # idx = series > 0.05*delta + vmin
    # cut_off = np.squeeze(idx.iloc[::-1].idxmax().values).item()
    # sample_df.drop(sample_df.index[sample_df.index > cut_off], inplace=True)

# Statistics after trimming.
summary2_df = _summarize(samples.values())

with pd.option_context("display.max_rows", 10, "display.max_columns", None):
    for table in (summary1_df, summary2_df, data_df):
        display(table)

## Data Visualization

In [None]:
def get_unique(items):
    """Return the distinct elements of ``items`` as a list.

    Elements appear in the order in which they are first encountered;
    later duplicates are dropped. Every element must be hashable.
    """
    # dict keys are unique and keep insertion order, so they act as an
    # ordered set here.
    return list(dict.fromkeys(items))
# Distinct labels of each of the three column levels, in order of appearance.
level0, level1, level2 = (
    get_unique(raw_df.columns.get_level_values(depth)) for depth in range(3)
)

# Panel layout: one subplot per level-2 label, three per row.
n_cols = 3
n_rows = int(np.ceil(len(level2) / n_cols))

for c0 in level0:
    # A heading is emitted for every level-0 label, but only those starting
    # with "500" are plotted.
    display(HTML(f"<h1>{c0}</h1>"))
    if not c0.startswith("500"):
        continue

    for c1 in level1:
        # Same pattern one level down: heading always, plots only for
        # labels starting with "20".
        display(HTML(f"<h2>{c1}</h2>"))
        if not c1.startswith("20"):
            continue

        sample_df = samples[(c0, c1)]
        fig = plt.figure(constrained_layout=True, figsize=(16, 8))
        gs = mpl.gridspec.GridSpec(nrows=n_rows, ncols=n_cols, figure=fig)

        for position, c2 in enumerate(level2):
            column = (c0, c1, c2)
            ax = fig.add_subplot(gs[position])
            # The preprocessed sample is drawn on top (zorder=1) of the raw
            # signal (zorder=0).
            ax.plot(sample_df[column], zorder=1)
            ax.plot(raw_df[column], zorder=0)
            ax.set_xlim(raw_df.index[0], raw_df.index[-1])
            ax.set_title(c2)
            ax.set_xlabel("Time")

        fig.suptitle(f"{c0}, {c1}")
        plt.show()

# Model Training

In [None]:
def _make_training_filter(training_filter):
    """Map a filter name to the callable handed to ``model.fit``.

    Returns ``None`` for ``"none"``, a Savitzky-Golay smoother for
    ``"savgol"`` and a rise-then-fall isotonic fit for
    ``"isotonic_regression"``.

    Raises:
        ValueError: if ``training_filter`` is none of the names above.
    """
    if training_filter == "none":
        return None

    if training_filter == "savgol":
        def savgol(x):
            return scipy.signal.savgol_filter(x, window_length=99, polyorder=2)
        return savgol

    if training_filter == "isotonic_regression":
        def rise_then_fall(x):
            # Fit a non-decreasing curve before the maximum and a
            # non-increasing curve from the maximum onwards, then glue them.
            peak = np.argmax(x)
            rising = (
                sklearn.isotonic.isotonic_regression(x[:peak], increasing=True)
                if peak > 0 else []
            )
            falling = (
                sklearn.isotonic.isotonic_regression(x[peak:], increasing=False)
                if peak < len(x) else []
            )
            return np.hstack([rising, falling])
        return rise_then_fall

    raise ValueError(f"Unexpected training_filter value ({training_filter})")


def train_model(samples, n_split, features, target, window, horizon, hidden_dim, n_epochs, training_filter, model_id):
    """Train one LSTM with a single sample held out, then persist the results.

    The samples are split leave-one-out; split number ``n_split`` selects
    which sample is held out. The model is fitted on the remaining samples,
    its ``transform`` output for *all* samples (held-out one included) is
    written to an ``.npz`` file, and the model itself is saved through
    ``ba_utils.save_model``.

    Args:
        samples: mapping of sample key to a DataFrame whose columns carry a
            three-level index; ``features`` and ``target`` are looked up in
            the third level.
        n_split: index of the leave-one-out split to use
            (``0 <= n_split < len(samples)``).
        features: third-level column label(s) used as model input.
        target: third-level column label used as model output.
        window: forwarded to ``LSTM(window=...)``.
        horizon: forwarded to ``LSTM(horizon=...)``.
        hidden_dim: forwarded to ``LSTM(hidden_dim=...)``.
        n_epochs: forwarded to ``LSTM(n_epochs=...)``.
        training_filter: ``"none"``, ``"savgol"`` or ``"isotonic_regression"``;
            selects the ``filter_function`` passed to ``model.fit``.
        model_id: identifier that only enters the output file names.

    Returns:
        ``(model, filename)`` where ``filename`` is whatever
        ``ba_utils.save_model`` returned.

    Raises:
        ValueError: if ``training_filter`` is not a recognised name. This is
            checked before any model is built or data is touched.
        IndexError: if ``n_split`` is outside the available splits.
    """
    # Resolve the filter first so a typo fails immediately instead of after
    # the model has been constructed.
    filter_function = _make_training_filter(training_filter)

    model = LSTM(
        window=window,
        horizon=horizon,
        hidden_dim=hidden_dim,
        n_epochs=n_epochs,
        batch_learning=False,
        batch_size=32,
        shuffle=True,
        compute_device="cpu",
        parallel=False,
        verbose=1,
    )

    # 1-D object array of (key, frame) pairs so it can be fancy-indexed with
    # the splitter's index arrays. Filled element by element so NumPy never
    # tries to interpret the pairs as a nested array.
    samples_items = np.empty(shape=len(samples), dtype=object)
    for position, item in enumerate(samples.items()):
        samples_items[position] = item

    splitter = sklearn.model_selection.LeaveOneOut()
    train_idx, test_idx = list(splitter.split(samples_items))[n_split]

    training_frames = [frame for _, frame in samples_items[train_idx]]
    XX = [
        frame.loc[:, pd.IndexSlice[:, :, features]].values
        for frame in training_frames
    ]
    yy = [
        frame.loc[:, pd.IndexSlice[:, :, target]].values
        for frame in training_frames
    ]

    model.fit(XX, yy, filter_function=filter_function)

    # Output directory is derived from the held-out sample key(s).
    # NOTE(review): computed once and reused for both outputs; this assumes
    # get_lstm_dirname is a pure name-building helper - confirm.
    holdout_keys = [key for key, _ in samples_items[test_idx]]
    dirname = ba_utils.get_lstm_dirname(holdout_keys)

    # --- transform output for every sample -> .npz --------------------------
    # NOTE(review): this name uses the ``window``/``horizon`` arguments while
    # the model file below uses ``model.window``/``model.horizon``; kept as in
    # the original. The "joblib" -> "npz" replacement also applies to the
    # directory part of the path.
    filename = os.path.join(
        dirname,
        ba_utils.get_lstm_basename(features, window, horizon, model.hidden_dim, model.n_epochs, training_filter, model_id)
    ).replace("joblib", "npz")

    keys = [ba_utils.sanitize_holdout_name(key) for key in samples.keys()]
    transformed = model.transform(
        [df.loc[:, pd.IndexSlice[:, :, features]].values for df in samples.values()]
    )
    values = [np.squeeze(output) for output in transformed]
    kwargs = dict(zip(keys, values))

    ba_utils.mkdirs(os.path.dirname(filename))
    np.savez(filename, **kwargs)

    # --- fitted model --------------------------------------------------------
    filename = os.path.join(
        dirname,
        ba_utils.get_lstm_basename(features, model.window, model.horizon, model.hidden_dim, model.n_epochs, training_filter, model_id)
    )
    filename = ba_utils.save_model(filename, model)
    return model, filename

In [None]:
# Full sweep: every repetition x target filter x hidden size x held-out
# sample. ``itertools.product`` varies the right-most factor fastest, so
# the visiting order matches four nested loops written in this order.
sweep = itertools.product(
    range(1, n_models + 1),
    training_filters,
    hidden_dims,
    range(len(samples)),
)

for model_id, training_filter, hidden_dim, n_split in sweep:
    model, _ = train_model(
        samples=samples,
        n_split=n_split,
        features=features,
        target=target,
        window=window,
        horizon=horizon,
        hidden_dim=hidden_dim,
        n_epochs=n_epochs,
        training_filter=training_filter,
        model_id=model_id,
    )
    # Show the underlying network and its training loss for this run.
    print(model._model)
    ba_utils.display_loss_curve(model)