In [None]:
# default_exp timeseries.learner

# timeseries.learner

> Learner, learner factory and fast prediction helpers for renewable timeseries data.

In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
#export
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.tabular.data import *
from fastai.tabular.core import *
from fastai.tabular.model import *
from fastai.basics import *
from fastrenewables.tabular.core import *
from fastrenewables.tabular.model import *
from fastrenewables.timeseries.core import *
from fastrenewables.timeseries.data import *
from fastrenewables.timeseries.model import *
from fastrenewables.losses import VILoss
from fastrenewables.utils import *
import pandas as pd

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
#hide
def _tensor_to_device(x, device):
    if isinstance(x, torch.Tensor):
        x = x.to(device)
    return x    

In [None]:
#export
#hide
def _swap_axes_and_flatten(tensr):
    if isinstance(tensr, torch.Tensor):
        return  torch.swapaxes(tensr, 1, 2).reshape(-1, tensr.shape[1])
    elif isinstance(tensr, np.ndarray):
        return  np.swapaxes(tensr, 1, 2).reshape(-1, tensr.shape[1])
    else:
        raise ValueError

In [None]:
#export
def convert_to_tensor_ts(to, include_index=False, device="cpu", flatten=False):
    """Return the `cats`, `conts` and `ys` of the timeseries dataset `to`.

    Tensors among them are moved to `device`. With `flatten=True` the indexes and
    all three items are passed through `_swap_axes_and_flatten` (the indexes are
    additionally raveled). With `include_index=True` the indexes are returned as
    first element. Raises `NotImplementedError` if `to` is neither a
    `TimeseriesDataset` nor a `Timeseries`.
    """
    if not isinstance(to, (TimeseriesDataset, Timeseries)):
        raise NotImplementedError("Unknown type")

    # to increase speed we directly work on the complete tensors of the dataset
    with torch.no_grad():
        cats, conts, targets = (_tensor_to_device(item, device) for item in (to.cats, to.conts, to.ys))

    indexes = to.indexes

    if flatten:
        indexes = _swap_axes_and_flatten(indexes).ravel()
        conts, cats, targets = (_swap_axes_and_flatten(item) for item in (conts, cats, targets))

    return (indexes, cats, conts, targets) if include_index else (cats, conts, targets)

In [None]:
#export
def fast_prediction_ts(model, to, flatten, filter, device="cpu"):
    """Predict on all samples of `to` with a single forward pass of `model`.

    The inputs are taken from `convert_to_tensor_ts` (not flattened) on `device`.
    Predictions and targets are converted with `to_np`, reshaped to one dimension
    if `flatten` is set and passed through `filter_preds` if `filter` is set.
    Returns the tuple `(preds, targets)`.
    """
    cats, conts, ys = convert_to_tensor_ts(to, device=device, include_index=False)

    with torch.no_grad():
        raw_preds = model(cats, conts)

    preds = to_np(raw_preds)
    targets = to_np(ys)

    if flatten:
        preds = preds.reshape(-1)
        targets = targets.reshape(-1)

    if filter:
        # `filter_preds` takes and returns the targets first
        targets, preds = filter_preds(targets, preds)

    return preds, targets


In [None]:
#export
class RenewableTimeseriesLearner(Learner):
    "`Learner` for renewable timeseries data."
    def predict(self, ds_idx=1, test_dl=None, filter=True, as_df=False, flatten=True):
        """Predict on a complete dataset with a single forward pass.

        If `test_dl` is given, its `train_ds` is used; otherwise `ds_idx` selects the
        training (0) or validation (1) dataset of `self.dls`. `flatten` and `filter`
        are forwarded to `fast_prediction_ts`.
        Returns `(preds, targets)`, or with `as_df=True` a `DataFrame` with the columns
        `Prediction` and `Target`, indexed by the flattened indexes of the dataset.
        Raises `ValueError` if no `test_dl` is given and `ds_idx` is neither 0 nor 1.
        """
        if test_dl is not None:
            to = test_dl.train_ds
        elif ds_idx == 0:
            to = self.dls.train_ds
        elif ds_idx == 1:
            to = self.dls.valid_ds
        else:
            raise ValueError(f"`ds_idx` must be 0 (train) or 1 (valid) when no `test_dl` is given, got {ds_idx!r}.")

        # move the inputs to the device of the model, otherwise a model on the GPU
        # would be called with CPU tensors
        device = next(self.model.parameters()).device
        preds, targets = fast_prediction_ts(self.model, to, flatten=flatten, filter=filter, device=device)

        if as_df:
            # NOTE(review): the index covers every entry of `to.indexes`; presumably it only
            # lines up with `preds` when `flatten=True` and `filter` drops nothing — confirm
            return pd.DataFrame({"Prediction": preds, "Target": targets}, index=to.indexes.reshape(-1))
        return preds, targets

In [None]:
show_doc(RenewableTimeseriesLearner, title_level=3)

<h3 id="RenewableTimeseriesLearner" class="doc_header"><code>class</code> <code>RenewableTimeseriesLearner</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>RenewableTimeseriesLearner</code>(**`dls`**, **`model`**, **`loss_func`**=*`None`*, **`opt_func`**=*`Adam`*, **`lr`**=*`0.001`*, **`splitter`**=*`trainable_params`*, **`cbs`**=*`None`*, **`metrics`**=*`None`*, **`path`**=*`None`*, **`model_dir`**=*`'models'`*, **`wd`**=*`None`*, **`wd_bn_bias`**=*`False`*, **`train_bn`**=*`True`*, **`moms`**=*`(0.95, 0.85, 0.95)`*) :: `Learner`

`Learner` for renewable timerseries data.

In [None]:
def get_dls(cat_names = ["TaskID", 'Month', 'Day', 'Hour'], cont_names=None, y_names="PowerGeneration", y_block=RegressionBlock(), post_hooks=[]):
    """Create example `RenewableTimeSeriesDataLoaders` from all `*.h5` files in `../data/`.

    `cat_names`, `cont_names`, `y_names`, `y_block` and `post_hooks` are forwarded to
    `RenewableTimeSeriesDataLoaders.from_files`; `cont_names=None` selects a default
    list of continuous columns. The list defaults in the signature are shared
    between calls and are only read in this function.
    Raises `FileNotFoundError` if no data file is found.
    """
    if cont_names is None:
        cont_names = ['T_HAG_2_M', 'RELHUM_HAG_2_M', 'PS_SFC_0_M', 'ASWDIFDS_SFC_0_M',
                      'ASWDIRS_SFC_0_M', 'WindSpeed58m']

    # silence pandas' chained-assignment warning (this is a global pandas option)
    pd.options.mode.chained_assignment = None

    files = glob.glob("../data/*.h5")
    if len(files) == 0:
        # fail early with a clear message instead of an opaque error further downstream
        raise FileNotFoundError("No '*.h5' files found in '../data/'.")

    dls = RenewableTimeSeriesDataLoaders.from_files(files,
                                                y_names=y_names,
                                                cat_names=cat_names,
                                                cont_names=cont_names,
                                                pre_procs=[FilterYear(year=2020),
                                                             AddSeasonalFeatures(as_cont=False),
                                                             FilterInconsistentSamplesPerDay],
                                                procs=Categorify,
                                                bs=12,
                                                y_block=y_block,
                                                post_hooks=post_hooks)
    return dls



In [None]:
dls = get_dls(y_names="PowerGeneration", y_block=RegressionBlock())

ValueError: No objects to concatenate

We have one output for the regression task, `PowerGeneration`, and the MSE loss as the default loss. Note that these values are inherited from the `TabularPandas` module of `fastai`.

In [None]:
f"Number of targets: {dls.train_ds.c} with loss {dls.train_ds.loss_func}."

Therefore, we can easily extend this to multiple targets by providing the required y columns.

In [None]:
dls = get_dls(cont_names = "WindSpeed58m", y_names=['T_HAG_2_M', 'RELHUM_HAG_2_M'], y_block=RegressionBlock())
f"Number of targets: {dls.train_ds.c}."

We can also use the same procedure for classification.

In [None]:
dls = get_dls(y_names=["TaskID"], y_block=CategoryBlock())
f"Number of targets: {dls.train_ds.c} with loss {dls.train_ds.loss_func}."

However, with the default values, we have one target value for each timestamp. In most cases, though, we only want to predict a single class for each timeseries sample.

In [None]:
y = dls.one_batch()[-1]
y[0:2]

To overcome this issue, we can apply `post_hooks` to shorten the target timeseries. 

In [None]:
dls = get_dls(y_names=["TaskID"], y_block=CategoryBlock(), post_hooks=[reduce_target_timeseries_to_element])
f"Number of targets: {dls.train_ds.c} with loss {dls.train_ds.loss_func}."
y = dls.one_batch()[-1]
y

In [None]:
#export
@delegates(Learner.__init__)
def renewable_timeseries_learner(dls, layers=None, emb_szs=None, config=None,
                                 n_out=None, y_range=None,
                                 embedding_type=EmbeddingType.Normal,
                                 input_sequence_length=None,
                                 output_sequence_length=None,
                                 sequence_transform=None,
                                 **kwargs):
    """Get a `RenewableTimeseriesLearner` using `dls`, with a `TemporalCNN` created using the remaining params.

    `n_out` is inferred from `dls` with `get_c` if not given, and `layers` defaults to
    `[len(dls.cont_names), 200, 100, n_out]`. An `EmbeddingModule` is only created if
    the training dataset has categorical features; its dropout can be set with the
    optional keyword argument `embed_p` (default 0.1). For `EmbeddingType.Bayes`
    without an explicit `loss_func`, the loss of the training dataset is wrapped in
    a `VILoss`. All other `kwargs` are passed to `RenewableTimeseriesLearner`.
    """
    if config is None: config = tabular_config()

    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"

    if layers is None: layers = [len(dls.cont_names), 200, 100, n_out]
    # NOTE(review): `y_range` is taken out of `config` but not used afterwards — confirm
    # whether it is supposed to reach the model
    if y_range is None and 'y_range' in config: y_range = config.pop('y_range')

    # take `embed_p` out of `kwargs`, so that it is not forwarded to the learner
    embed_p = kwargs.pop("embed_p", 0.1)

    emb_module = None
    if len(dls.train_ds.cat_names) > 0:
        emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)
        # NOTE(review): `embedding_type` is not passed on here, it only selects the loss below — confirm
        emb_module = EmbeddingModule(None, embedding_dropout=embed_p, embedding_dimensions=emb_szs)

    model = TemporalCNN(layers, embedding_module=emb_module,
                        input_sequence_length=input_sequence_length,
                        output_sequence_length=output_sequence_length,
                        sequence_transform=sequence_transform,
                        **config)

    if embedding_type==EmbeddingType.Bayes and "loss_func" not in kwargs:
        base_loss = getattr(dls.train_ds, 'loss_func', None)
        assert base_loss is not None, "Could not infer loss function from the data, please pass a loss function."
        kwargs["loss_func"] = VILoss(model=model, base_loss=base_loss, kl_weight=0.1)

    return RenewableTimeseriesLearner(dls, model, **kwargs)

In [None]:
show_doc(renewable_timeseries_learner)

In [None]:
#export
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.tabular.data import *
from fastai.tabular.core import *
from fastai.tabular.model import *
from fastai.basics import *
from fastrenewables.tabular.core import *
from fastrenewables.tabular.data import *
from fastrenewables.tabular.model import *
from fastrenewables.losses import VILoss

In [None]:
dls = get_dls(y_names="PowerGeneration", y_block=RegressionBlock())

In [None]:
learner = renewable_timeseries_learner(dls, metrics=rmse)

In [None]:
learner.loss_func

In [None]:
cats, conts, ys = dls.one_batch()

In [None]:
learner.model

In [None]:
cats.shape

In [None]:
learner.model(cats, conts).shape

In [None]:
ys.shape

In [None]:
learner.fit_one_cycle(5)

In [None]:
preds, targets = learner.predict(0)

In [None]:
result_df = learner.predict(0, as_df=True)

In [None]:
import matplotlib.pyplot as plt

# compare the first 200 predictions on the training data with their targets
fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(targets[0:200], label="Target")
ax.plot(preds[0:200], label="Prediction")
ax.set(title="Prediction vs. target (first 200 training samples)",
       xlabel="Sample", ylabel="PowerGeneration")
ax.legend();

Create a classification dataloader, where we aim to forecast the task ID.

In [None]:
dls = get_dls(cat_names = ['Month', 'Day', 'Hour'], y_names=["TaskID"], 
              y_block=CategoryBlock(), 
              post_hooks=[reduce_target_timeseries_to_element])
f"Number of classes {dls.c}"

In [None]:
#hide
# Check that `convert_to_tensor_ts` with `flatten=True` arranges the data as expected.
ts_data_cont = dls.train_ds.conts
ts_data_cat = dls.train_ds.cats
index, cats, conts, ys = convert_to_tensor_ts(dls.train_ds, include_index=True, flatten=True)
# Row 1 of the flattened data has to match position 1 along the last axis of the
# first sample; the column of the flattened data corresponds to axis 1 of the original.
test_eq(cats[1,0], ts_data_cat[0,0,1])
test_eq(cats[1,1], ts_data_cat[0,1,1])
test_eq(cats[1,2], ts_data_cat[0,2,1])
test_eq(conts[1,0], ts_data_cont[0,0,1])
test_eq(conts[1,1], ts_data_cont[0,1,1])
test_eq(conts[1,2], ts_data_cont[0,2,1])

We can either let the `renewable_timeseries_learner` handle the correct output shape by providing the `input_sequence_length` and `output_sequence_length`.

In [None]:
input_sequence_length=dls.input_sequence_length
output_sequence_length=dls.output_sequence_length
input_sequence_length, output_sequence_length

In [None]:
learner = renewable_timeseries_learner(dls, metrics=accuracy, 
               input_sequence_length=input_sequence_length,
               output_sequence_length=output_sequence_length)
learner.fit_one_cycle(5)

Alternatively, we can provide a custom layer that converts the results of the `CNN` into the required shape for classification.

In [None]:
num_outputs_last_cnn_layer = 10
sequence_transform=MultiLayerPerceptron([input_sequence_length*num_outputs_last_cnn_layer, dls.c])

In [None]:
# `n_out` has to match the input size that `sequence_transform` was built for
learner = renewable_timeseries_learner(dls, n_out=num_outputs_last_cnn_layer, metrics=accuracy,
               input_sequence_length=input_sequence_length,
               sequence_transform=sequence_transform)

In [None]:
learner.loss_func

In [None]:
learner.model

In [None]:
learner.fit_one_cycle(5)

In [None]:
from nbdev.export import notebook2script
notebook2script()