In [None]:
# default_exp data.load 

In [None]:
# hide
import sys

# Put the parent directory on the module search path (presumably so the local
# `fastseq` package imported below resolves without being installed - confirm).
sys.path.append("..")

In [None]:
# export
from fastseq.core import *
from fastseq.data.external import *
from fastcore.utils import *
from fastcore.imports import *
from fastai2.basics import *
from fastai2.tabular.core import *



# Data

> Using the PyTorch `Dataset` class to make an easy-to-use time series dataset.


# Dataset

In [None]:
# export
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader

In [None]:
# export
def pad_zeros(X, lenght):
    """Left-pad ``X`` with zeros along its last axis until it has ``lenght`` entries.

    Arguments:
        * X (array-like): Array of rank >= 1. Only the last axis is padded; all
          leading axes are left untouched.
        * lenght (int): Target size of the last axis. (The parameter keeps its
          original spelling so existing keyword callers continue to work.)

    Returns:
        ``numpy.ndarray`` whose last axis has size ``lenght``, with the zeros
        placed in front of the existing values.

    Raises:
        ValueError: propagated from ``np.pad`` when ``X`` is already longer than
        ``lenght`` (the pad amount would be negative).
    """
    X = np.asarray(X)
    n_missing = lenght - X.shape[-1]
    # One (before, after) pair per axis: nothing on the leading axes, and
    # `n_missing` zeros in front of the last axis.
    pad_width = [(0, 0)] * (X.ndim - 1) + [(n_missing, 0)]
    return np.pad(X, pad_width=pad_width, mode='constant', constant_values=0)

In [None]:
# export

class TimeSeriesDataset(Dataset):
    """Takes a list of time series and provides access to windowed subseries for
    training.

    Every example is a tuple ``(X, y)``: ``X`` holds ``lookback`` time steps and
    ``y`` the ``horizon`` time steps that follow. Series shorter than ``horizon``
    are dropped (a message is printed); series shorter than
    ``lookback + horizon`` yield a single example whose input is zero-padded on
    the left.

    Arguments:
        * time_series (list): List of 2-D arrays indexed as ``ts[:, start:stop]``,
          i.e. with time on the last axis.
        * lookback (int): Number of time steps used as input for forecasting.
        * horizon (int): Number of time steps to forecast.
        * step (int): Time step size between consecutive examples.
        * static_covs (list): Static covariates for each item in ``time_series`` list.
          They are stored on the instance but not attached to the samples.
        * thinning (float): Fraction of examples to include. The first
          ``int(n_examples * thinning)`` examples are kept; the result is clamped
          to the range ``[0, n_examples]``.
        * transform (callable or list of callables): Applied in order to the
          ``(X, y)`` tuple before it is returned.

    """
    def __init__(self,
                 time_series,
                 lookback,
                 horizon,
                 step,
                 static_covs=None,
                 thinning=1.0,
                 transform = noop,
                ):
        self.time_series = time_series
        self.lookback = lookback
        self.horizon = horizon
        self.step = step
        self.transform = L(transform)
        self.static_covs = static_covs

        # Slice each time series into examples, assigning IDs to each.
        # example_ids maps example id -> (index of the series, window start).
        last_id = 0
        n_dropped = 0
        self.example_ids = {}
        for i, ts in enumerate(self.time_series):
            n_steps = ts.shape[-1]
            num_examples = (n_steps - self.lookback - self.horizon + self.step) // self.step
            # Time series shorter than the forecast horizon need to be dropped.
            if n_steps < self.horizon:
                n_dropped += 1
                continue
            # For short time series zero pad the input
            if n_steps < self.lookback + self.horizon:
                num_examples = 1
            for j in range(num_examples):
                self.example_ids[last_id + j] = (i, j * self.step)
            last_id += num_examples

        # Inform user about time series that were too short
        if n_dropped > 0:
            print("Dropped {}/{} time series due to length.".format(
                    n_dropped, len(self.time_series)
                    )
                 )

        # Store the number of training examples. Clamp so that __len__ never
        # reports more examples than exist (thinning > 1) or a negative count.
        n_total = len(self.example_ids)
        self._len = max(0, min(n_total, int(n_total * thinning)))

    def __len__(self):
        return self._len

    def __getitem__(self, idx):
        # Behave like a sequence: accept negative indices and raise IndexError
        # outside [0, len). Plain `for` loops over the dataset rely on
        # IndexError to detect the end; a KeyError from the dict would escape.
        position = idx + self._len if idx < 0 else idx
        if not 0 <= position < self._len:
            raise IndexError(
                "index {} is out of range for dataset of length {}".format(idx, self._len)
            )

        # Get time series
        ts_id, lookback_id = self.example_ids[position]
        ts = self.time_series[ts_id]
        n_steps = ts.shape[-1]

        # Prepare input and target. Zero pad if necessary.
        if n_steps < self.lookback + self.horizon:
            # If the time series is too short, we zero pad. The split point is
            # computed explicitly because `ts[:, :-0]` would be empty when
            # horizon == 0.
            split = n_steps - self.horizon
            X = ts[:, :split]
            X = np.pad(
                X,
                pad_width=((0, 0), (self.lookback - X.shape[-1], 0)),
                mode='constant',
                constant_values=0
            )
            y = ts[:, split:]
        else:
            target_start = lookback_id + self.lookback
            X = ts[:, lookback_id:target_start]
            y = ts[:, target_start:target_start + self.horizon]

        # Create the input and output for the sample
        sample = (X, y)
        for tmf in self.transform:
            sample = tmf(sample)

        # NOTE: static covariates (self.static_covs) are not attached to the
        # sample.
        return sample
    

## Test

In [None]:
# hide
# Forecast 12 steps from a 128-step input window.
horizon, lookback = 12, 128

# Four two-channel toy series; the 10-step one is shorter than the horizon.
t = np.arange(1000)
lenghts = [20, 10, 140, 140]
data_train = [
    np.array([
        offset + 0.5 * np.sin(t[:n_steps]),
        t[:n_steps] + 0.1 * np.random.randn(),
    ])
    for offset, n_steps in enumerate(lenghts)
]

print([series.shape for series in data_train])

ts_ds = TimeSeriesDataset(
    time_series=data_train,
    lookback=lookback,
    horizon=horizon,
    step=1,
    static_covs=[1, 2, 2, 2],
    transform=[ToTensor(), Cuda()],
)
r = ts_ds[0]
test_eq(r[0].shape, (2, lookback))
test_eq(r[1].shape, (2, horizon))

[(2, 20), (2, 10), (2, 140), (2, 140)]
Dropped 1/4 time series due to length.


In [None]:
# Forecast 12 steps from a 128-step input window.
horizon, lookback = 12, 128

In [None]:
# Four two-channel toy series of different lengths: a shifted sine wave and a
# time ramp with a random constant offset.
t = np.arange(1000)
lenghts = [20, 10, 140, 140]
data_train = [
    np.array([
        offset + 0.5 * np.sin(t[:n_steps]),
        t[:n_steps] + 0.1 * np.random.randn(),
    ])
    for offset, n_steps in enumerate(lenghts)
]

print([series.shape for series in data_train])

[(2, 20), (2, 10), (2, 140), (2, 140)]


In [None]:
# Build the dataset without a transform (the default is used).
ts_ds = TimeSeriesDataset(
    time_series=data_train,
    lookback=lookback,
    horizon=horizon,
    step=1,
    static_covs=[1, 2, 2, 2],
)

Dropped 1/4 time series due to length.


In [None]:
# The first example must have `lookback` input steps and `horizon` target steps.
first_example = ts_ds[0]
test_eq(first_example[0].shape, (2, lookback))
test_eq(first_example[1].shape, (2, horizon))
# test_eq(ts_ds[2]['X_stat'], 2)
# One of the four series is dropped and each remaining one yields one example.
test_eq(len(ts_ds), 3)

## Including a transform

In [None]:
# Same dataset as before, now with a single transform applied to every sample.
ts_ds = TimeSeriesDataset(
    time_series=data_train,
    lookback=lookback,
    horizon=horizon,
    step=1,
    static_covs=[1, 2, 2, 2],
    transform=ToTensor(),
)

Dropped 1/4 time series due to length.


In [None]:
# Probe what ToTensor does with a bare 0-d NumPy array.
ToTensor()(np.array(1))

array(1)

In [None]:
# Display the input part (first tuple element) of the first example.
ts_ds[0][0]
# NOTE(review): the checks below are currently disabled.
# test_eq(ts_ds[0][1].shape, (2,horizon))
# # test_eq(ts_ds[2]['X_stat'], 2)
# test_eq(len(ts_ds),3)

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0. 

In [None]:
# hide
from nbdev.export import *

# Run nbdev's notebook-to-script conversion; the recorded output lists each
# converted notebook.
notebook2script()

Converted 00_core.ipynb.
Converted 01_data.external.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 02_deep4cast_m4_example.ipynb.
Converted 03_data.load.ipynb.
Converted 04_data.transforms.ipynb.
Converted 05_models.wavenet.ipynb.
Converted index.ipynb.
