
Commit

Rijk van der Meulen committed Aug 8, 2022
2 parents 7b1b7e7 + 8a90725 commit c9914e3
Showing 9 changed files with 208 additions and 37 deletions.
42 changes: 27 additions & 15 deletions darts/dataprocessing/dtw/dtw.py
@@ -2,9 +2,12 @@
from typing import Callable, Union

import numpy as np
import pandas as pd
import xarray as xr

from darts import TimeSeries
from darts.logging import get_logger, raise_if, raise_if_not
from darts.timeseries import DIMS

from .cost_matrix import CostMatrix
from .window import CRWindow, NoWindow, Window
@@ -203,25 +206,34 @@ def warped(self) -> (TimeSeries, TimeSeries):
Two new TimeSeries instances of the same length, indexed by pd.RangeIndex.
"""

series1 = self.series1
series2 = self.series2

xa1 = series1.data_array(copy=False)
xa2 = series2.data_array(copy=False)

xa1 = self.series1.data_array(copy=False)
xa2 = self.series2.data_array(copy=False)
path = self.path()

warped_series1 = xa1[path[:, 0]]
warped_series2 = xa2[path[:, 1]]

time_dim1 = series1._time_dim
time_dim2 = series2._time_dim
values1, values2 = xa1.values[path[:, 0]], xa2.values[path[:, 1]]

# We set a RangeIndex for both series:
warped_series1 = xr.DataArray(
data=values1,
dims=xa1.dims,
coords={
self.series1._time_dim: pd.RangeIndex(values1.shape[0]),
DIMS[1]: xa1.coords[DIMS[1]],
},
attrs=xa1.attrs,
)

range_index = True
warped_series2 = xr.DataArray(
data=values2,
dims=xa2.dims,
coords={
self.series2._time_dim: pd.RangeIndex(values2.shape[0]),
DIMS[1]: xa2.coords[DIMS[1]],
},
attrs=xa2.attrs,
)

if range_index:
warped_series1 = warped_series1.reset_index(dims_or_levels=time_dim1)
warped_series2 = warped_series2.reset_index(dims_or_levels=time_dim2)
time_dim1, time_dim2 = self.series1._time_dim, self.series2._time_dim

# todo: prevent time information being lost after warping
# Applying time index from series1 to series2 (take_dates = True) is disabled for consistency reasons
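To see the effect of this hunk: `warped()` now rebuilds both warped arrays as fresh `xarray.DataArray`s indexed by a `pandas.RangeIndex`, instead of resetting the original time index in place. A minimal sketch of how the result is consumed (not part of the diff; the toy series are made up, and it assumes the public `darts.dataprocessing.dtw.dtw()` entry point):

```python
# Sketch only: exercise DTWAlignment.warped() after this change.
import numpy as np

from darts import TimeSeries
from darts.dataprocessing import dtw

s1 = TimeSeries.from_values(np.sin(np.linspace(0, 10, 50)))
s2 = TimeSeries.from_values(np.sin(np.linspace(0, 10, 60) + 0.3))

alignment = dtw.dtw(s1, s2)
warped1, warped2 = alignment.warped()

# Both series share the alignment path length and a pd.RangeIndex time axis.
assert len(warped1) == len(warped2)
print(warped1.time_index)
```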
56 changes: 56 additions & 0 deletions darts/models/components/transformer.py
@@ -0,0 +1,56 @@
import torch
import torch.nn as nn

from darts.utils.torch import MonteCarloDropout


class CustomFeedForwardEncoderLayer(nn.TransformerEncoderLayer):
"""Overwrites the PyTorch TransformerEncoderLayer to use Darts' Position-wise Feed-Forward variants."""

def __init__(self, ffn: nn.Module, dropout: float, *args, **kwargs):
"""
Parameters
----------
ffn
One of Darts' Position-wise Feed-Forward Network variants from darts.models.components.glu_variants
dropout
Fraction of neurons affected by Dropout (default=0.1).
args
positional arguments from torch.nn.TransformerEncoderLayer.
kwargs
keyword arguments from torch.nn.TransformerEncoderLayer. `activation` will have no effect.
"""
super().__init__(*args, **kwargs)
self.ffn = ffn
self.dropout = MonteCarloDropout(dropout)

# overwrite the feed forward block
def _ff_block(self, x: torch.Tensor) -> torch.Tensor:
x = self.ffn(x)
return self.dropout(x)


class CustomFeedForwardDecoderLayer(nn.TransformerDecoderLayer):
"""Overwrites the PyTorch TransformerDecoderLayer to use Darts' custom Position Wise Feed Forward Layers."""

def __init__(self, ffn: nn.Module, dropout: float, *args, **kwargs):
"""
Parameters
----------
ffn
One of Darts' Position-wise Feed-Forward Network variants from darts.models.components.glu_variants
dropout
Fraction of neurons affected by Dropout (default=0.1).
args
positional arguments from torch.nn.TransformerDecoderLayer.
kwargs
keyword arguments from torch.nn.TransformerDecoderLayer. `activation` will have no effect.
"""
super().__init__(*args, **kwargs)
self.ffn = ffn
self.dropout = MonteCarloDropout(dropout)

# overwrite the feed forward block
def _ff_block(self, x: torch.Tensor) -> torch.Tensor:
x = self.ffn(x)
return self.dropout(x)
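For context, a minimal sketch of how these layers are wired together (not part of the new file; it assumes `SwiGLU` from `darts.models.components.glu_variants` with the `(d_model, d_ff, dropout)` signature used elsewhere in this commit):

```python
import torch
import torch.nn as nn

from darts.models.components.glu_variants import SwiGLU
from darts.models.components.transformer import CustomFeedForwardEncoderLayer

d_model, d_ff, nhead, dropout = 64, 128, 4, 0.1

# The GLU-variant FFN replaces the encoder layer's default linear -> activation -> linear block.
layer = CustomFeedForwardEncoderLayer(
    ffn=SwiGLU(d_model=d_model, d_ff=d_ff, dropout=dropout),
    dropout=dropout,
    d_model=d_model,
    nhead=nhead,
    dim_feedforward=d_ff,
)
encoder = nn.TransformerEncoder(layer, num_layers=2, norm=nn.LayerNorm(d_model))

x = torch.randn(10, 16, d_model)  # (sequence, batch, d_model); default batch_first=False
print(encoder(x).shape)           # torch.Size([10, 16, 64])
```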
2 changes: 1 addition & 1 deletion darts/models/forecasting/torch_forecasting_model.py
@@ -1355,7 +1355,7 @@ def load_model(path: str) -> "TorchForecastingModel":
path_ptl_ckpt = base_path + "_ptl-ckpt.pth.tar"
if os.path.exists(path_ptl_ckpt):
model.model = model.model.__class__.load_from_checkpoint(path_ptl_ckpt)
model.trainer = model.model.trainer
model.trainer = None

return model

74 changes: 66 additions & 8 deletions darts/models/forecasting/transformer_model.py
@@ -9,9 +9,13 @@
import torch
import torch.nn as nn

from darts.logging import get_logger, raise_if_not
from darts.logging import get_logger, raise_if, raise_if_not
from darts.models.components import glu_variants
from darts.models.components.glu_variants import GLU_FFN
from darts.models.components.transformer import (
CustomFeedForwardDecoderLayer,
CustomFeedForwardEncoderLayer,
)
from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel

@@ -22,6 +26,34 @@
FFN = GLU_FFN + BUILT_IN


def _generate_coder(
d_model, dim_ff, dropout, nhead, num_layers, coder_cls, layer_cls, ffn_cls
):
"""Generates an Encoder or Decoder with one of Darts' Feed-forward Network variants.
Parameters
----------
coder_cls
Either `torch.nn.TransformerEncoder` or `...TransformerDecoder`
layer_cls
Either `darts.models.components.transformer.CustomFeedForwardEncoderLayer` or
`...CustomFeedForwardDecoderLayer`
ffn_cls
One of Darts' Position-wise Feed-Forward Network variants from `darts.models.components.glu_variants`
"""
layer = layer_cls(
ffn=ffn_cls(d_model=d_model, d_ff=dim_ff, dropout=dropout),
dropout=dropout,
d_model=d_model,
nhead=nhead,
dim_feedforward=dim_ff,
)
return coder_cls(
layer,
num_layers=num_layers,
norm=nn.LayerNorm(d_model),
)


# This implementation of positional encoding is taken from the PyTorch documentation:
# https://pytorch.org/tutorials/beginner/transformer_tutorial.html
class _PositionalEncoding(nn.Module):
@@ -142,13 +174,39 @@ def __init__(

raise_if_not(activation in FFN, f"'{activation}' is not in {FFN}")
if activation in GLU_FFN:
# use glu variant feedforward layers
self.activation = getattr(glu_variants, activation)(
d_model=d_model, d_ff=dim_feedforward, dropout=dropout
raise_if(
custom_encoder is not None or custom_decoder is not None,
"Cannot use `custom_encoder` or `custom_decoder` along with an `activation` from "
f"{GLU_FFN}",
logger=logger,
)
# use glu variant feed-forward layers
ffn_cls = getattr(glu_variants, activation)

# custom feed-forward layers have activation built-in. reset activation
activation = None

custom_encoder = _generate_coder(
d_model,
dim_feedforward,
dropout,
nhead,
num_encoder_layers,
nn.TransformerEncoder,
CustomFeedForwardEncoderLayer,
ffn_cls,
)

custom_decoder = _generate_coder(
d_model,
dim_feedforward,
dropout,
nhead,
num_decoder_layers,
nn.TransformerDecoder,
CustomFeedForwardDecoderLayer,
ffn_cls,
)
else:
# use nn.Transformer built in feedforward layers
self.activation = activation

# Defining the Transformer module
self.transformer = nn.Transformer(
@@ -158,7 +216,7 @@ def __init__(
num_decoder_layers=num_decoder_layers,
dim_feedforward=dim_feedforward,
dropout=dropout,
activation=self.activation,
activation=activation,
custom_encoder=custom_encoder,
custom_decoder=custom_decoder,
)
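Taken together, passing a GLU-variant `activation` now transparently swaps in the custom encoder/decoder stacks built by `_generate_coder`. A usage sketch (not part of the diff; chunk lengths and epoch count are arbitrary):

```python
import numpy as np

from darts import TimeSeries
from darts.models import TransformerModel

series = TimeSeries.from_values(np.sin(np.linspace(0, 20, 200)))

# "SwiGLU" is one of the GLU_FFN variants, so the model builds
# CustomFeedForward{Encoder,Decoder}Layer stacks instead of the PyTorch defaults.
model = TransformerModel(
    input_chunk_length=12,
    output_chunk_length=6,
    activation="SwiGLU",
    n_epochs=1,
)
model.fit(series)
forecast = model.predict(n=6)
```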
24 changes: 22 additions & 2 deletions darts/tests/models/forecasting/test_transformer_model.py
@@ -11,6 +11,12 @@
logger = get_logger(__name__)

try:
import torch.nn as nn

from darts.models.components.transformer import (
CustomFeedForwardDecoderLayer,
CustomFeedForwardEncoderLayer,
)
from darts.models.forecasting.transformer_model import (
TransformerModel,
_TransformerModule,
@@ -118,14 +124,28 @@ def test_activations(self):
)
model1.fit(self.series, epochs=1)

# internal activation function
# internal activation function uses PyTorch TransformerEncoderLayer
model2 = TransformerModel(
input_chunk_length=1, output_chunk_length=1, activation="gelu"
)
model2.fit(self.series, epochs=1)
assert isinstance(
model2.model.transformer.encoder.layers[0], nn.TransformerEncoderLayer
)
assert isinstance(
model2.model.transformer.decoder.layers[0], nn.TransformerDecoderLayer
)

# glue variant FFN
# glu variant FFN uses our custom CustomFeedForwardEncoderLayer
model3 = TransformerModel(
input_chunk_length=1, output_chunk_length=1, activation="SwiGLU"
)
model3.fit(self.series, epochs=1)
assert isinstance(
model3.model.transformer.encoder.layers[0],
CustomFeedForwardEncoderLayer,
)
assert isinstance(
model3.model.transformer.decoder.layers[0],
CustomFeedForwardDecoderLayer,
)
25 changes: 25 additions & 0 deletions darts/tests/test_timeseries.py
@@ -105,6 +105,31 @@ def test_integer_indexing(self):
list(indexed_ts.time_index) == list(pd.RangeIndex(2, 7, step=1))
)

def test_univariate_component(self):
series = TimeSeries.from_values(np.array([10, 20, 30])).with_columns_renamed(
"0", "component"
)
mseries = concatenate([series] * 3, axis="component")
mseries = mseries.with_hierarchy(
{"component_1": ["component"], "component_2": ["component"]}
)

static_cov = pd.DataFrame(
{"dim0": [1, 2, 3], "dim1": [-2, -1, 0], "dim2": [0.0, 0.1, 0.2]}
)

mseries = mseries.with_static_covariates(static_cov)

for univ_series in [
mseries.univariate_component(1),
mseries.univariate_component("component_1"),
]:
# hierarchy should be dropped
self.assertIsNone(univ_series.hierarchy)

# only the right static covariate column should be retained
self.assertEqual(univ_series.static_covariates.sum().sum(), 1.1)

def test_column_names(self):
# test the column names resolution
columns_before = [
12 changes: 6 additions & 6 deletions darts/timeseries.py
@@ -649,7 +649,7 @@ def from_dataframe(
else:
raise_if_not(
isinstance(df.index, VALID_INDEX_TYPES),
"If time_col is not specified, the DataFrame must be indexed either with"
"If time_col is not specified, the DataFrame must be indexed either with "
"a DatetimeIndex, or with a RangeIndex.",
logger,
)
@@ -2702,6 +2702,9 @@ def univariate_component(self, index: Union[str, int]) -> "TimeSeries":
Retrieve one of the components of the series
and return it as new univariate ``TimeSeries`` instance.
This drops the hierarchy (if any), and retains only the relevant static
covariates column.
Parameters
----------
index
@@ -2713,11 +2716,8 @@ def univariate_component(self, index: Union[str, int]) -> "TimeSeries":
TimeSeries
A new univariate TimeSeries instance.
"""
if isinstance(index, int):
new_xa = self._xa.isel(component=index).expand_dims(DIMS[1], axis=1)
else:
new_xa = self._xa.sel(component=index).expand_dims(DIMS[1], axis=1)
return self.__class__(new_xa)

return self[index if isinstance(index, str) else self.components[index]]

def add_datetime_attribute(
self, attribute, one_hot: bool = False, cyclic: bool = False
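A small sketch of the simplified `univariate_component` behaviour described above (not part of the diff; values and column names are made up):

```python
import numpy as np

from darts import TimeSeries

series = TimeSeries.from_values(np.random.rand(10, 3), columns=["a", "b", "c"])

# Integer and string indexing are now both routed through __getitem__:
univ = series.univariate_component("b")  # same as series.univariate_component(1)
assert univ.n_components == 1
assert list(univ.components) == ["b"]
```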
8 changes: 4 additions & 4 deletions docs/userguide/timeseries.md
@@ -33,7 +33,7 @@ In addition, some models can work on *multiple time series*, meaning that they c

* **Example of a multivariate series:** The blood pressure and heart rate of a single patient over time (one multivariate series with 2 components).

* **Example of multiple series:** The blood pressure and heart rate of multiple patients; potentially measured at different times for different patients (one univariate series per patient).
* **Example of multiple series:** The blood pressure and heart rate of multiple patients; potentially measured at different times for different patients (one multivariate series with 2 components per patient).


### Should I use a multivariate series or multiple series for my problem?
@@ -50,9 +50,9 @@ In Darts, probabilistic forecasts are represented by drawing Monte Carlo samples
## Creating `TimeSeries`
`TimeSeries` objects can be created using factory methods, for example:

* [TimeSeries.from_dataframe()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_dataframe) can create `TimeSeries` from a Pandas Dataframe having one or several columns representing values (several columns would correspond to a multivariate series).
* [TimeSeries.from_dataframe()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_dataframe) can create `TimeSeries` from a Pandas Dataframe having one or several columns representing values (columns correspond to components, and several columns would correspond to a multivariate series).

* [TimeSeries.from_values()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_values) can create `TimeSeries` from a 2-D or 3-D NumPy array. It will generate an integer-based time index (of type `pandas.RangeIndex`). 2-D corresponds to deterministic (potentially multivariate) series, and 3-D to stochastic series.
* [TimeSeries.from_values()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_values) can create `TimeSeries` from a 1-D, 2-D or 3-D NumPy array. It will generate an integer-based time index (of type `pandas.RangeIndex`). 1-D corresponds to univariate deterministic series, 2-D to multivariate deterministic series, and 3-D to multivariate stochastic series.

* [TimeSeries.from_times_and_values()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_times_and_values) is similar to `TimeSeries.from_values()` but also accepts a time index.

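A minimal sketch tying the factory methods above together (illustrative values; not part of the documentation diff):

```python
import numpy as np
import pandas as pd

from darts import TimeSeries

df = pd.DataFrame(
    {"heart_rate": np.random.rand(24), "blood_pressure": np.random.rand(24)},
    index=pd.date_range("2022-01-01", periods=24, freq="H"),
)

multivariate = TimeSeries.from_dataframe(df)                      # 2 components, DatetimeIndex
deterministic = TimeSeries.from_values(np.random.rand(24))        # univariate, RangeIndex
stochastic = TimeSeries.from_values(np.random.rand(24, 2, 100))   # 2 components, 100 samples
```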
@@ -67,7 +67,7 @@ my_multivariate_series = concatenate([series1, series2, ...], axis=1)
produces a multivariate series from some series that share the same time axis.

## Implementation
Behind the scenes, `TimeSeries` is wrapping around a 3-dimensional `xarray.DataArray` object. The dimensions are *(time, component, sample)*, where the size of the *component* dimension is larger than 1 for multivariate series and the size of the *sample* dimension is larger than 1 for stochastic series. The `DataArray` is itself backed by a a 3-dimensional NumPy array, and it has a time index (either `pandas.DatetimeIndex` or `pandas.RangeIndex`) on the *time* dimension and another `pandas.Index` on the *component* (or "columns") dimension. `TimeSeries` is intended to be immutable.
Behind the scenes, `TimeSeries` is wrapping around a 3-dimensional `xarray.DataArray` object. The dimensions are *(time, component, sample)*, where the size of the *component* dimension is larger than 1 for multivariate series and the size of the *sample* dimension is larger than 1 for stochastic series. The `DataArray` is itself backed by a 3-dimensional NumPy array, and it has a time index (either `pandas.DatetimeIndex` or `pandas.RangeIndex`) on the *time* dimension and another `pandas.Index` on the *component* (or "columns") dimension. `TimeSeries` is intended to be immutable and most operations return new `TimeSeries` objects.
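A short sketch of inspecting that backing structure (illustrative; not part of the documentation diff):

```python
import numpy as np

from darts import TimeSeries

ts = TimeSeries.from_values(np.random.rand(5, 2, 3))  # 5 times, 2 components, 3 samples

xa = ts.data_array(copy=False)
print(xa.dims)                        # ('time', 'component', 'sample')
print(type(ts.time_index).__name__)   # RangeIndex, since from_values was used
print(ts.n_components, ts.n_samples)  # 2 3
```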

## Exporting data from a `TimeSeries`
`TimeSeries` objects offer a few ways to export the data, for example:
2 changes: 1 addition & 1 deletion requirements/core.txt
@@ -12,7 +12,7 @@ prophet>=1.1
requests>=2.22.0
scikit-learn>=1.0.1
scipy>=1.3.2
statsforecast>=0.5.2
statsforecast==0.6.0
statsmodels>=0.13.0
tbats>=1.1.0
tqdm>=4.60.0
