In [None]:
import tsdm
import numpy as np

In [None]:
from tsdm.datasets import Electricity

In [None]:
# Show the attribute dictionary (`__dict__`) of the Electricity dataset object
# to see what it exposes.
vars(Electricity)

In [None]:
# Bind the dataset to `x`; the following cells in this section operate on it.
x = Electricity.dataset
# A bare name as the last expression renders the object as the cell output.
x

In [None]:
# Simulate missing observations: draw an elementwise True/False mask with the
# same shape as `x` and keep only the entries where the mask is True
# (`where` replaces the rest with NaN).
#
# A seeded generator is used so that Restart & Run All reproduces the same
# mask. As a side effect the cell becomes idempotent: re-running it applies
# the identical mask to the already-masked `x`, which changes nothing.
rng = np.random.default_rng(seed=0)
observed = rng.choice([True, False], size=x.shape)
x = x.where(observed)
display(observed, x)

### 2.1 Triplet Format

The data is represented as a set of triplets (time, variable, value). All NaNs are dropped.

In [None]:
# Convert the partially observed `x` with the tsdm helper for the triplet
# format described above, then display the result.
# NOTE(review): the exact return type is defined in tsdm.util — not visible here.
dense_x = tsdm.util.make_dense_triplets(x)
dense_x

### 2.2 Sparse Triplet Format

The same as the triplet format above, but the variable tensor is one-hot encoded and the resulting tensor is stored as a sparse tensor.

In [None]:
# Convert the partially observed `x` with the tsdm helper for the sparse
# triplet format described above, then display the result.
# NOTE(review): the exact return type is defined in tsdm.util — not visible here.
sparse_x = tsdm.util.make_sparse_triplets(x)
sparse_x

### 2.3 Masked Format

Here we get 3 tensors:

- x: the original data
- m: a boolean mask, 1: value observed, 0: value not observed (NaN)
- d: time since the channel was last observed

In [None]:
# Unpack the three results of the tsdm helper (data, mask, time-since-last-
# observation, per the list above) and display them side by side.
# NOTE(review): this rebinds `x` to the first returned object, so re-running
# the cell feeds the previous output back into make_masked_format — confirm
# the helper tolerates that, or re-run the cells above first.
x, m, d = tsdm.util.make_masked_format(x)
display(x, m, d)

## 3. Visualizing the data

In [None]:
# Work on the dataset as provided by the loader (independent of `x` above).
df = Electricity.dataset

# Sampling interval: differences between consecutive index entries, with the
# first one expressed in minutes. The assertion checks that every step has
# that same length, i.e. the index is evenly spaced.
ΔT = np.diff(df.index)
Δt = ΔT[0].astype("timedelta64[m]")
assert (ΔT == Δt).all()

# Row / column counts, taken *before* the first year is trimmed below.
N, M = len(df.index), len(df.columns)

# remove first year from the data (useless zeros)
# `span` is the number of samples that fit into 365 days.
span = np.timedelta64(365, "D") // Δt
df = df.iloc[span:, :]

In [None]:
%config InlineBackend.figure_format = 'retina'
from matplotlib import pyplot as plt

fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(24, 9), tight_layout=True, sharex='col', sharey=True)
ax[0,0].set_title("24h rolling average")
ax[1,0].set_title("7d rolling average")
selection = np.random.randint(low=0, high=M, size=5)
# selection = [319]

a = np.datetime64('2013-01-01')
b = np.datetime64('2013-02-01')
mask = (df.index >= a) & (df.index <= b)


for k, timedelta in enumerate((Δt, np.timedelta64(24, "h"), np.timedelta64(7, "D"))):
    for l in range(2):
        if l==0:
            data = df.rolling(window=timedelta//Δt, min_periods=1, axis=0).mean()
        if l==1: 
            a = np.datetime64('2013-01-01')
            b = np.datetime64('2013-02-01')
            mask = (df.index >= a) & (df.index <= b)
            data = df[mask].rolling(window=timedelta//Δt, min_periods=1, axis=0).mean()

        for col in data.iloc[:, selection]:
            ax[k,l].plot(data.index, data[col])
        ax[k,l].set_title(F"{timedelta}-rolling average")
        ax[k,l].set_ylabel("electricity consumption in kW")