# Processing

In [1]:
# Put the repository's source directory (given as a relative path) at the
# front of the module search path so the in-repo package can be imported.
import sys

SRC_DIR = '../../src'
sys.path.insert(0, SRC_DIR)

In [2]:
import pandas as pd
import ontime as on

---
## Generation of random time series

In [3]:
# Generate a random-walk series spanning 2022-01-01 to 2022-12-31.
# NOTE(review): no seed is passed, so the values presumably differ between
# runs — confirm whether the generator accepts one.
start_date = pd.Timestamp('2022-01-01')
end_date = pd.Timestamp('2022-12-31')
ts = on.generators.random_walk().generate(start=start_date, end=end_date)

---
## Apply a Function to the Whole Time Series

With a lambda function:

In [4]:
# Wrap an anonymous "+2" function in a mapper processor and run it on the series.
add_two = on.processors.mapper(lambda value: value + 2)
new_ts = add_two.process(ts)

# Cell output: the processed series minus the input series.
new_ts - ts

With a regular (named) function:

In [5]:
def add_2(x):
    """Return ``x`` increased by two."""
    offset = 2
    return x + offset

# Build the mapper processor from the named function `add_2` this time.
add_two = on.processors.mapper(add_2)
# Run the processor on the series.
new_ts = add_two.process(ts)
# Cell output: the processed series minus the input series.
new_ts - ts

---
## Apply Function on Windows of the Time Series

In [6]:
# Windower settings: apply the 'mean' function in 'rolling' mode with a
# window size of 10.
window_config = dict(function='mean', mode='rolling', window=10)
mean = on.processors.windower(window_config)

In [11]:
# Run the rolling-mean processor on the series.
# NOTE(review): the from_darts conversion suggests the processor returns a
# Darts series rather than an ontime one — confirm.
windowed = mean.process(ts)
new_ts = on.TimeSeries.from_darts(windowed)

In [12]:
# Plot the original series, for comparison with the windowed result.
ts.plot()

In [13]:
# Plot the series produced by the rolling-mean windower.
new_ts.plot()

---
## Split a Time Series into Fixed Periods (e.g. day, week, month, year)

In [14]:
# Generate a fresh random-walk series for 2022; this rebinds `ts`.
# NOTE(review): no seed is passed, so the values presumably differ between
# runs — confirm whether the generator accepts one.
start_date = pd.Timestamp('2022-01-01')
end_date = pd.Timestamp('2022-12-31')
ts = on.generators.random_walk().generate(start=start_date, end=end_date)

In [15]:
# Plot the newly generated series before splitting it.
ts.plot()

Split by month

Any pandas offset alias can be used to obtain splits of a different length (see the [offset aliases documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)).

In [16]:
# Split the series using the pandas offset alias 'M' (monthly).
# NOTE(review): the saved output of this cell contains a fragment of a warning
# pointing at a `groupby(pd.Grouper(freq=period))` call; recent pandas versions
# deprecate 'M' in favour of 'ME' — confirm against the pandas version in use.
period_alias = 'M'
seq = ts.split_by_period(period_alias)

  splits_df = [g for n, g in df.groupby(pd.Grouper(freq=period))]


In [17]:
# Number of splits produced (12 in the saved output, one per month of 2022).
len(seq)

12

Group the splits

In [18]:
# Combine the list of splits with `group_splits`.
# NOTE(review): the structure of the returned object is not visible here — confirm.
ts_g = ts.group_splits(seq)

In [19]:
# Display the grouped result as the cell output.
ts_g

---
## Compute Correlation Through Time

Load some data from the Energy dataset in Darts

In [20]:
# NOTE(review): this import would conventionally live in the top import cell.
from darts.datasets import EnergyDataset

# Load the Energy dataset; this rebinds `ts`, replacing the random-walk series.
energy_dataset = EnergyDataset()
ts = energy_dataset.load()

Keep three generation columns and the first 1,000 samples

In [21]:
# Columns to keep and the number of leading samples to retain.
cols = ['generation biomass', 'generation solar', 'generation nuclear']
N_SAMPLES = 1000

# Select the columns first, then slice off the first N_SAMPLES entries.
ts = ts[cols][0:N_SAMPLES]

Compute correlations within a daily window

In [22]:
# Correlation processor configured with a '1D' (daily) window.
correlation = on.processors.correlation('1D')

In [23]:
# Run the correlation processor on the selected energy columns.
ts_corr = correlation.process(ts)

In [25]:
# Plot only the first 100 entries of the correlation result.
ts_corr[0:100].plot()