# Context - Common

In [1]:
# Put the sibling `../src` directory first on the module search path so that
# Python packages located there can be imported by the cells below.
# NOTE: the path is relative, so it is resolved against the kernel's current
# working directory.
import sys
sys.path.insert(0, '../src')

In [2]:
import pandas as pd
from darts.datasets import EnergyDataset

In [3]:
import ontime as on

---

## Load data

In [4]:
# Load the energy dataset provided by `darts.datasets.EnergyDataset`.
ts = EnergyDataset().load()

## Process the data

In [5]:
# Interpolate the full frame first, then keep only the three generation
# columns used in the rest of the notebook.
cols = ['generation biomass', 'generation solar', 'generation nuclear']
df = ts.pd_dataframe().interpolate()[cols]

In [6]:
# Build an ontime TimeSeries from the filtered DataFrame. This rebinds `ts`,
# which until now held the object returned by `EnergyDataset().load()`.
ts = on.TimeSeries.from_dataframe(df)

In [7]:
# Both views share the same window, bounded by the '2015' and '2016' timestamps.
window = (pd.Timestamp('2015'), pd.Timestamp('2016'))

# Univariate view: only the 'generation solar' component.
ts_uni = ts['generation solar'].slice(*window)
# Multivariate view: every component kept in `ts`.
ts_multi = ts.slice(*window)

In [8]:
# Split the univariate series at 2015-09-01 into `train` and `test`.
# NOTE(review): presumably the split timestamp itself ends up in `train`
# (method is named `split_after`) — confirm against the library docs.
train, test = ts_uni.split_after(pd.Timestamp('2015-09-01'))

---

## Load Common Context

In [9]:
from ontime.context import common

## Profiler

In [10]:
# Instantiate the profiler exposed by the common context.
profiler = common.Profiler()

### Daily Aggregation

In [16]:
# Profile the univariate series over a daily period, once aggregated by mean
# and once by median; the "value" entry is renamed to day_mean / day_median.
day_mean = (
    profiler
    .profile(ts_uni, profiler.Period.DAILY, profiler.Aggregation.MEAN)
    .rename({"value": "day_mean"})
)
day_median = (
    profiler
    .profile(ts_uni, profiler.Period.DAILY, profiler.Aggregation.MEDIAN)
    .rename({"value": "day_median"})
)

In [17]:
# Draw the daily mean and daily median profiles as line marks on one plot.
daily_plot = on.Plot()
for daily_profile in (day_mean, day_median):
    daily_plot = daily_plot.add(on.marks.line, daily_profile)
daily_plot.show()

### Weekly Aggregation

In [18]:
# Profile the univariate series over a weekly period, once aggregated by mean
# and once by median; the "value" entry is renamed to week_mean / week_median.
week_mean = (
    profiler
    .profile(ts_uni, profiler.Period.WEEKLY, profiler.Aggregation.MEAN)
    .rename({"value": "week_mean"})
)
week_median = (
    profiler
    .profile(ts_uni, profiler.Period.WEEKLY, profiler.Aggregation.MEDIAN)
    .rename({"value": "week_median"})
)

In [19]:
# Draw the weekly mean and weekly median profiles as line marks on one plot.
weekly_plot = on.Plot()
for weekly_profile in (week_mean, week_median):
    weekly_plot = weekly_plot.add(on.marks.line, weekly_profile)
weekly_plot.show()

## Generic Predictor

In [20]:
# Instantiate the generic predictor. The name `model` is reused further down
# for the GenericDetector, so run the cells in order.
model = common.GenericPredictor()

In [24]:
# Fit the predictor on the training split.
model.fit(train)

<ontime.context.common.generic_predictor.GenericPredictor at 0x7fc6c9b189d0>

What does the future look like?

In [25]:
# Request 48 steps from the fitted predictor.
# NOTE(review): presumably the forecast starts right after the end of `train` — confirm.
pred = model.predict(48)

In [26]:
# Prepare the three series to compare, each relabelled from
# "generation solar" to the name passed to `rename`.
recent_train = train[-96:].rename({"generation solar": "Training set"})
forecast = pred.rename({"generation solar": "Prediction"})
truth = test[:48].rename({"generation solar": "Truth"})

# Training tail and prediction as plain lines, ground truth dashed.
(
    on.Plot()
    .add(on.marks.line, recent_train)
    .add(on.marks.line, forecast)
    .add(on.marks.line, truth, type="dashed")
    .properties(width=600, height=300)
    .show()
)

## Generic Detector

In [27]:
# Rebinds `model`: from this cell on it is a GenericDetector, no longer the
# GenericPredictor created in the previous section.
model = common.GenericDetector()

In [28]:
# Fit the detector on the training split.
model.fit(train)

<ontime.context.common.generic_detector.GenericDetector at 0x7fc6c9a32ef0>

Does the current signal have a problem?

In [29]:
# Run detection over the whole test split; the plot below only shows the first 72 entries.
detected_test = model.detect(test)

In [30]:
# First 72 detection results, relabelled for the legend.
detected_marks = detected_test[:72].rename({"generation solar": "Anomalies"})

# Test signal as a line, detection results as dots.
(
    on.Plot(test[:72])
    .add(on.marks.line)
    .add(on.marks.mark, data=detected_marks, type="dot")
    .properties(width=600, height=300)
    .show()
)

What if we want an idea of the problems that may occur in the future?

In [31]:
# Ask the fitted detector for 72 steps of anticipated anomalies.
# NOTE(review): presumably these steps follow the end of `train` — confirm.
predetected = model.predetect(72)

In [32]:
# First 72 pre-detection results, relabelled for the legend.
predetected_marks = predetected[:72].rename({"generation solar": "Anomalies"})

# Test signal as a line, pre-detection results as dots.
(
    on.Plot(test[:72])
    .add(on.marks.line)
    .add(on.marks.mark, data=predetected_marks, type="dot")
    .properties(width=600, height=300)
    .show()
)

## Data Quality Detector

Detect values above an absolute threshold

In [33]:
# Flag values using a fixed upper threshold of 3000.
detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000
)

# Fit on the training split, so the detector is not fitted on the same
# data it is evaluated on.
detector.fit(train)

# Detect on the first 72 entries of the test split.
detector.detect(test[:72]).plot()

Or between two thresholds 

In [34]:
# Flag values using a fixed upper threshold of 3000 and lower threshold of 1000.
detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000,
    lower_threshold=1000
)

# Fit on the training split, so the detector is not fitted on the same
# data it is evaluated on.
detector.fit(train)

# Detect on the first 72 entries of the test split.
detector.detect(test[:72]).plot()

Or within a statistical range

In [35]:
# Quantile mode: `upper_threshold` is given as 0.8 rather than an absolute value.
detector = common.DataQualityDetector(
    threshold_type='quantile',
    upper_threshold=0.8
)

# Fit on the training split, so whatever the detector learns in `fit`
# does not come from the data it is evaluated on.
detector.fit(train)

# Detect on the first 72 entries of the test split.
detector.detect(test[:72]).plot()

## Missing Data Detector

Creating data with NaNs

In [36]:
import numpy as np

def add_random_nans(series, n=1, seed=None):
    """
    Return a copy of a pandas Series with randomly chosen values set to NaN.

    The input Series is never modified; the NaNs are written into a copy.

    Parameters:
    - series (pd.Series): The pandas Series to take values from.
    - n (int): The number of NaN values to add. Capped at len(series). Default is 1.
    - seed (int or None): Seed for a dedicated random generator, making the
      chosen positions reproducible. When None (default), NumPy's global
      random state is used, so `np.random.seed(...)` still applies.

    Returns:
    - pd.Series: A new pandas Series with `n` values replaced by NaN.

    Raises:
    - ValueError: If `n` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    n = min(n, len(series))
    # Work on a copy so the caller's data is left untouched.
    result = series.copy()
    if n == 0:
        return result

    rng = np.random if seed is None else np.random.default_rng(seed)
    # Sample positions, not index labels: with duplicated labels, label-based
    # assignment could blank more than `n` values.
    positions = rng.choice(len(result), size=n, replace=False)
    result.iloc[positions] = np.nan
    return result

In [37]:
# Seed NumPy's global random state so the same 300 positions are turned into
# NaN on every run, which keeps the cell output reproducible.
np.random.seed(42)
ts_w_nans = on.TimeSeries.from_series(add_random_nans(test.pd_series(), 300))

Detecting the NaNs

In [38]:
# Instantiate the missing-data detector; it is used here without a prior `fit` call.
detector = common.MissingDataDetector()

# Run detection on the first 72 entries of the series containing NaNs and plot the result.
detector.detect(ts_w_nans[:72]).plot()