In [9]:
from kats.utils.simulator import Simulator
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import ArmaProcess
import plotly.graph_objects as go
from plotly_resampler import unregister_plotly_resampler
# NOTE(review): presumably restores the stock plotly figure classes so that the go.Figure
# used for the preview plot below is not wrapped by plotly-resampler -- confirm against its docs.
unregister_plotly_resampler()

## Exp 1: AR with different scales

How did I get to the baseline?
- AR(2) single TS (MAE less than 1%)
- AR(2) 10 TS (can see that performance of NP increases)
- AR(4) single TS
- AR(4) 10 TS
- AR(4) with ar_magnitude 10 for single TS + offset=1000 (MAE around 1%, slightly worse than without offset)
- AR(4) with ar_magnitude 10 for 10 TS + offset=1000 = BASELINE

Set up unbalanced scale:
- 1 additional TS with ar_magnitude 1 + offset=100

Expected Outcome:
If we have many TS with a higher scale and larger ar_magnitude, the single TS with the smaller scale and smaller ar_magnitude should be predicted worse.

Comments:
- no seasonality, no trend, no noise
- try imbalanced scale in the other direction or with a 50/50 split

Model config:
model_class_np = NeuralProphetModel
params_np =  {
    "n_lags":4,
    "n_forecasts": 1,
    "n_changepoints":0,
    "growth":"off",
    # Disable all seasonality components
    "yearly_seasonality":False,
    "weekly_seasonality":False,
    "daily_seasonality":False,
    "epochs": 20,
    "_data_params":{},
}

#### Baseline
To measure NP's baseline performance when all series share the same scale

In [2]:
N = 26000  # number of hourly observations per simulated series
# Hourly timestamps shared by every series ('h' is the non-deprecated spelling of 'H')
date_rng = pd.date_range(start=pd.to_datetime("2011-01-01 01:00:00"), periods=N, freq='h')

# AR lag-polynomial coefficients: the leading 1 is the zero-lag term and the four
# remaining entries make this an AR(4) process (not AR(2)).
# NOTE(review): statsmodels documents that these are entered with lag-polynomial signs,
# so the implied recursion coefficients would be the negated values
# (-0.5, 0.3, -0.02, -0.01) -- confirm this is what the experiment intends.
ar_coeffs = np.array([1, 0.5, -0.3, 0.02, 0.01])
ma_coeffs = np.array([1])  # MA coefficients (no MA component)
# Create an ARMA process with the specified coefficients
ar_process = ArmaProcess(ar_coeffs, ma_coeffs, nobs=N)

# Simulate the time series data
np.random.seed(42)  # Set a random seed for reproducibility
n_samples = N  # Number of samples to generate per series
ar_data = [ar_process.generate_sample(n_samples, scale=10) for _ in range(10)]
ar_data = [series - np.mean(series) for series in ar_data]  # centre each series on zero
offset_list = [1000] * 10  # every baseline series is shifted to the same level

# One frame per series with the columns ds (timestamp), y (value) and ID (series number)
ar_dfs = [
    pd.DataFrame({'ds': date_rng, 'y': ar_data[j] + offset_list[j], 'ID': j})
    for j in range(10)
]

# Stack all series with a single concat call instead of growing a frame inside a loop;
# the per-series row index (0..N-1) is kept, exactly as before.
concatenated_dfs = pd.concat(ar_dfs, axis=0)

#### Unbalanced scale

In [3]:
N = 26000  # number of hourly observations per simulated series
# Hourly timestamps shared by every series ('h' is the non-deprecated spelling of 'H')
date_rng = pd.date_range(start=pd.to_datetime("2011-01-01 01:00:00"), periods=N, freq='h')

# AR lag-polynomial coefficients: the leading 1 is the zero-lag term and the four
# remaining entries make this an AR(4) process (not AR(2)).
# NOTE(review): statsmodels documents that these are entered with lag-polynomial signs,
# so the implied recursion coefficients would be the negated values
# (-0.5, 0.3, -0.02, -0.01) -- confirm this is what the experiment intends.
ar_coeffs = np.array([1, 0.5, -0.3, 0.02, 0.01])
ma_coeffs = np.array([1])  # MA coefficients (no MA component)
# Create an ARMA process with the specified coefficients
ar_process = ArmaProcess(ar_coeffs, ma_coeffs, nobs=N)

# Simulate the ten high-scale series (same seed and draw order as the baseline cell)
np.random.seed(42)  # Set a random seed for reproducibility
n_samples = N  # Number of samples to generate per series
ar_data = [ar_process.generate_sample(n_samples, scale=10) for _ in range(10)]
ar_data = [series - np.mean(series) for series in ar_data]  # centre each series on zero
offset_list = [1000] * 10  # all ten high-scale series share the same level

# One frame per series with the columns ds (timestamp), y (value) and ID (series number)
ar_dfs = [
    pd.DataFrame({'ds': date_rng, 'y': ar_data[j] + offset_list[j], 'ID': j})
    for j in range(10)
]

# Additional low-scale series: drawn from the same process right after the ten series
# above (so it continues the same random stream), with scale=1 and offset 100.
ar_single = ar_process.generate_sample(n_samples, scale=1)
ar_single = ar_single - np.mean(ar_single)
offset_single = 100
# NOTE(review): this series gets ID 11, so ID 10 is never used; the seasonal experiment
# labels its extra series 10 -- confirm the gap is intended.
simulated_df = pd.DataFrame({'ds': date_rng, 'y': ar_single + offset_single, 'ID': 11})
ar_dfs.append(simulated_df)

# Stack all series with a single concat call instead of growing a frame inside a loop;
# the per-series row index (0..N-1) is kept, exactly as before.
concatenated_dfs = pd.concat(ar_dfs, axis=0)

In [4]:
# save the data to csv
# NOTE(review): when the cells run top to bottom, concatenated_dfs was last assigned by the
# unbalanced-scale cell directly above (ten series plus ID 11), yet the file name ends in
# "_baseline" and the baseline frame is never written -- confirm which data set this file
# is meant to hold.
concatenated_dfs.to_csv('syn_data_exp1_unbalanced_scale_ar_baseline.csv')

## Exp 2: Same seasonality with different scales

How did I get to the baseline?
- daily seasonality with season_magnitude 50 single TS (fits perfectly)
- daily seasonality with season_magnitude 50, 10 TS (fits perfectly)
- daily seasonality with season_magnitude 50, 10 TS, offset=1000 (fits perfectly) = BASELINE

Set up unbalanced scale:
- 1 additional TS with season_magnitude 5 + offset=100

Expected Outcome:
If we have many TS with a higher scale and larger season_magnitude, the single TS with the smaller scale and smaller season_magnitude should be predicted worse.

Comments:
- no AR, no trend, no noise
- try imbalanced scale in the other direction or with a 50/50 split

m = NeuralProphet(
    n_forecasts=1,
    # Disable trend changepoints
    n_changepoints=0,
    growth="off",
    # Disable seasonality components, except daily
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=True,
    epochs=20,
)

#### Baseline

In [18]:
# Define the number of samples and the period
n_samples = 26000
period = 24  # samples per seasonal cycle (24 hourly steps)
factor = 50  # multiplier applied to the summed sine/cosine pattern
# Generate an array of time steps
t = np.arange(n_samples)
# Define the angular frequency (omega) corresponding to the period
omega = 2 * np.pi / period
# Ten identical seasonal series: first and second harmonic, sine plus cosine, times factor
seasonal_data = [
    (np.sin(omega * t) + np.cos(omega * t) + np.sin(2 * omega * t) + np.cos(2 * omega * t)) * factor
    for _ in range(10)
]

offset = [1000] * 10  # every baseline series is shifted to the same level

# Hourly timestamps for the series index. The length comes from this cell's own n_samples
# (previously a variable N defined only in the Exp 1 cells), so the cell also runs on a
# fresh kernel. 'h' is the non-deprecated spelling of 'H'.
date_rng = pd.date_range(start=pd.to_datetime("2011-01-01 01:00:00"), periods=n_samples, freq='h')

# One frame per series with the columns ds (timestamp), y (value) and ID (series number)
df_seasons = [
    pd.DataFrame({'ds': date_rng, 'y': seasonal_data[i] + offset[i], 'ID': i})
    for i in range(10)
]

# Stack all series with a single concat call instead of growing a frame inside a loop;
# the per-series row index (0..n_samples-1) is kept, exactly as before.
concatenated_dfs = pd.concat(df_seasons, axis=0)


In [19]:
# Write the stacked seasonal baseline series to CSV.
# NOTE(review): the file name says "exp1" although this cell sits under the Exp 2 heading --
# confirm the name is intended before renaming, since other code may read this path.
concatenated_dfs.to_csv('syn_data_exp1_unbalanced_scale_season_baseline.csv')

In [17]:
# Preview the first 100 samples of the first seasonal series, shifted by 1000
# (the same value the baseline cell uses as offset).
preview = seasonal_data[0][:100] + 1000
fig = go.Figure(data=[go.Scatter(y=preview, mode='lines')])
fig.show()

#### Unbalanced scale

In [24]:
# Extra low-amplitude, low-offset series for the unbalanced-scale data set.
# Reuses date_rng, omega, t and df_seasons from the seasonal baseline cell, so that cell
# has to be run first.
factor_single = 5
seasonal_data_single = (np.sin(omega * t) + np.cos(omega * t) + np.sin(2 * omega * t) + np.cos(2 * omega * t)) * factor_single
offset_single = 100
df = pd.DataFrame({'ds': date_rng, 'y': seasonal_data_single + offset_single, 'ID': 10})

# Drop any copy of series 10 left behind by an earlier run of this cell before adding the
# new one. Appending unconditionally made every re-run add another duplicate of the series.
df_seasons = [frame for frame in df_seasons if not (frame['ID'] == 10).all()]
df_seasons.append(df)

# Stack all series with a single concat call instead of growing a frame inside a loop
concatenated_dfs = pd.concat(df_seasons, axis=0)

In [23]:
# Write the stacked seasonal series including the extra low-scale series (ID 10) to CSV.
# NOTE(review): the recorded execution counts show this cell (In [23]) ran before the latest
# run of the cell that builds the frame (In [24]) -- re-run top to bottom before trusting
# the file. The file name also says "exp1" under the Exp 2 heading; confirm it is intended.
concatenated_dfs.to_csv('syn_data_exp1_unbalanced_scale_season_unbalanced.csv')

## Exp 3: Seasonality with 2 different shapes and with different scales

## Exp 4: TS with strong trend