In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and source edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Add the parent of the current working directory to the import path, at
# position 1 (right after the first entry).
# NOTE(review): presumably this makes the local repository checkout of
# skforecast importable; it depends on the directory the kernel was started in.
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

import numpy as np
import pandas as pd
import skforecast

# Print the version of the skforecast package that was actually imported.
print(skforecast.__version__)

/home/joaquin/Documents/GitHub/skforecast
0.19.0


In [6]:
# Simulate a daily categorical time series ("low" / "medium" / "high") by
# discretizing a latent autoregressive process, then save it to CSV.

# Parameters
n_days = 365 * 2  # 730 daily observations (about two years)
seed = 42
rng = np.random.default_rng(seed)  # seeded generator -> reproducible simulation

# Define states. The order matters: a label's position in this list is both
# its numeric code and the tercile bin it is assigned to below.
states = ["low", "medium", "high"]

# Latent AR process with memory at lags 1, 7, 14
phi1 = 0.6   # yesterday
phi7 = 0.25  # one week ago
phi14 = 0.1  # two weeks ago
noise_sd = 0.5

# Simulate latent continuous process.
# The recursion needs 14 past values, so the first 14 observations are drawn
# directly from a standard normal as initial conditions.
latent = np.zeros(n_days)
latent[:14] = rng.normal(0, 1, size=14)

for t in range(14, n_days):
    latent[t] = (
        phi1 * latent[t - 1]
        + phi7 * latent[t - 7]
        + phi14 * latent[t - 14]
        + rng.normal(0, noise_sd)
    )

# Add mild weekly seasonality: a sine wave with a 7-day period and amplitude
# 0.5, phased on the position in the series. Index 0 is 2023-01-01 (a Sunday),
# so the peak falls on index % 7 == 2 (Tuesday) and the trough on
# index % 7 == 5 (Friday); Saturdays are lowered too and Sundays are unchanged.
day_of_week = np.arange(n_days) % 7
latent += 0.5 * np.sin(2 * np.pi * day_of_week / 7)

# Discretize into 3 states using the terciles of the latent values
q1, q2 = np.quantile(latent, [1/3, 2/3])

def to_state(x):
    """Return the state label for a single latent value.

    Values below ``q1`` map to ``states[0]`` ("low"), values in ``[q1, q2)``
    map to ``states[1]`` ("medium") and values at or above ``q2`` map to
    ``states[2]`` ("high").

    Parameters
    ----------
    x : float
        Latent value to classify.

    Returns
    -------
    str
        One of the labels in ``states``.
    """
    return states[int(np.digitize(x, [q1, q2]))]

# Label all observations in one vectorized step: np.digitize yields 0, 1 or 2
# for x < q1, q1 <= x < q2 and x >= q2, which indexes straight into `states`.
states_series = np.array(states)[np.digitize(latent, [q1, q2])]

# Create DataFrame with daily frequency
dates = pd.date_range("2023-01-01", periods=n_days, freq="D")
df = pd.DataFrame({
    "date": dates,
    "state": states_series,
    "latent_value": latent
})

# Encode states numerically for analysis (code = position in `states`)
state_map = {s: i for i, s in enumerate(states)}
df["state_code"] = df["state"].map(state_map)

# Keep only the timestamp and the label, indexed by date with an explicit
# daily frequency, and write the series to the current working directory.
data = df[['date', 'state']].copy()
data.columns = ['datetime', 'y']
data = data.set_index('datetime')
data = data.asfreq('D')
data.to_csv('time_series_categorical.csv', index=True)
data

Unnamed: 0_level_0,y
datetime,Unnamed: 1_level_1
2023-01-01,high
2023-01-02,low
2023-01-03,high
2023-01-04,high
2023-01-05,low
...,...
2024-12-26,low
2024-12-27,low
2024-12-28,low
2024-12-29,low
