In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.express as px

In [2]:
from aku_utils.common import curr_date

In [3]:
from aku_utils.gen import panel_data

In [None]:
# Number of synthetic objects; `objs` below gets one row per object.
num_objs = 10

cap_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

objs_ids = [*range(num_objs)]
# Random three-letter labels. Letters are drawn with replacement and names are
# drawn independently, so uniqueness of `obj` is not guaranteed.
obj_names = [
    ''.join(np.random.choice(list(cap_letters), size=3))
    for _ in range(num_objs)
]

# column -> (shape, scale, offset) of the shifted gamma draw behind each scale column.
# Kept in this order so the random draws happen in the same sequence as the columns.
gamma_params = {
    'trend_global_scale' : (16, 0.5, 1),  # scale for 'base' col from trend_effects
    'trend_local_scale' : (8, 0.5, 1),
    'weekly_global_scale' : (8, 1, 0.5),
    'weekly_local_scale' : (4, 1, 0.5),
    'daily_global_scale' : (4, 2, 0.25),
    'daily_local_scale' : (2, 4, 0.25),
}

objs_base = {
    'obj_id' : objs_ids,
    'obj' : obj_names,
    'start' : np.random.uniform(low=-100, high=100, size=num_objs),
    'trend_constant' : np.random.normal(loc=0.0, scale=1.0, size=num_objs),
    **{
        col : np.random.gamma(shape=k, scale=theta, size=num_objs) + offset
        for col, (k, theta, offset) in gamma_params.items()
    },
}
# Every generated parameter column, i.e. everything except the two identifier columns.
forbidden_cols = [col for col in objs_base if col not in ('obj_id', 'obj')]
objs = pd.DataFrame(objs_base)
objs

Unnamed: 0,obj_id,obj,start,trend_constant,trend_global_scale,trend_local_scale,weekly_global_scale,weekly_local_scale,daily_global_scale,daily_local_scale
0,0,FWZ,-39.578427,-0.33298,6.79911,4.301857,6.870476,1.928224,8.750613,13.650633
1,1,RLR,16.432573,-0.811366,8.500011,8.91538,10.084693,3.550635,8.444267,9.89964
2,2,NEL,-2.122625,-1.82945,6.893278,4.094312,7.218152,3.787823,22.058692,8.396652
3,3,HYC,-87.717795,-0.764788,7.958981,5.730809,11.912512,6.288986,9.338964,5.342839
4,4,PAD,-69.206107,1.078567,8.078045,4.601098,5.907457,3.988538,10.973002,2.481452
5,5,KXT,-13.585438,-1.051455,10.074259,3.675496,7.447423,3.357715,5.868627,10.896774
6,6,BDK,-95.166843,-0.769516,7.783792,3.510404,8.308159,4.347344,8.715406,7.581729
7,7,XGB,-10.073381,-0.736747,10.395727,4.253779,12.211898,3.103783,12.948584,3.084048
8,8,XBC,22.089197,0.516277,9.457174,5.041331,8.422201,3.793109,7.21157,2.445995
9,9,HIM,61.043911,-0.364022,6.806663,4.825851,6.702225,3.169602,7.199148,13.515151


In [128]:
def gen_effects(
    n_periods : int,
    num_objs : int,
    window : int = 5,
    rng = None,
):
    '''
    Generate dataframe with cyclically smoothed random standard noise

    One column of standard-normal noise is drawn per object (labelled
    `0 .. num_objs - 1`) plus a shared `'base'` column. Every column is then
    smoothed with a rolling mean of length `window` that wraps around from the
    last period back to the first, so the last row connects smoothly to the
    first one when the result is repeated.

    Parameters
    ----------
    n_periods : int
        Number of rows (periods) in the result.
    num_objs : int
        Number of per-object columns generated next to `'base'`.
    window : int
        Length of the rolling mean. Smoothness increases with `window`.
        It must be at least 1 and must not be equal or higher than `n_periods`
        - a window covering the whole cycle produces flat effects.
    rng : np.random.Generator or np.random.RandomState, optional
        Source of randomness; pass a seeded generator for reproducible output.
        When omitted, the global `np.random` state is used.

    Returns
    -------
    pd.DataFrame
        Shape `(n_periods, num_objs + 1)`, columns `['base', 0, 1, ...]`,
        index reset to `0 .. n_periods - 1`.

    Raises
    ------
    ValueError
        If `window` is lower than 1, or not lower than `n_periods`.
    '''
    if window < 1:
        # pandas accepts a zero-length window and would silently return only NaN
        raise ValueError(f'window ({window}) must be at least 1')
    if window >= n_periods:
        raise ValueError(f'window ({window}) must be lower than n_periods ({n_periods})')

    # The legacy module-level functions and Generator/RandomState share `.normal`
    sampler = np.random if rng is None else rng
    noise = sampler.normal(loc=0, scale=1, size=(n_periods, num_objs + 1))
    effects = pd.DataFrame(noise, columns=['base'] + list(range(num_objs)))

    # Append the first `window` rows so the last windows wrap around the cycle
    effects = pd.concat([effects, effects.iloc[:window]])
    effects = effects.rolling(window).mean()

    # The first `window - 1` rows are NaN warm-up; the next one averages the
    # same rows as the final wrapped row, so dropping `window` rows leaves
    # exactly `n_periods` distinct cyclic means
    effects = effects.iloc[window:, :].reset_index(drop=True)
    return effects

In [129]:
# Smoothed noise over 24 periods (presumably one per hour of the day - confirm),
# one column per object plus the shared 'base' column.
daily_effects = gen_effects(
    n_periods=24,
    num_objs=num_objs,
    window=5,
)

px.line(daily_effects)

In [130]:
# Smoothed noise over 7 periods (presumably one per day of the week - confirm),
# one column per object plus the shared 'base' column.
weekly_effects = gen_effects(
    n_periods=7,
    num_objs=num_objs,
    window=3,
)

px.line(weekly_effects)

In [136]:
# Hourly timestamps from 10 days before `curr_date` up to 3 days 23 hours after it.
date_range = pd.date_range(
    curr_date - timedelta(days=10),
    curr_date + timedelta(days=3, hours=23),
    freq='h',
)

# One smoothed noise value per timestamp and column, using a 64-period window.
trend_effects = gen_effects(
    n_periods=len(date_range),
    num_objs=num_objs,
    window=64,
)

px.line(trend_effects)

In [145]:
# Attach the timestamps and reshape wide -> long: one row per (dt, obj_id).
# `value_vars` is left at its default, which unpivots every column except `dt`,
# so `obj_id` holds the former column labels ('base' and the integer ids).
# NOTE: this rebinds `trend_effects` to the long frame, so the cell is meant to
# run once after the cell that generates the wide frame.
trend_effects = (
    trend_effects
    .assign(dt=date_range)
    .melt(
        id_vars='dt',
        var_name='obj_id',
        value_name='trend',
    )
)

In [153]:
df = (
    objs
    # Inner join on `obj_id`: the 'base' rows have no counterpart in `objs`,
    # so only the per-object trend rows survive.
    .merge(trend_effects, on='obj_id')
    # Bring the shared 'base' trend next to every object row with the same timestamp.
    .merge(
        trend_effects.loc[trend_effects['obj_id'] == 'base', ['dt', 'trend']],
        on='dt',
        suffixes=('', '_base'),
    )
    # Object-specific trend = scaled local noise + scaled shared noise.
    .assign(
        trend_post=lambda d: (
            d['trend_local_scale'] * d['trend']
            + d['trend_global_scale'] * d['trend_base']
        )
    )
)

df

Unnamed: 0,obj_id,obj,start,trend_constant,trend_global_scale,trend_local_scale,weekly_global_scale,weekly_local_scale,daily_global_scale,daily_local_scale,dt,trend,trend_base,trend_post
0,0,FWZ,-39.578427,-0.332980,6.799110,4.301857,6.870476,1.928224,8.750613,13.650633,2025-02-13 00:00:00,-0.116678,-0.169012,-1.651066
1,0,FWZ,-39.578427,-0.332980,6.799110,4.301857,6.870476,1.928224,8.750613,13.650633,2025-02-13 01:00:00,-0.140225,-0.217201,-2.080000
2,0,FWZ,-39.578427,-0.332980,6.799110,4.301857,6.870476,1.928224,8.750613,13.650633,2025-02-13 02:00:00,-0.131827,-0.191125,-1.866579
3,0,FWZ,-39.578427,-0.332980,6.799110,4.301857,6.870476,1.928224,8.750613,13.650633,2025-02-13 03:00:00,-0.155139,-0.194842,-1.992136
4,0,FWZ,-39.578427,-0.332980,6.799110,4.301857,6.870476,1.928224,8.750613,13.650633,2025-02-13 04:00:00,-0.117887,-0.232938,-2.090904
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3355,9,HIM,61.043911,-0.364022,6.806663,4.825851,6.702225,3.169602,7.199148,13.515151,2025-02-26 19:00:00,0.036752,-0.202898,-1.203702
3356,9,HIM,61.043911,-0.364022,6.806663,4.825851,6.702225,3.169602,7.199148,13.515151,2025-02-26 20:00:00,0.055776,-0.173916,-0.914623
3357,9,HIM,61.043911,-0.364022,6.806663,4.825851,6.702225,3.169602,7.199148,13.515151,2025-02-26 21:00:00,0.075941,-0.160912,-0.728792
3358,9,HIM,61.043911,-0.364022,6.806663,4.825851,6.702225,3.169602,7.199148,13.515151,2025-02-26 22:00:00,0.063639,-0.148244,-0.701934


In [154]:
# Compare, per object, the local trend, the shared base trend and their scaled
# combination over time; the animation shows one `obj_id` per frame.
px.line(
    df,
    animation_frame='obj_id',
    x='dt',
    # Explicit column list: the previous version split a string literal on raw
    # tab characters, which breaks as soon as an editor or formatter turns the
    # tabs into spaces.
    y=['trend', 'trend_base', 'trend_post'],
)