In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.express as px

from aku_utils.gen import panel_data

In [2]:
from aku_utils.common import to_list

In [3]:
# Build the panel dataset used below via `panel_data` (imported from aku_utils.gen).
df = panel_data()

In [4]:
# Reshape the long panel into wide form: one row per `dt`,
# one column per `obj_id`, cells holding `target`.
dfv = (
    df
    .pivot(index='dt', columns='obj_id', values='target')
    .reset_index()
)

# Draw one line per object: every column except `dt` is plotted against `dt`.
px.line(dfv, x='dt', y=[col for col in dfv.columns if col != 'dt'])


In [5]:
# Display the panel frame as returned by `panel_data()`.
df

Unnamed: 0,obj_id,obj,dt,target
0,0,YWZ,2024-11-27 00:00:00,39.244235
1,0,YWZ,2024-11-27 01:00:00,37.507573
2,0,YWZ,2024-11-27 02:00:00,37.231958
3,0,YWZ,2024-11-27 03:00:00,35.868645
4,0,YWZ,2024-11-27 04:00:00,32.539878
...,...,...,...,...
22555,9,MLH,2025-02-28 19:00:00,67.206455
22556,9,MLH,2025-02-28 20:00:00,64.349723
22557,9,MLH,2025-02-28 21:00:00,64.098528
22558,9,MLH,2025-02-28 22:00:00,63.059986


In [6]:
from sklearn.pipeline import Pipeline

In [4]:
from sklearn.base import BaseEstimator, TransformerMixin
from typing import Union, List, Dict, Any

In [None]:
class Lags(BaseEstimator, TransformerMixin):
    '''
    Transformer skeleton that is configured with a list of configs.

    NOTE(review): currently a stub - `fit` does nothing and `transform`
    returns its input unchanged; neither `prt` nor `configs` is used yet.
    '''
    def __init__(
            self,
            prt : Union[str, List[str]],
            configs : List[Dict[str, Any]],
        ):
        '''
        prt : a string or a list of strings; presumably the column(s) to
            partition by - TODO confirm
        configs : list of config dicts; presumably the kind of configs
            returned by `finalize` - TODO confirm
        '''
        # stored verbatim under the same names as the constructor arguments
        self.prt = prt
        self.configs = configs

    def fit(self, *args):
        '''Nothing is learned; accepts any positional arguments and returns self.'''
        return self

    def transform(self, df):
        '''Placeholder: returns `df` unchanged.'''

        return df

In [None]:
from itertools import product
import pandas as pd
import warnings

In [None]:
# Preferred order of keys inside a single config.
KEYS_ORDER = ['name', 't', 'c', 'a', 'l']
# Preferred order of transforms when sorting configs between themselves.
TRANSFORMS_ORDER = ['lag', 'mean', 'expmean', 'delta']

# value -> position lookups built from the two lists above
keys_order_dict = dict(zip(KEYS_ORDER, range(len(KEYS_ORDER))))
transforms_order_dict = dict(zip(TRANSFORMS_ORDER, range(len(TRANSFORMS_ORDER))))


def add_name(cf : Dict):
    '''
    Adds a `name` key to the config if it wasn't provided by the user
    (relying on the generated name is not recommended).

    The generated name is a comma-separated string made of, in order:
    * the transform `t` and the column `c`
    * every remaining key whose value is exactly `True` (key only)
    * every other remaining pair as `key value`, in the config's key order

    Iterable values (other than strings) are rendered space-separated,
    which also applies to `t` and `c`, so a tuple of columns does not
    break the name generation.

    Parameters
    ---
    cf : config dict; must contain `t` and `c` when `name` is absent

    Returns
    ---
    The very same dict object that was passed in. When `name` is missing
    it is added in place, so the caller's dict is modified.

    Raises
    ---
    KeyError if `name` is absent and `t` or `c` is missing

    Usage
    ---
    ```python
    add_name({'t': 'lag', 'c': 'plan', 'ap': True, 'l': 1})['name']
    >>> 'lag,plan,ap,l 1'
    ```
    '''
    def to_str(v):
        # non-string iterables become their items joined by spaces
        if hasattr(v, '__iter__') and not isinstance(v, str):
            return ' '.join([str(i) for i in v])
        return v

    if 'name' not in cf:
        # shallow copy: keys are popped/deleted below and the
        # caller's config must keep all of them
        cf_copy = cf.copy()

        # str(...) guards against non-string `t` / `c` (e.g. a tuple of
        # columns or an int), which str.join would otherwise reject
        main = [
            str(to_str(cf_copy.pop('t'))),
            str(to_str(cf_copy.pop('c'))),
        ]

        # flags: keys set to exactly True are shown by key only
        trues = [k for k, v in cf_copy.items() if v is True]
        for k in trues:
            del cf_copy[k]

        params = [f'{k} {to_str(v)}' for k, v in cf_copy.items()]

        cf['name'] = ','.join(
            main + trues + params
        )
    return cf


def flatten(cfs : Union[Dict, List[Dict]]):
    '''
    Expands a config (or a list of configs) into one config per
    combination of its values. Every value is first passed through
    `to_list`, then the cartesian product of those lists is taken.

    Usage
    ---
    ```python
    flatten({'t' : 'lag', 'c' : 'target', 'l' : [0, 1, 2]})
    # [{'t': 'lag', 'c': 'target', 'l': 0},
    #  {'t': 'lag', 'c': 'target', 'l': 1},
    #  {'t': 'lag', 'c': 'target', 'l': 2}]
    ```
    '''
    # a single config is treated as a one-element list
    configs = [cfs] if isinstance(cfs, dict) else cfs

    expanded = []
    for config in configs:
        keys = list(config.keys())
        choices = [to_list(config[key]) for key in keys]

        # one output config per element of the cartesian product,
        # re-attaching the original keys to each combination
        for combination in product(*choices):
            expanded.append(dict(zip(keys, combination)))
    return expanded


def _get_key_index(item):
    '''
    Sort key for a config's `(key, value)` pair.

    item: tuple whose first element is the config key; the value is ignored

    returns the key's position in `keys_order_dict`, or 999 for unknown keys
    '''
    key = item[0]
    if key in keys_order_dict:
        return keys_order_dict[key]
    return 999


def _get_transform_index(t):
    '''
    Sort rank of a transform.

    t: transform string to look up

    returns its position in `transforms_order_dict`, or 999 for unknown transforms
    '''
    if t in transforms_order_dict:
        return transforms_order_dict[t]
    return 999


def pick_out_duplicates(cfs : List[Dict]):
    '''
    Splits configs into unique ones and the ones that occur more than once.
    Only works on dicts whose values are all hashable.

    Two configs are considered equal when they hold the same key/value
    pairs, regardless of key order.

    Parameters
    ---
    cfs : list of config dicts

    Returns
    ---
    (uniques, duplicates) : two lists of dicts
    * uniques - every distinct config once, in order of first occurrence
    * duplicates - every config that occurred more than once, listed once,
      in the order the first repetition was met

    Keys of the returned dicts are sorted alphabetically.

    Raises
    ---
    TypeError if any value is unhashable (e.g. a list)
    '''
    # dicts are used as insertion-ordered sets so that the result order
    # is deterministic (iterating a set of tuples of strings is not)
    seen = {}
    repeated = {}

    for d in cfs:
        # this step may scramble key order
        # intentionally left as is because
        # we sort keys anyway later

        # fails if any part of the tuple is a list
        # so we must enforce no lists as parameters
        dict_tuple = tuple(sorted(d.items()))

        if dict_tuple in seen:
            repeated[dict_tuple] = None
        else:
            seen[dict_tuple] = None

    uniques = [dict(t) for t in seen]
    duplicates = [dict(t) for t in repeated]
    return uniques, duplicates


def order_keys(cf):
    '''
    returns a new dict with the config's keys reordered
    by the rank that `_get_key_index` assigns to each item
    '''
    items = list(cf.items())
    # stable sort: items with equal rank keep their relative order
    items.sort(key=_get_key_index)
    return dict(items)


def _get_cfs_sorting_tuple(cf):
    '''
    builds the sort key of a config:
    (rank of transform `t`, column `c`, value of `l` or 999 when absent)
    '''
    transform_rank = _get_transform_index(cf['t'])
    column = cf['c']
    l_or_default = cf.get('l', 999)
    return (transform_rank, column, l_or_default)


def order_cfs(cfs):
    '''returns a new list with the configs sorted by `_get_cfs_sorting_tuple`'''
    ordered = list(cfs)
    ordered.sort(key=_get_cfs_sorting_tuple)
    return ordered


def validate(cf):
    '''
    returns a copy of the config in which every iterable value
    (strings excluded) has been converted to a tuple

    TODO add field validation, eg lag mustnt have window, and mean must have alpha
    '''
    normalized = {}
    for key, value in cf.items():
        # strings are iterable too, but must stay as they are
        if hasattr(value, '__iter__') and not isinstance(value, str):
            value = tuple(value)
        normalized[key] = value
    return normalized


def finalize(cfs : List[Dict[str, Any]]):
    '''
    Runs the full config preparation, step by step:
    * iterable values are forced into tuples (`validate`)
    * duplicates are dropped, with a warning listing them
    * a `name` is added to configs that lack one
    * keys inside each config are ordered
    * configs are ordered between themselves
    '''
    validated = list(map(validate, cfs))

    # only unique configs go further; repeats are just reported
    unique_cfs, duplicates = pick_out_duplicates(validated)
    if duplicates:
        warnings.warn(f"duplicate configs found: {duplicates}")

    # every config gets a name before any keys are reordered
    named = list(map(add_name, unique_cfs))
    keys_ordered = list(map(order_keys, named))

    # finally sort the configs by transform and the other parameters
    return order_cfs(keys_ordered)

# Example input exercising `finalize`:
# * the last entry repeats the second one, so a duplicate warning is expected
# * 'expsm' is not listed in TRANSFORMS_ORDER, so those configs are sorted last
# * 'ap' is not listed in KEYS_ORDER, so it is placed after the known keys
# NOTE(review): `_` is also the name IPython uses for the last output;
# consider a descriptive variable name here.
_ = [
    {'t': 'delta', 'c': 'target', 'l': 8, 'ap' : [0, 1]},
    {'t': 'lag', 'c': 'target', 'l': 1},
    {'t': 'lag', 'c': 'plan', 'l': 2},
    {'t': 'lag', 'c': 'plan', 'l': 1, 'ap' : True},
    {'t': 'expsm', 'c': 'target', 'l': 6},
    {'t': 'expsm', 'c': 'target', 'l': 0},
    {'t': 'mean', 'c': 'target', 'l': 7},
    {'t': 'expmean', 'c': 'target', 'l': 8},
    {'t': 'lag', 'c': 'target', 'l': 1},
]

finalize(_)



[{'name': 'lag,plan,ap,l 1', 't': 'lag', 'c': 'plan', 'l': 1, 'ap': True},
 {'name': 'lag,plan,l 2', 't': 'lag', 'c': 'plan', 'l': 2},
 {'name': 'lag,target,l 1', 't': 'lag', 'c': 'target', 'l': 1},
 {'name': 'mean,target,l 7', 't': 'mean', 'c': 'target', 'l': 7},
 {'name': 'expmean,target,l 8', 't': 'expmean', 'c': 'target', 'l': 8},
 {'name': 'delta,target,ap 0 1,l 8',
  't': 'delta',
  'c': 'target',
  'l': 8,
  'ap': (0, 1)},
 {'name': 'expsm,target,l 0', 't': 'expsm', 'c': 'target', 'l': 0},
 {'name': 'expsm,target,l 6', 't': 'expsm', 'c': 'target', 'l': 6}]