In [23]:
import warnings
from kurveclient.interaction import autofe_local_source
import pathlib
import datetime
import pandas as pd
# Silence pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None  # None, 'warn' (default), or 'raise'

In [24]:
# Print the signature and docstring of autofe_local_source to see its parameters and defaults.
help(autofe_local_source)

Help on function autofe_local_source in module kurveclient.interaction:

autofe_local_source(path: Union[str, pathlib.Path], storage_format: Optional[str] = None, parent_node: str = '', label_node: str = '', label_field: str = 'id', label_operation: str = 'count', compute_period: int = 365, label_period: int = 30, cut_date: datetime.datetime = datetime.datetime(2024, 10, 30, 8, 40, 15, 297991), hops_front: int = 1, hops_back: int = 2) -> pandas.core.frame.DataFrame
    Perform automatic feature engineering on a local
    data source and return the results.



In [25]:
# List the files in the sample data directory that the feature-engineering calls read from.
!ls cust_data

cust.csv                           notifications.csv
notification_interaction_types.csv order_products.csv
notification_interactions.csv      orders.csv


In [26]:
# Perform automated feature engineering on the raw CSV data at the customer level (parent: cust.csv, label: count of `id` in orders.csv).

In [27]:
# Directory containing the raw CSV tables, given relative to the current working directory.
p = pathlib.Path('./cust_data')

In [32]:
# Customer-level feature frame: cust.csv is the parent node and the label is
# a count over the `id` field of orders.csv, with cut_date set to 2023-09-01.
data_root = p.absolute()  # resolve the data directory once and reuse it below
vect1 = autofe_local_source(
    path=data_root,
    storage_format='csv',
    parent_node=f'{data_root}/cust.csv',
    label_node=f'{data_root}/orders.csv',
    label_field='id',
    label_operation='count',
    cut_date=datetime.datetime(year=2023, month=9, day=1),
)

2024-10-30 08:45:50 [info     ] hydrating graph attributes
2024-10-30 08:45:50 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:50 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:50 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:50 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:50 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:50 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:50 [info     ] hydrating graph data
2024-10-30 08:45:50 [info     ] checking for prefix uniqueness
2024-10-30 08:45:50 [info     ] running filters, normalize, and annotations for <GraphReduceNode: fpath=/Users/

In [29]:
# Perform automated feature engineering at the notification level (parent: notifications.csv, label: count of `id` in notification_interactions.csv).

In [33]:
# Notification-level feature frame: notifications.csv is the parent node and
# the label is a count over the `id` field of notification_interactions.csv,
# with cut_date set to 2023-09-01.
data_root = p.absolute()  # resolve the data directory once and reuse it below
vect2 = autofe_local_source(
    path=data_root,
    storage_format='csv',
    parent_node=f'{data_root}/notifications.csv',
    label_node=f'{data_root}/notification_interactions.csv',
    label_field='id',
    label_operation='count',
    cut_date=datetime.datetime(year=2023, month=9, day=1),
)

2024-10-30 08:45:54 [info     ] hydrating graph attributes
2024-10-30 08:45:54 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:54 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:54 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:54 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:54 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:54 [info     ] hydrating attributes for DynamicNode
2024-10-30 08:45:54 [info     ] hydrating graph data
2024-10-30 08:45:54 [info     ] checking for prefix uniqueness
2024-10-30 08:45:54 [info     ] running filters, normalize, and annotations for <GraphReduceNode: fpath=/Users/

In [34]:
# Report (rows, columns) for the customer-level and notification-level frames, one per line.
print(vect1.shape, vect2.shape, sep='\n')

(4, 24)
(17, 18)


In [35]:
# Preview the customer-level feature frame (head() shows at most the first five rows).
vect1.head()

Unnamed: 0,cust_id,cust_name,orde_customer_id,orde_id_count,orde_customer_id_count,orde_ts_min,orde_ts_max,orde_time_since_last_event,orde_30d_num_events,orde_60d_num_events,...,noti_customer_id,noti_id_count,noti_customer_id_count,noti_ts_min,noti_ts_max,noti_time_since_last_event,noti_30d_num_events,noti_60d_num_events,noti_90d_num_events,noti_365d_num_events
0,1,wes,1.0,2.0,2.0,2023-05-12 00:00:00+00:00,2023-06-01,7948800.0,,,...,1.0,5.0,5.0,2023-01-01 00:00:00+00:00,2023-06-23,6048000.0,,,1.0,5.0
1,2,john,2.0,1.0,1.0,2023-01-01 00:00:00+00:00,2023-01-01,20995200.0,,,...,2.0,7.0,7.0,2022-09-05 00:00:00+00:00,2023-05-22,8812800.0,,,,7.0
2,3,ryan,3.0,1.0,1.0,2023-06-01 00:00:00+00:00,2023-06-01,7948800.0,,,...,3.0,1.0,1.0,2023-06-12 00:00:00+00:00,2023-06-12,6998400.0,,,1.0,1.0
3,4,tianji,,,,NaT,NaT,,,,...,,,,NaT,NaT,,,,,


In [36]:
# Preview the first five rows of the notification-level feature frame.
vect2.head()

Unnamed: 0,noti_id,noti_customer_id,noti_ts,cust_id,cust_name,noin_notification_id,noin_id_count,noin_notification_id_count,noin_interaction_type_id_count,noin_ts_min,noin_ts_max,noin_time_since_last_event,noin_30d_num_events,noin_60d_num_events,noin_90d_num_events,noin_365d_num_events,noin_notification_id_dupe,noin_id_label
0,101,1,2022-08-05,1,wes,,,,,NaT,NaT,,,,,,,
1,102,1,2023-01-01,1,wes,102.0,3.0,3.0,3.0,2023-01-01 00:00:00+00:00,2023-01-03,20822400.0,,,,3.0,,
2,103,1,2023-05-05,1,wes,103.0,3.0,3.0,3.0,2023-05-05 00:00:00+00:00,2023-05-07,10108800.0,,,,3.0,,
3,104,1,2023-06-01,1,wes,104.0,2.0,2.0,2.0,2023-05-10 00:00:00+00:00,2023-05-11,9763200.0,,,,2.0,,
4,105,1,2023-06-02,1,wes,105.0,2.0,2.0,2.0,2023-05-11 00:00:00+00:00,2023-05-11,9763200.0,,,,2.0,,
