In [15]:
import kumoai.experimental.rfm as rfm, os
from pathlib import Path
import pandas as pd

In [2]:
# Read the Kumo API key from a plain-text file in the user's home directory
# (so the secret never appears in the notebook) and export it through the
# KUMO_API_KEY environment variable.
# NOTE(review): presumably `rfm.init()` picks the key up from this variable.
home_api_key_file = Path.home() / "kumoai_key.txt"
api_key = home_api_key_file.read_text().strip()
os.environ["KUMO_API_KEY"] = api_key

In [3]:
# Initialize the Kumo SDK session; the recorded log below reports a successful
# initialization against https://kumorfm.ai/api.
rfm.init()

[2025-08-08 16:54:04 - kumoai:196 - INFO] Successfully initialized the Kumo SDK against deployment https://kumorfm.ai/api, with log level INFO.


In [5]:
# Load the three tables of the online-shopping dataset (users, items, orders)
# from the `kumo-sdk-public` S3 bucket, in that order.
root = 's3://kumo-sdk-public/rfm-datasets/online-shopping'
users_df, items_df, orders_df = map(
    pd.read_parquet,
    (
        f'{root}/users.parquet',
        f'{root}/items.parquet',
        f'{root}/orders.parquet',
    ),
)

In [7]:
# Wrap each DataFrame in a named rfm.LocalTable and let the SDK infer its
# metadata; the recorded output reports the primary keys and the time column
# it detected.
users, items, orders = (
    rfm.LocalTable(frame, name=table_name).infer_metadata()
    for frame, table_name in (
        (users_df, "users"),
        (items_df, "items"),
        (orders_df, "orders"),
    )
)

Detected primary key 'user_id' in table 'users'
Detected primary key 'item_id' in table 'items'
Detected primary key 'order_id' and time column 'date' in table 'orders'


In [8]:
# Show the inferred column metadata (dtype, stype, primary key, time column)
# of every table so it can be checked by eye before the graph is built.
for table in (users, items, orders):
    table.print_metadata()

### 🏷️ Metadata of Table `users` (1,000 rows)

name,dtype,stype,is_primary_key,is_time_column
user_id,int,ID,True,False
active,bool,categorical,False,False
age,int,numerical,False,False


### 🏷️ Metadata of Table `items` (1,000 rows)

name,dtype,stype,is_primary_key,is_time_column
item_id,int,ID,True,False
item_name,string,text,False,False
category,string,categorical,False,False
color,string,categorical,False,False
descriptions,string,text,False,False


### 🏷️ Metadata of Table `orders` (267,774 rows)

name,dtype,stype,is_primary_key,is_time_column
user_id,int,ID,False,False
item_id,int,ID,False,False
date,date,timestamp,False,True
sales_channel_id,int,ID,False,False
price,float,numerical,False,False
order_id,int,ID,True,False


In [10]:
# Assemble the three tables into a LocalGraph; the foreign-key links between
# them are added by the `graph.link(...)` cells that follow.
graph = rfm.LocalGraph(tables=[users, items, orders])

In [None]:
# Link every order to the user who placed it: orders.user_id -> users.
# NOTE(review): this cell has no execution count and its recorded output still
# shows Edge(src_table='orders', fkey='item_id', dst_table='users'), i.e. a
# different fkey than the code here. Restart the kernel and run all cells so
# the graph (and everything built from it) uses the `user_id` link.
graph.link(src_table="orders", fkey="user_id", dst_table="users")

LocalGraph(
  tables=[users, items, orders],
  edges=[Edge(src_table='orders', fkey='item_id', dst_table='users')],
)

In [12]:
# Link every order to the purchased item: orders.item_id -> items.
graph.link(src_table="orders", fkey="item_id", dst_table="items")

LocalGraph(
  tables=[users, items, orders],
  edges=[Edge(src_table='orders', fkey='item_id', dst_table='users'), Edge(src_table='orders', fkey='item_id', dst_table='items')],
)

In [16]:
# Print the graph's table summary and its FK <-> PK links as a sanity check
# before a model is built on top of it.
# NOTE(review): the recorded links output lists `orders.item_id` <-> `users.user_id`,
# which does not match the `fkey="user_id"` link cell above; after re-running,
# verify that it reads `orders.user_id` <-> `users.user_id`.
graph.print_metadata()
graph.print_links()

### 🗂️ Graph Metadata

name,primary_key,time_column
users,user_id,-
items,item_id,-
orders,order_id,date


### 🕸️ Graph Links (FK ↔️ PK)

- `orders.item_id` ↔️ `items.item_id`
- `orders.item_id` ↔️ `users.user_id`

In [18]:
# Build the KumoRFM model on top of the linked graph.
# NOTE(review): presumably the model reflects the graph's links as they are at
# construction time, so re-create it whenever the links change - TODO confirm.
model = rfm.KumoRFM(graph)

Output()

In [31]:
# Summarize the order dates (count, mean, min, quartiles, max); useful for
# picking an anchor date that has history before it and orders after it.
orders_df[['date']].describe()

Unnamed: 0,date
count,267774
mean,2023-09-15 02:06:26.385534
min,2022-09-20 00:00:00
25%,2023-03-31 00:00:00
50%,2023-08-24 00:00:00
75%,2024-03-27 00:00:00
max,2024-09-19 00:00:00


In [1]:
# Peek at two catalogue rows. `random_state` pins the draw so that a
# Restart & Run All shows the same rows every time (the seed value itself is
# arbitrary).
# NOTE(review): the recorded NameError comes from running this cell with
# execution count 1, i.e. on a fresh kernel before `items_df` was loaded; the
# cell works when the notebook is executed top to bottom.
items_df.sample(2, random_state=0)

NameError: name 'items_df' is not defined

In [64]:
# Compare KumoRFM's prediction with the realized value for a single item:
# total `orders.price` of item `item_id` over the `days` days after `anchor_date`.
item_id = 42
days = 30
anchor_date = pd.Timestamp("2023-03-31")

# Exclusive upper bound of the manual comparison window.
# NOTE(review): the name hard-codes "30" although the value follows `days`;
# consider renaming it if nothing else depends on this name.
anchor_date_plus_30 = anchor_date + pd.Timedelta(days=days)

query = f"PREDICT SUM(orders.price, 0, {days}, days) FOR items.item_id={item_id}"

# Ground truth computed by hand from the raw orders: rows of this item dated
# in [anchor_date, anchor_date + days).
# NOTE(review): confirm that this half-open window matches the boundary
# convention of the PQL aggregation window `(0, {days}, days)`; order dates are
# at day granularity, so an off-by-one-day boundary changes the sum.
order_dates = orders_df['date']
in_window = (
    orders_df['item_id'].eq(item_id)
    & order_dates.ge(anchor_date)
    & order_dates.lt(anchor_date_plus_30)
)
orders_for_item_in_next_x_days = orders_df.loc[in_window]

y_true = orders_for_item_in_next_x_days['price'].sum()
# The prediction is read from the first row of the 'TARGET_PRED' column.
y_pred = model.predict(query, anchor_time=anchor_date)['TARGET_PRED'].values[0]
print(y_true, y_pred)

Output()

261.25 257.70556640625


### MY QUESTIONS
- How do I access the actual value I am predicting without having to calculate it manually?
- How should I interpret the result of `evaluate`?