In [316]:
import pandas as pd
import datetime as dt

In [317]:
# Load the raw customer orders. The path is relative, so it resolves against
# the kernel's current working directory.
df = pd.read_csv('../data/Customer_Orders_Dataset.csv')

In [318]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,customer_id,order_date
0,C0001,2024-01-22
1,C0001,2024-03-29
2,C0001,2024-03-30
3,C0001,2024-04-18
4,C0001,2024-05-10


In [None]:
def sales_lag_tracker_basic(df: pd.DataFrame, customer_col: str, date_col: str) -> pd.DataFrame:
    """
    Measure, for every order, the gap in whole days to the same customer's
    neighbouring orders.

    The input frame is never modified; all work happens on a copy that is
    cleaned before the gaps are computed:
      1. ``date_col`` is parsed with ``pd.to_datetime(errors='coerce')``, so
         unparseable values become NaT.
      2. Rows missing either the customer or the (parsed) date are dropped.
      3. ``customer_col`` is cast to pandas' ``'string'`` dtype.
      4. Repeated (customer, date) pairs are collapsed, keeping the first.
      5. Rows are sorted by customer, then date, and the index is reset.

    Parameters
    ----------
    df : pd.DataFrame
        Order-level data, one row per order.
    customer_col : str
        Name of the column identifying the customer.
    date_col : str
        Name of the column holding the order date.

    Returns
    -------
    pd.DataFrame
        The cleaned, sorted copy with two extra columns:
          - days_since_last_order: current date - previous date (NaN on a
            customer's first order)
          - days_until_next_order: next date - current date (NaN on a
            customer's last order)

    Raises
    ------
    TypeError
        If ``df`` is not a DataFrame or either column name is not a string.
    ValueError
        If either column name is absent from ``df``.
    """
    requested = (customer_col, date_col)

    # Guard clauses: reject bad arguments before touching any data.
    if not isinstance(df, pd.DataFrame):
        raise TypeError('df must be a pandas DataFrame')
    if not all(isinstance(name, str) for name in requested):
        raise TypeError('customer_col or date_col must be strings')
    if any(name not in df.columns for name in requested):
        raise ValueError('customer_col or date_col not found in DataFrame')

    key_cols = [customer_col, date_col]

    # Work on a copy so the caller's frame keeps its original values.
    orders = df.copy()
    orders[date_col] = pd.to_datetime(orders[date_col], errors='coerce')

    # Unparseable dates are NaT by now, so this one dropna removes them
    # together with rows that lack a customer id.
    orders = orders.dropna(subset=key_cols)
    orders[customer_col] = orders[customer_col].astype('string')

    # Dedupe first, then order chronologically within each customer so that
    # shifting by one position lands on the adjacent order.
    orders = (
        orders
        .drop_duplicates(subset=key_cols, keep='first')
        .sort_values(key_cols)
        .reset_index(drop=True)
    )

    dates_by_customer = orders.groupby(customer_col)[date_col]
    earlier_order = dates_by_customer.shift(1)
    later_order = dates_by_customer.shift(-1)

    # .dt.days keeps only the whole-day component of each timedelta.
    orders['days_since_last_order'] = (orders[date_col] - earlier_order).dt.days
    orders['days_until_next_order'] = (later_order - orders[date_col]).dt.days

    return orders

In [320]:
# Compute per-customer order gaps for the loaded data. The result is only
# displayed here, not assigned to a variable.
sales_lag_tracker_basic(df, 'customer_id', 'order_date')

Unnamed: 0,customer_id,order_date,days_since_last_order,days_until_next_order
0,C0001,2024-01-22,,67.0
1,C0001,2024-03-29,67.0,1.0
2,C0001,2024-03-30,1.0,19.0
3,C0001,2024-04-18,19.0,22.0
4,C0001,2024-05-10,22.0,37.0
...,...,...,...,...
5474,C0500,2024-07-13,42.0,50.0
5475,C0500,2024-09-01,50.0,130.0
5476,C0500,2025-01-09,130.0,22.0
5477,C0500,2025-01-31,22.0,4.0
