# 🔧 Patch: Robust Feature‑Engineering Cell

Copy–paste the **code cell below** into your notebook to replace the original Step 3 Feature‑Engineering block. It avoids the Pandas 3.0 chained‑assignment warning and fixes the `'window must be an integer'` error that occurs when using time‑offset rolling windows inside a `groupby`.


In [None]:

### ==== 3  Feature engineering (robust version) ====
import pandas as pd, numpy as np

# Work on a copy so the raw frame is left untouched.
df = df_raw.copy()

# 1️⃣ Calendar parts of the opening timestamp
opened_at = df['open_datetime']
df['dow'] = opened_at.dt.dayofweek      # Monday is 0
df['hour'] = opened_at.dt.hour

# 2️⃣ Gap (in days) between a ticket and the same requester's previous one
df = df.sort_values(['requester_id', 'open_datetime'])
df['prev_open'] = df.groupby('requester_id')['open_datetime'].shift(1)
gap = df['open_datetime'] - df['prev_open']
df['days_since_prev'] = gap.dt.total_seconds() / 86_400   # 86 400 s per day

# A requester's first ticket has no predecessor (NaN gap); substitute the
# median gap computed over all tickets that do have one.
median_gap = df['days_since_prev'].median()
df['days_since_prev'] = df['days_since_prev'].fillna(median_gap)
# 3️⃣ Rolling ticket counts (trailing 7 & 30 days, current ticket included) — compatible with any Pandas ≥ 1.4
def _rolling_count(group, window_days):
    # create a dummy series = 1 and set datetime index
    ts = group.assign(dummy=1).set_index('open_datetime')['dummy']
    return ts.rolling(f'{window_days}D').sum().values  # numpy array keeps row order

# Add one integer column per window length: cnt_7d and cnt_30d.
#
# `groupby(...).apply` must not be used here: because `_rolling_count`
# returns a NumPy array, apply yields ONE array per requester (a Series keyed
# by requester_id). That object Series cannot be cast with `.astype(int)` and
# would not align with df's row labels on assignment. Instead, write each
# requester's counts into a flat buffer by row position.
#
# Relies on df being ordered by open_datetime within each requester (the
# sort_values call in step 2), since `groupby.indices` lists positions in
# ascending row order and offset-based rolling needs a monotonic index.
for win in [7, 30]:
    counts = np.zeros(len(df), dtype=int)   # rows outside every group stay 0
    for positions in df.groupby('requester_id').indices.values():
        # Positional indexing keeps this correct even if df's index labels
        # are non-unique.
        counts[positions] = _rolling_count(df.iloc[positions], win).astype(int)
    df[f'cnt_{win}d'] = counts

# 4️⃣ Final feature matrix
# Columns handed to the model, in order.
feature_cols = [
    # calendar / recency features built in steps 1–2
    'dow',
    'hour',
    'days_since_prev',
    # rolling ticket counts built in step 3
    'cnt_7d',
    'cnt_30d',
    # not created in this cell — presumably already present in df_raw (verify)
    'errors_401_last24h',
    'errors_403_last24h',
    'user_active',
]

# Preview the first rows; raises KeyError if any listed column is missing.
df.loc[:, feature_cols].head()
