# ================ WINDOW FUNCTIONS ================

In [98]:
import pandas as pd
import numpy as np

In [99]:
df = pd.DataFrame({
    "store": ["A","A","A","A","B","B","B","B"],
    "date": pd.date_range("2024-01-01", periods=8, freq="D"),
    "sales": [10, 12, 9, 13, 20, 18, 25, 22]
})
df

Unnamed: 0,store,date,sales
0,A,2024-01-01,10
1,A,2024-01-02,12
2,A,2024-01-03,9
3,A,2024-01-04,13
4,B,2024-01-05,20
5,B,2024-01-06,18
6,B,2024-01-07,25
7,B,2024-01-08,22


In [100]:
df = pd.DataFrame({
    "store": ["A","A","A","A","B","B","B","B"],
    "date": pd.date_range("2024-01-01", periods=8, freq="D"),
    "sales": [10, 12, 9, 13, 20, 18, 25, 22]
}).sort_values(["store","date"]).reset_index()
df

Unnamed: 0,index,store,date,sales
0,0,A,2024-01-01,10
1,1,A,2024-01-02,12
2,2,A,2024-01-03,9
3,3,A,2024-01-04,13
4,4,B,2024-01-05,20
5,5,B,2024-01-06,18
6,6,B,2024-01-07,25
7,7,B,2024-01-08,22


In [101]:
df = pd.DataFrame({
    "store": ["A","A","A","A","B","B","B","B"],
    "date": pd.date_range("2024-01-01", periods=8, freq="D"),
    "sales": [10, 12, 9, 13, 20, 18, 25, 22]
}).sort_values(["store","date"]).reset_index(drop=True)
df

Unnamed: 0,store,date,sales
0,A,2024-01-01,10
1,A,2024-01-02,12
2,A,2024-01-03,9
3,A,2024-01-04,13
4,B,2024-01-05,20
5,B,2024-01-06,18
6,B,2024-01-07,25
7,B,2024-01-08,22


## ----------- ROLLING WINDOWS -----------
> Provide rolling window calculations.

> rolling = “look back N rows”

> Problem: Rolling drops old rows abruptly.

> DataFrame.rolling(window, min_periods=None, center=False, win_type=None, on=None, axis=<no_default>, closed=None, step=None, method='single')

### Fixed-size window

> Fixed-size moving window

In [102]:
# Rolling sum with a window length of 3 observations
df["rolling_sum_3"] = df["sales"].rolling(window=3).sum()
df

Unnamed: 0,store,date,sales,rolling_sum_3
0,A,2024-01-01,10,
1,A,2024-01-02,12,
2,A,2024-01-03,9,31.0
3,A,2024-01-04,13,34.0
4,B,2024-01-05,20,42.0
5,B,2024-01-06,18,51.0
6,B,2024-01-07,25,63.0
7,B,2024-01-08,22,65.0


In [103]:
# Rolling sum with a window length of 3 observations, but only needs a minimum of 1 observation to calculate a value.
df["rolling_sum_1"] = df["sales"].rolling(window=3, min_periods=1).sum()
df

Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1
0,A,2024-01-01,10,,10.0
1,A,2024-01-02,12,,22.0
2,A,2024-01-03,9,31.0,31.0
3,A,2024-01-04,13,34.0,34.0
4,B,2024-01-05,20,42.0,42.0
5,B,2024-01-06,18,51.0,51.0
6,B,2024-01-07,25,63.0,63.0
7,B,2024-01-08,22,65.0,65.0


##### closed parameter ❓❓❓❓

In [104]:
# Rolling sum with a window length of 3 observations, but only needs a minimum of 1 observation to calculate a value.
df["rolling_sum_1"] = df["sales"].rolling(window=3, min_periods=1, closed='left').sum()
df

Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1
0,A,2024-01-01,10,,
1,A,2024-01-02,12,,10.0
2,A,2024-01-03,9,31.0,22.0
3,A,2024-01-04,13,34.0,31.0
4,B,2024-01-05,20,42.0,34.0
5,B,2024-01-06,18,51.0,42.0
6,B,2024-01-07,25,63.0,51.0
7,B,2024-01-08,22,65.0,63.0


In [105]:
# Rolling sum with a window length of 3 observations, minimum of 1 observation to calculate a value, and a step of 2.
df["rolling_sum_1_2step"] = df["sales"].rolling(window=3, min_periods=1, step=2).sum()
df

Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1,rolling_sum_1_2step
0,A,2024-01-01,10,,,10.0
1,A,2024-01-02,12,,10.0,
2,A,2024-01-03,9,31.0,22.0,31.0
3,A,2024-01-04,13,34.0,31.0,
4,B,2024-01-05,20,42.0,34.0,42.0
5,B,2024-01-06,18,51.0,42.0,
6,B,2024-01-07,25,63.0,51.0,63.0
7,B,2024-01-08,22,65.0,63.0,


In [106]:
# # Per store (reset index trick to align results)
# df["rolling_sum_store"] = (
#     df.groupby("store")["sales"]
#       .rolling(window=3, min_periods=1)
#       .sum()
# )
# df  # ❌❌ TypeError: incompatible index of inserted column with frame index ❌❌

In [107]:
# Per store (reset index trick to align results)
df["rolling_sum_store"] = (
    df.groupby("store")["sales"]
      .rolling(window=3, min_periods=1)
      .sum()
      .reset_index(level=0, drop=True)
)
df

Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1,rolling_sum_1_2step,rolling_sum_store
0,A,2024-01-01,10,,,10.0,10.0
1,A,2024-01-02,12,,10.0,,22.0
2,A,2024-01-03,9,31.0,22.0,31.0,31.0
3,A,2024-01-04,13,34.0,31.0,,34.0
4,B,2024-01-05,20,42.0,34.0,42.0,20.0
5,B,2024-01-06,18,51.0,42.0,,38.0
6,B,2024-01-07,25,63.0,51.0,63.0,63.0
7,B,2024-01-08,22,65.0,63.0,,65.0


In [108]:
# OR Using transform. It applies your function per group and returns a result aligned to the original index
df["roll3_mean_store"] = (
    df.sort_values(["store", "date"])               
      .groupby("store")["sales"]
      .transform(lambda s: s.rolling(3, min_periods=1).mean())
)
df

Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1,rolling_sum_1_2step,rolling_sum_store,roll3_mean_store
0,A,2024-01-01,10,,,10.0,10.0,10.0
1,A,2024-01-02,12,,10.0,,22.0,11.0
2,A,2024-01-03,9,31.0,22.0,31.0,31.0,10.333333
3,A,2024-01-04,13,34.0,31.0,,34.0,11.333333
4,B,2024-01-05,20,42.0,34.0,42.0,20.0,20.0
5,B,2024-01-06,18,51.0,42.0,,38.0,19.0
6,B,2024-01-07,25,63.0,51.0,63.0,63.0,21.0
7,B,2024-01-08,22,65.0,63.0,,65.0,21.666667


#### Time-based rolling

In [109]:
# for last 2 days
df["time_based_sum_2D"] = (
    df.groupby("store")                               # DataFrameGroupBy
      .rolling("2D", on="date")["sales"]              # use 'on' to point to the time column
      .sum()
      .reset_index(level=0, drop=True)              
)
df

Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1,rolling_sum_1_2step,rolling_sum_store,roll3_mean_store,time_based_sum_2D
0,A,2024-01-01,10,,,10.0,10.0,10.0,
1,A,2024-01-02,12,,10.0,,22.0,11.0,
2,A,2024-01-03,9,31.0,22.0,31.0,31.0,10.333333,
3,A,2024-01-04,13,34.0,31.0,,34.0,11.333333,
4,B,2024-01-05,20,42.0,34.0,42.0,20.0,20.0,
5,B,2024-01-06,18,51.0,42.0,,38.0,19.0,
6,B,2024-01-07,25,63.0,51.0,63.0,63.0,21.0,
7,B,2024-01-08,22,65.0,63.0,,65.0,21.666667,


In [110]:
# OR The Transform Version -
df = df.sort_values(["store", "date"])
df["sum_2D"] = (
    df.groupby("store")
      .apply(lambda g: g.rolling("2D", on="date")["sales"].sum())
      .reset_index(level=0, drop=True)
)
df

  .apply(lambda g: g.rolling("2D", on="date")["sales"].sum())


Unnamed: 0,store,date,sales,rolling_sum_3,rolling_sum_1,rolling_sum_1_2step,rolling_sum_store,roll3_mean_store,time_based_sum_2D,sum_2D
0,A,2024-01-01,10,,,10.0,10.0,10.0,,10.0
1,A,2024-01-02,12,,10.0,,22.0,11.0,,22.0
2,A,2024-01-03,9,31.0,22.0,31.0,31.0,10.333333,,21.0
3,A,2024-01-04,13,34.0,31.0,,34.0,11.333333,,22.0
4,B,2024-01-05,20,42.0,34.0,42.0,20.0,20.0,,20.0
5,B,2024-01-06,18,51.0,42.0,,38.0,19.0,,38.0
6,B,2024-01-07,25,63.0,51.0,63.0,63.0,21.0,,43.0
7,B,2024-01-08,22,65.0,63.0,,65.0,21.666667,,47.0


## ----------- EXPANDING -----------
> Running / Cumulative:- expanding = “look back ALL rows so far”

> Expands from the start of data until the current row

> Provide expanding window calculations.

> Problem:- Expanding gives equal weight to all past rows.

> DataFrame.expanding(min_periods=1, axis=<no_default>, method='single')

In [111]:
dff = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
dff

Unnamed: 0,B
0,0.0
1,1.0
2,2.0
3,
4,4.0


In [112]:
dff.expanding(1).sum()

Unnamed: 0,B
0,0.0
1,1.0
2,3.0
3,3.0
4,7.0


In [113]:
dff.expanding(min_periods=2).sum()

Unnamed: 0,B
0,
1,1.0
2,3.0
3,3.0
4,7.0


In [114]:
df = pd.DataFrame({
    "store": ["A","A","A","A","B","B","B","B"],
    "date": pd.date_range("2024-01-01", periods=8, freq="D"),
    "sales": [10, 12, 9, 13, 20, 18, 25, 22]
}).sort_values(["store","date"]).reset_index(drop=True)
df

Unnamed: 0,store,date,sales
0,A,2024-01-01,10
1,A,2024-01-02,12
2,A,2024-01-03,9
3,A,2024-01-04,13
4,B,2024-01-05,20
5,B,2024-01-06,18
6,B,2024-01-07,25
7,B,2024-01-08,22


In [115]:
# Cumulative sum
df["expanding_sum"] = df["sales"].expanding().sum()
df

Unnamed: 0,store,date,sales,expanding_sum
0,A,2024-01-01,10,10.0
1,A,2024-01-02,12,22.0
2,A,2024-01-03,9,31.0
3,A,2024-01-04,13,44.0
4,B,2024-01-05,20,64.0
5,B,2024-01-06,18,82.0
6,B,2024-01-07,25,107.0
7,B,2024-01-08,22,129.0


In [116]:
# OR (but without decimal):
df["cum_sum"] = df["sales"].cumsum()
df

Unnamed: 0,store,date,sales,expanding_sum,cum_sum
0,A,2024-01-01,10,10.0,10
1,A,2024-01-02,12,22.0,22
2,A,2024-01-03,9,31.0,31
3,A,2024-01-04,13,44.0,44
4,B,2024-01-05,20,64.0,64
5,B,2024-01-06,18,82.0,82
6,B,2024-01-07,25,107.0,107
7,B,2024-01-08,22,129.0,129


In [117]:
df["exp_mean_min2"] = df["sales"].expanding(min_periods=2).mean()
df

Unnamed: 0,store,date,sales,expanding_sum,cum_sum,exp_mean_min2
0,A,2024-01-01,10,10.0,10,
1,A,2024-01-02,12,22.0,22,11.0
2,A,2024-01-03,9,31.0,31,10.333333
3,A,2024-01-04,13,44.0,44,11.0
4,B,2024-01-05,20,64.0,64,12.8
5,B,2024-01-06,18,82.0,82,13.666667
6,B,2024-01-07,25,107.0,107,15.285714
7,B,2024-01-08,22,129.0,129,16.125


In [118]:
df["exp_avg_store"] = (
    df.groupby("store")["sales"]
      .expanding(min_periods=1).sum()
      .reset_index(level=0, drop=True)
)
df

Unnamed: 0,store,date,sales,expanding_sum,cum_sum,exp_mean_min2,exp_avg_store
0,A,2024-01-01,10,10.0,10,,10.0
1,A,2024-01-02,12,22.0,22,11.0,22.0
2,A,2024-01-03,9,31.0,31,10.333333,31.0
3,A,2024-01-04,13,44.0,44,11.0,44.0
4,B,2024-01-05,20,64.0,64,12.8,20.0
5,B,2024-01-06,18,82.0,82,13.666667,38.0
6,B,2024-01-07,25,107.0,107,15.285714,63.0
7,B,2024-01-08,22,129.0,129,16.125,85.0


## ----------- EWM -----------
> recent rows get more weight

> Provide exponentially weighted (EW) calculations.

> Solution:- Sometimes we want recent data to matter more, older data less → that’s EWM.

> DataFrame.ewm(com=None, span=None, halflife=None, alpha=None, min_periods=0, adjust=True, ignore_na=False, axis=<no_default>, times=None, method='single')

##### Formula:- new_avg = alpha * current_value + (1 - alpha) * previous_avg
where alpha depends on span/halflife.

- Eg: df["ewm_mean"] = df["sales"].ewm(span=3).mean()
span=3 → controls the “decay rate” (larger span = smoother, slower to react).

In [145]:
df = pd.DataFrame({
    "store": ["A","A","A","A","B","B","B","B"],
    "date": pd.date_range("2024-01-01", periods=8, freq="D"),
    "sales": [10, 12, 9, 13, 20, 18, 25, 22]
}).sort_values(["store","date"]).reset_index(drop=True)
df

Unnamed: 0,store,date,sales
0,A,2024-01-01,10
1,A,2024-01-02,12
2,A,2024-01-03,9
3,A,2024-01-04,13
4,B,2024-01-05,20
5,B,2024-01-06,18
6,B,2024-01-07,25
7,B,2024-01-08,22


In [146]:
df["ewm_sum"] = df["sales"].ewm(span=3).sum()
df

Unnamed: 0,store,date,sales,ewm_sum
0,A,2024-01-01,10,10.0
1,A,2024-01-02,12,17.0
2,A,2024-01-03,9,17.5
3,A,2024-01-04,13,21.75
4,B,2024-01-05,20,30.875
5,B,2024-01-06,18,33.4375
6,B,2024-01-07,25,41.71875
7,B,2024-01-08,22,42.859375


In [148]:
df["ewm_sum_2"] = df["sales"].ewm(alpha=0.5).sum()
df

Unnamed: 0,store,date,sales,ewm_sum,ewm_sum_2
0,A,2024-01-01,10,10.0,10.0
1,A,2024-01-02,12,17.0,17.0
2,A,2024-01-03,9,17.5,17.5
3,A,2024-01-04,13,21.75,21.75
4,B,2024-01-05,20,30.875,30.875
5,B,2024-01-06,18,33.4375,33.4375
6,B,2024-01-07,25,41.71875,41.71875
7,B,2024-01-08,22,42.859375,42.859375


##### halflife=2 → value from 2 steps ago has half the weight of the current value.

In [149]:
df["ewm_sum_half2"] = df["sales"].ewm(halflife=2).sum()
df

Unnamed: 0,store,date,sales,ewm_sum,ewm_sum_2,ewm_sum_half2
0,A,2024-01-01,10,10.0,10.0,10.0
1,A,2024-01-02,12,17.0,17.0,19.071068
2,A,2024-01-03,9,17.5,17.5,22.485281
3,A,2024-01-04,13,21.75,21.75,28.899495
4,B,2024-01-05,20,30.875,30.875,40.435029
5,B,2024-01-06,18,33.4375,33.4375,46.591883
6,B,2024-01-07,25,41.71875,41.71875,57.945436
7,B,2024-01-08,22,42.859375,42.859375,62.973611


##### adjust=True (default) → normalizes weights so they add up to 1.

In [150]:
df["ewm_adj"] = df["sales"].ewm(span=3, adjust=True).mean()
df

Unnamed: 0,store,date,sales,ewm_sum,ewm_sum_2,ewm_sum_half2,ewm_adj
0,A,2024-01-01,10,10.0,10.0,10.0,10.0
1,A,2024-01-02,12,17.0,17.0,19.071068,11.333333
2,A,2024-01-03,9,17.5,17.5,22.485281,10.0
3,A,2024-01-04,13,21.75,21.75,28.899495,11.6
4,B,2024-01-05,20,30.875,30.875,40.435029,15.935484
5,B,2024-01-06,18,33.4375,33.4375,46.591883,16.984127
6,B,2024-01-07,25,41.71875,41.71875,57.945436,21.023622
7,B,2024-01-08,22,42.859375,42.859375,62.973611,21.513725


In [151]:
df["ewm_adj_sum"] = df["sales"].ewm(span=3, adjust=True).sum()
df

Unnamed: 0,store,date,sales,ewm_sum,ewm_sum_2,ewm_sum_half2,ewm_adj,ewm_adj_sum
0,A,2024-01-01,10,10.0,10.0,10.0,10.0,10.0
1,A,2024-01-02,12,17.0,17.0,19.071068,11.333333,17.0
2,A,2024-01-03,9,17.5,17.5,22.485281,10.0,17.5
3,A,2024-01-04,13,21.75,21.75,28.899495,11.6,21.75
4,B,2024-01-05,20,30.875,30.875,40.435029,15.935484,30.875
5,B,2024-01-06,18,33.4375,33.4375,46.591883,16.984127,33.4375
6,B,2024-01-07,25,41.71875,41.71875,57.945436,21.023622,41.71875
7,B,2024-01-08,22,42.859375,42.859375,62.973611,21.513725,42.859375


##### adjust=False → recursive formula (more common in streaming).

In [152]:
df["ewm_noadj"] = df["sales"].ewm(span=3, adjust=False).mean()
df

Unnamed: 0,store,date,sales,ewm_sum,ewm_sum_2,ewm_sum_half2,ewm_adj,ewm_adj_sum,ewm_noadj
0,A,2024-01-01,10,10.0,10.0,10.0,10.0,10.0,10.0
1,A,2024-01-02,12,17.0,17.0,19.071068,11.333333,17.0,11.0
2,A,2024-01-03,9,17.5,17.5,22.485281,10.0,17.5,10.0
3,A,2024-01-04,13,21.75,21.75,28.899495,11.6,21.75,11.5
4,B,2024-01-05,20,30.875,30.875,40.435029,15.935484,30.875,15.75
5,B,2024-01-06,18,33.4375,33.4375,46.591883,16.984127,33.4375,16.875
6,B,2024-01-07,25,41.71875,41.71875,57.945436,21.023622,41.71875,20.9375
7,B,2024-01-08,22,42.859375,42.859375,62.973611,21.513725,42.859375,21.46875


##### ❌❌ NotImplementedError: sum is not implemented with adjust=False ❌❌

In [153]:
# df["ewm_noadj_sum"] = df["sales"].ewm(span=3, adjust=False).sum()
# df

##### with groupby

In [154]:
df["ewm_store"] = (
    df.groupby("store")["sales"]
      .ewm(span=3, adjust=False).mean()
      .reset_index(level=0, drop=True)
)
df

Unnamed: 0,store,date,sales,ewm_sum,ewm_sum_2,ewm_sum_half2,ewm_adj,ewm_adj_sum,ewm_noadj,ewm_store
0,A,2024-01-01,10,10.0,10.0,10.0,10.0,10.0,10.0,10.0
1,A,2024-01-02,12,17.0,17.0,19.071068,11.333333,17.0,11.0,11.0
2,A,2024-01-03,9,17.5,17.5,22.485281,10.0,17.5,10.0,10.0
3,A,2024-01-04,13,21.75,21.75,28.899495,11.6,21.75,11.5,11.5
4,B,2024-01-05,20,30.875,30.875,40.435029,15.935484,30.875,15.75,20.0
5,B,2024-01-06,18,33.4375,33.4375,46.591883,16.984127,33.4375,16.875,19.0
6,B,2024-01-07,25,41.71875,41.71875,57.945436,21.023622,41.71875,20.9375,22.0
7,B,2024-01-08,22,42.859375,42.859375,62.973611,21.513725,42.859375,21.46875,22.0
