In [None]:
# I4
import pandas as pd
import numpy as np

df = pd.DataFrame({
    "user":["A","A","A","B","B","B"],
    "date": pd.date_range("2025-01-01", periods=6),
    "spend":[10,20,15,5,8,12]
})

In [2]:
df.head()

Unnamed: 0,user,date,spend
0,A,2025-01-01,10
1,A,2025-01-02,20
2,A,2025-01-03,15
3,B,2025-01-04,5
4,B,2025-01-05,8


In [10]:
df['lag1'] = df['spend'].shift(1) 
df['lag2'] = df['spend'].shift(2)
df['rolling_3'] = (df.sort_values('date').groupby('user').rolling(3, min_periods = 3)['spend'].mean().reset_index(0, drop=True))

df.head()

Unnamed: 0,user,date,spend,lag1,lag2,rolling_3
0,A,2025-01-01,10,,,
1,A,2025-01-02,20,10.0,,
2,A,2025-01-03,15,20.0,10.0,15.0
3,B,2025-01-04,5,15.0,20.0,
4,B,2025-01-05,8,5.0,15.0,


In [13]:
# I5

import pandas as pd
import numpy as np

rng = pd.date_range("2025-01-01", periods=90, freq="D")

s = pd.Series(
    np.random.rand(90),
    index=rng
)

In [None]:
frame = s.to_frame(name = 'value') 

frame['month_mean_1'] = frame.groupby(s.index.to_period('W')).transform('mean') 
frame['month_mean_2'] = frame.resample('W')['value'].transform('mean') 
# I'm pretty sure this is R syntax
assert(all(frame['month_mean_1'] == frame['month_mean_2']))
print('Both methods give equal results!')
print(frame)

Both methods give equal results!
               value  month_mean_1  month_mean_2
2025-01-01  0.779602      0.582488      0.582488
2025-01-02  0.672340      0.582488      0.582488
2025-01-03  0.709830      0.582488      0.582488
2025-01-04  0.251569      0.582488      0.582488
2025-01-05  0.499096      0.582488      0.582488
...              ...           ...           ...
2025-03-27  0.933448      0.600434      0.600434
2025-03-28  0.130599      0.600434      0.600434
2025-03-29  0.579462      0.600434      0.600434
2025-03-30  0.668317      0.600434      0.600434
2025-03-31  0.405914      0.405914      0.405914

[90 rows x 3 columns]


In [21]:
# Hard - Funnel Analysis

df = pd.DataFrame({
    "user":["U1","U1","U1","U2","U2","U3","U3","U3"],
    "step":["visit","signup","pay","visit","signup","visit","signup","pay"],
    "date": pd.to_datetime([
        "2025-01-01","2025-01-02","2025-01-05",
        "2025-01-01","2025-01-04",
        "2025-01-03","2025-01-06","2025-01-10"
    ])
})

In [23]:
df.sort_values(['user', 'date'], inplace=True)
print(df)

  user    step       date
0   U1   visit 2025-01-01
1   U1  signup 2025-01-02
2   U1     pay 2025-01-05
3   U2   visit 2025-01-01
4   U2  signup 2025-01-04
5   U3   visit 2025-01-03
6   U3  signup 2025-01-06
7   U3     pay 2025-01-10


In [26]:
df.groupby('step')['user'].nunique().to_frame('counts').reset_index()

Unnamed: 0,step,counts
0,pay,2
1,signup,3
2,visit,3


In [35]:
# set column order to 'visit', 'signup', 'pay' 
df_wide = df.pivot(index='user', columns='step', values='date')[['visit', 'signup', 'pay']]

df_wide = df_wide.notna().cumsum(axis = 1)

visit_count = (df_wide['visit'] == 1).sum()
signup_count = (df_wide['signup'] == 2).sum()
pay_count = (df_wide['pay'] == 3).sum()

conversion_rate_visit_to_signup = signup_count / visit_count
conversion_rate_signup_to_pay = pay_count / signup_count

pd.DataFrame({
    'step': ['visit', 'signup', 'pay'],
    'count': [visit_count, signup_count, pay_count],
    'conversion_rate': [np.nan, conversion_rate_visit_to_signup, conversion_rate_signup_to_pay]
})

Unnamed: 0,step,count,conversion_rate
0,visit,3,
1,signup,3,1.0
2,pay,2,0.666667
