# 02 — Метрики: DAU/MAU, Retention, Воронка

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the raw event log and derive a calendar-date column from each event's
# timestamp; `date` holds plain `datetime.date` objects (what `.dt.date` returns).
df = (
    pd.read_csv('../data/events.csv', parse_dates=['timestamp'])
    .assign(date=lambda events: events['timestamp'].dt.date)
)


## DAU/MAU

In [None]:

# DAU: number of distinct users seen on each calendar date.
dau = df['user_id'].groupby(df['date']).nunique()

# MAU: number of distinct users seen in each calendar month of the event timestamp.
event_month = df['timestamp'].dt.to_period('M')
mau = df['user_id'].groupby(event_month).nunique()

# Show the first few daily values and the full (short) monthly series.
display(dau.head())
display(mau)

# Series.plot draws onto the current figure, so the figure opened here is the one used.
plt.figure()
dau.plot(title='DAU')
plt.show()


## Retention: D1/D7 по install date (первый день появления пользователя)

In [None]:

# Install date = first calendar date on which each user appears in the event log.
first_seen = df.groupby('user_id')['date'].min().rename('install_dt')

# One row per (user, active date), then attach each user's install date (cohort key).
activity = df.groupby(['user_id', 'date']).size().reset_index(name='events')
merged = activity.merge(first_seen, on='user_id')
# Look the install date up by user_id instead of relying on the merge output
# sharing row order and index with `activity`.
activity['install_dt'] = activity['user_id'].map(first_seen)


def retention_on(day):
    """Count, per install date, the users active exactly `day` days after install.

    Parameters
    ----------
    day : int
        Offset in whole days from the install date (0 = install day itself).

    Returns
    -------
    pandas.Series
        Indexed by install date; values are user counts. `merged` has one row per
        (user, date), so summing the boolean mask counts each user at most once.
    """
    # Go through to_datetime so the subtraction works whether `date` holds
    # datetime.date objects or datetime64 values.
    offset_days = (
        pd.to_datetime(merged['date']) - pd.to_datetime(merged['install_dt'])
    ).dt.days
    return (offset_days == day).groupby(merged['install_dt']).sum()


d0 = retention_on(0)  # cohort size: every user is active on their install date
d1 = retention_on(1)
d7 = retention_on(7)
retention = pd.DataFrame({'day0': d0, 'day1': d1, 'day7': d7}).fillna(0)

# A cohort's day-N retention can only be measured if install_dt + N falls inside the
# data window. Without this mask the most recent cohorts show a rate of 0 simply
# because day N has not happened yet; report NaN for them instead.
last_observed = pd.to_datetime(merged['date']).max()
cohort_start = pd.to_datetime(pd.Series(retention.index, index=retention.index))
d1_observable = (cohort_start + pd.Timedelta(days=1)) <= last_observed
d7_observable = (cohort_start + pd.Timedelta(days=7)) <= last_observed

retention['d1_rate'] = (retention['day1'] / retention['day0']).where(d1_observable)
retention['d7_rate'] = (retention['day7'] / retention['day0']).where(d7_observable)
retention.head()


## Воронка чтения (reader): book_open → page_scroll → book_finished

In [None]:

# Restrict to events emitted by the reader.
reader = df[df['source'] == 'reader']


def _daily_unique_users(event_name):
    """Return distinct-user counts per date for reader events named `event_name`."""
    matching = reader[reader['event'] == event_name]
    return matching.groupby('date')['user_id'].nunique()


f_open = _daily_unique_users('book_open')
f_scroll = _daily_unique_users('page_scroll')
f_finish = _daily_unique_users('book_finished')

# Align the three steps on date; a date missing from a step means zero users there.
funnel = pd.concat(
    [f_open.rename('openers'), f_scroll.rename('scrollers'), f_finish.rename('finishers')],
    axis=1,
).fillna(0)
# Replace 0 openers with NaN so the rate is undefined rather than a division by zero.
funnel['finish_rate'] = funnel['finishers'] / funnel['openers'].replace({0: np.nan})

# A bare `funnel.head()` in the middle of a cell is never rendered; display it explicitly.
display(funnel.head())

# DataFrame.plot opens its own figure unless given `ax`, so a preceding plt.figure()
# would leave an empty extra figure. Create the axes once and draw onto them.
fig, ax = plt.subplots()
funnel[['openers', 'scrollers', 'finishers']].plot(kind='line', ax=ax, title='Reading funnel over time')
plt.show()
