# Telemetry → KPI & Cohort Dashboard

This notebook loads `data/synthetic_telemetry.csv` (read as `../data/synthetic_telemetry.csv`, relative to the notebook's location) and computes basic **funnel**, **retention**, and **cohort** views.

In [None]:
import pandas as pd, numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

# Read the raw event log. `ts` is parsed into datetimes at load time because
# the retention and cohort cells rely on the `.dt` accessor.
TELEMETRY_CSV = '../data/synthetic_telemetry.csv'
df = pd.read_csv(TELEMETRY_CSV, parse_dates=['ts'])

# Preview the first five rows.
df.head()

## Funnel: install → tutorial_complete → level_1_complete

In [None]:
# Unique users per funnel event. Level 1 completion is a `level_complete`
# event whose `level` column equals 1.
events = df['event']
installs = df.loc[events == 'install', 'user_id'].unique()
tutorial = df.loc[events == 'tutorial_complete', 'user_id'].unique()
lvl1 = df.loc[(events == 'level_complete') & (df['level'] == 1), 'user_id'].unique()

# Every step after install is restricted to users who also have an install
# event. Note that the level-1 step is NOT additionally restricted to users
# who completed the tutorial.
installed = set(installs)
steps = ['install', 'tutorial_complete', 'level1_complete']
vals = [
    len(installs),
    len(installed.intersection(tutorial)),
    len(installed.intersection(lvl1)),
]

# One bar per funnel step, measured in unique users.
fig, ax = plt.subplots()
ax.bar(steps, vals)
ax.set_title('Funnel (Users)')
ax.set_xlabel('Step')
ax.set_ylabel('Users')
plt.show()

## Day-1 retention (rough cut)
We consider a user retained if any of their events occurs at least one full day (24 hours) after their install — that is, on day 1 or any later day, not only on day 1 itself.

In [None]:
# One row per user: the timestamp of their FIRST install event.
# Reducing to a single install per user keeps the merge below many-to-one;
# with several install rows for the same user, every event row would be
# duplicated once per install and a user could later fall into more than one
# install cohort.
inst_df = (
    df.loc[df['event'] == 'install']
    .groupby('user_id', as_index=False)['ts']
    .min()
    .rename(columns={'ts': 'install_ts'})
)

# Attach the install timestamp to every event. Users without an install event
# keep a missing install_ts (left join) and therefore a missing days_after.
# `validate` makes pandas raise if the right side ever stops being unique.
merged = df.merge(inst_df, on='user_id', how='left', validate='many_to_one')

# Whole days elapsed between install and the event (negative if the event
# precedes the install).
merged['days_after'] = (merged['ts'] - merged['install_ts']).dt.days

# A user is retained when their latest event is at least one whole day after
# install; users with no install never satisfy the comparison.
retained = merged.groupby('user_id')['days_after'].max().ge(1).sum()
base = inst_df['user_id'].nunique()
rate = retained/base if base else 0  # guard against an empty install base
print(f'D1 Retention: {rate:.2%} ({retained}/{base})')

## Weekly cohorts by install week

In [None]:
# Label each install with its calendar week; cohort size is the number of
# distinct users installing in that week.
inst = inst_df.copy()
inst['install_week'] = inst['install_ts'].dt.to_period('W').astype(str)
cohort_sizes = inst.groupby('install_week')['user_id'].nunique()

# Simplified engagement signal: a user is active in a calendar week if they
# logged at least one event in that week at or after their install
# (days_after >= 0). This covers every such week, including the install week.
merged['week'] = merged['ts'].dt.to_period('W').astype(str)
engagement = (
    merged[merged['days_after'] >= 0]
    .groupby(['user_id', 'week'])
    .size()
    .reset_index(name='events')
)
eng_users = engagement.merge(inst[['user_id', 'install_week']], on='user_id', how='left')

# Distinct active users per (install cohort, activity week), laid out as a
# cohort x week table; combinations with no activity become 0.
cohort_ret = eng_users.groupby(['install_week', 'week'])['user_id'].nunique().reset_index()
pivot = cohort_ret.pivot(index='install_week', columns='week', values='user_id').fillna(0)

# Normalize by cohort size in one vectorized step: each row is divided by the
# size of its own cohort and rounded to 3 decimals. Building a new frame
# avoids writing float rows in place into a table that may hold integers.
# A cohort whose size is missing or zero has no defined rate and becomes NaN.
row_sizes = cohort_sizes.reindex(pivot.index)
pivot = pivot.div(row_sizes.where(row_sizes > 0), axis=0).round(3)

pivot