In [1]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.disable_max_rows()
from datetime import datetime

## Example of Metrics Calculation

### User Activity

In [2]:
# Read the pre-test user-activity records from CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer="data/activity_pretest.csv")

In [3]:
# Preview the first five rows to check the columns that were loaded.
data.head(n=5)

Unnamed: 0,userid,dt,activity_level
0,a5b70ae7-f07c-4773-9df4-ce112bc9dc48,2021-10-01,0
1,d2646662-269f-49de-aab1-8776afced9a3,2021-10-01,0
2,c4d1cfa8-283d-49ad-a894-90aedc39c798,2021-10-01,0
3,6889f87f-5356-4904-a35a-6ea5020011db,2021-10-01,0
4,dbee604c-474a-4c9d-b013-508e5a0e3059,2021-10-01,0


In [4]:
# Number of rows per activity level, listed from the rarest level to the most common.
data['activity_level'].value_counts().sort_values(ascending=True)

20     24520
7      48339
17     48395
8      48396
13     48534
4      48556
15     48599
14     48620
3      48659
1      48732
9      48820
11     48832
19     48901
6      48901
12     48911
16     48934
10     48943
18     48982
2      49074
5      49227
0     909125
Name: activity_level, dtype: int64

In [5]:
# Summary statistics of the remaining columns within each activity level
# (only the first five levels are displayed).
(
    data
    .groupby('activity_level')
    .describe()
    .head(n=5)
)

Unnamed: 0_level_0,userid,userid,userid,userid,dt,dt,dt,dt
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq
activity_level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0,909125,60000,6b953416-72e5-4b6e-b634-41c8d3bf98a4,27,909125,31,2021-10-11,29511
1,48732,33688,3c5297b6-602e-4479-9a97-e2b4cb444f0a,6,48732,31,2021-10-19,1620
2,49074,33761,3d5b7e5d-d7b8-459b-a4f0-33231fc930fd,6,49074,31,2021-10-14,1665
3,48659,33634,fd9d8064-2f3f-47ba-9deb-0a38bc0b1a3d,6,48659,31,2021-10-28,1663
4,48556,33502,dc396a83-174c-4244-8a33-71eae2283eeb,8,48556,31,2021-10-29,1632


In [7]:
# Row counts per (date, activity level), with inactive records (level 0) filtered out.
# After count(), the `userid` column holds the number of rows in each group.
activity = (
    data
    .query('activity_level > 0')
    .groupby(['dt', 'activity_level'])
    .count()
    .reset_index()
)

In [8]:
# One thin line per activity level: the `userid` count plotted against the date.
(
    alt.Chart(activity)
    .mark_line(size=1)
    .encode(
        x=alt.X('dt:T', axis=alt.Axis(title='date')),
        y=alt.Y('userid:Q', axis=alt.Axis(title='number of users')),
        color='activity_level:N',
        tooltip=['activity_level'],
    )
    .properties(width=600, height=400)
)

####

### Calculating Daily Active Users

In this dataset, a user (`userid`) counts towards Daily Active Users (DAU) on a given day if their `activity_level` for that day is greater than zero.

In [9]:
# Daily Active Users: distinct userids with a non-zero activity_level on each date.
# nunique() counts every user once per day even if the log contains several rows
# for that user, whereas a plain count() would report rows rather than users.
# `activity_level` carries the per-day row count, so the resulting frame exposes
# the columns dt, userid and activity_level.
activity = (
    data
    .query('activity_level > 0')
    .groupby('dt')
    .agg(
        userid=('userid', 'nunique'),
        activity_level=('activity_level', 'count'),
    )
    .reset_index()
)

In [10]:
# Distribution of the per-day figures (mean, spread, quartiles) across all dates.
activity.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,userid,activity_level
count,31.0,31.0
mean,30673.387097,30673.387097
std,90.968375,90.968375
min,30489.0,30489.0
25%,30608.0,30608.0
50%,30661.0,30661.0
75%,30728.5,30728.5
max,30902.0,30902.0


In [11]:
# Line chart of the per-day `userid` figure stored in `activity`.
(
    alt.Chart(activity)
    .mark_line(size=4)
    .encode(
        x=alt.X('dt:T', axis=alt.Axis(title='date')),
        y=alt.Y('userid:Q', axis=alt.Axis(title='number of users')),
    )
    .properties(title='Daily Active Users', width=600, height=400)
)

### Click-through rate

In [12]:
# Read the pre-test click-through-rate records from CSV; this rebinds `data`.
data = pd.read_csv(filepath_or_buffer="data/ctr_pretest.csv")

In [13]:
# Preview the first five rows of the CTR records.
data.head(n=5)

Unnamed: 0,userid,dt,ctr
0,4b328144-df4b-47b1-a804-09834942dce0,2021-10-01,34.28
1,34ace777-5e9d-40b3-a859-4145d0c35c8d,2021-10-01,34.67
2,8028cccf-19c3-4c0e-b5b2-e707e15d2d83,2021-10-01,34.77
3,652b3c9c-5e29-4bf0-9373-924687b1567e,2021-10-01,35.42
4,45b57434-4666-4b57-9798-35489dc1092a,2021-10-01,35.04


In [14]:
# Summary statistics for the numeric columns of the CTR records.
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,ctr
count,950875.0
mean,33.000242
std,1.731677
min,30.0
25%,31.5
50%,33.0
75%,34.5
max,36.0


In [15]:
# Average CTR per day.
# Select the numeric `ctr` column before aggregating: the frame also holds string
# `userid` values, and pandas >= 2.0 raises a TypeError when mean() meets
# non-numeric columns instead of silently dropping them.
# The result has the columns dt and ctr.
ctr = (
    data
    .groupby('dt')['ctr']
    .mean()
    .reset_index()
)

In [16]:
# Line chart of the daily mean CTR; the y-axis is pinned to the 32-34 range.
(
    alt.Chart(ctr)
    .mark_line(size=4)
    .encode(
        x=alt.X('dt:T', axis=alt.Axis(title='date')),
        y=alt.Y(
            'ctr:Q',
            axis=alt.Axis(title='ctr'),
            scale=alt.Scale(domain=[32, 34]),
        ),
        tooltip=['ctr'],
    )
    .properties(title='Average Daily CTR', width=600, height=400)
)