In [1]:
from collections import namedtuple

import pandas as pd
import numpy as np
import scipy.stats as ss
import json

import pyspark.sql.functions as spf
from pyspark.sql import SparkSession, Row
from pyspark.sql.types import *

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def dof(n_0, n_1, s2_0, s2_1):
    """Welch-Satterthwaite degrees of freedom for two independent samples.

    Args:
        n_0: Size of the first sample.
        n_1: Size of the second sample.
        s2_0: Variance of the first sample.
        s2_1: Variance of the second sample.

    Returns:
        (s2_0/n_0 + s2_1/n_1)^2 divided by the sum over both samples of
        (s2/n)^2 / (n - 1).
    """
    def _scaled_square(s2, n):
        # (s2 / n)^2 / (n - 1), evaluated strictly left to right.
        return s2 * s2 / n / n / (n - 1)

    variance_of_difference = s2_0 / n_0 + s2_1 / n_1
    top = variance_of_difference * variance_of_difference
    bottom = _scaled_square(s2_0, n_0) + _scaled_square(s2_1, n_1)
    return top / bottom


def ci(n_0, n_1, s2_0, s2_1, alpha=0.05):
    """Half-width of a two-sided t confidence interval for a difference of means.

    Args:
        n_0: Size of the first sample.
        n_1: Size of the second sample.
        s2_0: Variance of the first sample.
        s2_1: Variance of the second sample.
        alpha: Two-sided significance level; the 1 - alpha/2 quantile is used.

    Returns:
        The Student-t quantile (degrees of freedom taken from ``dof``) times
        the standard error sqrt(s2_0/n_0 + s2_1/n_1).
    """
    degrees_of_freedom = dof(n_0, n_1, s2_0, s2_1)
    critical_value = ss.t.ppf(1 - alpha/2, degrees_of_freedom)
    standard_error = np.sqrt(s2_0 / n_0 + s2_1 / n_1)
    return critical_value * standard_error

In [3]:
# Get the existing Spark session, or create one, under the app name "dusiak".
spark = (
    SparkSession.builder
    .appName("dusiak")
    .getOrCreate()
)

In [4]:
# Read the JSON records at the path below into a Spark DataFrame.
data = spark.read.json("/user/mob2021032/ab_test_data")

# Print the DataFrame schema so the available columns are visible below.
data.printSchema()

root
 |-- experiments: struct (nullable = true)
 |    |-- MY_EXP: string (nullable = true)
 |-- latency: double (nullable = true)
 |-- message: string (nullable = true)
 |-- recommendation: long (nullable = true)
 |-- time: double (nullable = true)
 |-- timestamp: long (nullable = true)
 |-- track: long (nullable = true)
 |-- user: long (nullable = true)



## Analyze experiment

In [5]:
# Key inside the `experiments` struct column that holds the treatment label.
experiment = "MY_EXP"

# Rows per group whose message is "last"; this is what the "sessions" metric
# counts. when() yields NULL for non-matching rows and count() skips NULLs.
last_message_count = spf.count(
    spf.when(spf.col("message") == "last", spf.col("user"))
)
# Rows per group with a non-null user.
row_count = spf.count("user")

# One row per (user, treatment) with the per-user metrics.
user_level_data = data.groupBy(
    spf.col("user"),
    spf.col("experiments." + experiment).alias("treatment"),
).agg(
    last_message_count.alias("sessions"),
    (row_count / last_message_count).alias("mean_session_length"),
    (spf.sum("time") / last_message_count).alias("mean_session_time"),
    (spf.sum("latency") / row_count).alias("mean_request_time"),
)

# Every column except the grouping keys is a metric.
grouping_keys = ("user", "treatment")
metrics = [name for name in user_level_data.columns if name not in grouping_keys]

# For each metric, in order: mean_<metric>, var_<metric>, n_<metric>.
summary_functions = (("mean_", spf.avg), ("var_", spf.variance), ("n_", spf.count))
metric_stats = [
    summarize(name).alias(prefix + name)
    for name in metrics
    for prefix, summarize in summary_functions
]

# One Row per treatment, collected to the driver.
per_treatment = user_level_data.groupBy(spf.col("treatment")).agg(*metric_stats)
treatment_level_data = per_treatment.collect()

In [6]:
# One dict per (treatment, metric) describing the effect relative to control.
effects = []

# Find the control row ("C"). A generator with next() is used so that the
# loop variable does not reuse the name `data` (the Spark DataFrame), and so
# that a missing control group gives a clear error instead of an IndexError.
control = next(
    (candidate for candidate in treatment_level_data if candidate["treatment"] == "C"),
    None,
)
if control is None:
    raise ValueError('treatment_level_data has no row with treatment "C" (control)')

for row in treatment_level_data:
    # The control group is the baseline, not a treatment to evaluate.
    if row["treatment"] == "C":
        continue

    for metric in metrics:
        control_mean = control["mean_" + metric]
        treatment_mean = row["mean_" + metric]

        # Absolute difference of means and the half-width of its confidence
        # interval (default alpha of ci()).
        effect = treatment_mean - control_mean
        conf_int = ci(
            control["n_" + metric],
            row["n_" + metric],
            control["var_" + metric],
            row["var_" + metric],
        )
        effects.append({
            "treatment": row["treatment"],
            "metric": metric,
            "control_mean": control_mean,
            "treatment_mean": treatment_mean,
            # Effect and interval bounds as a percentage of the control mean.
            "effect": effect / control_mean * 100,
            "lower": (effect - conf_int) / control_mean * 100,
            "upper": (effect + conf_int) / control_mean * 100,
            # Both bounds share a sign exactly when the interval excludes zero.
            "significant": (effect + conf_int) * (effect - conf_int) > 0
        })

# ALS

In [8]:
def color(value):
    """Return the CSS text colour for a cell: red if value < 0, green otherwise."""
    if value < 0:
        return 'color:red;'
    return 'color:green;'

def background(value):
    """Return the CSS for a flag cell: white text on green if truthy, on red otherwise."""
    if not value:
        return 'color:white;background-color:red'
    return 'color:white;background-color:green'
        

# Columns shown in the report, in display order.
report_columns = [
    "treatment",
    "metric",
    "effect",
    "upper",
    "lower",
    "control_mean",
    "treatment_mean",
    "significant",
]

# Rows ordered from the largest relative effect to the smallest.
effects_by_size = pd.DataFrame(effects)[report_columns].sort_values("effect", ascending=False)

# Sign-coloured text for the percentage columns, coloured fill for the flag.
(
    effects_by_size.style
    .applymap(color, subset=["effect", "upper", "lower"])
    .applymap(background, subset=["significant"])
)

Unnamed: 0,treatment,metric,effect,upper,lower,control_mean,treatment_mean,significant
2,T1,mean_session_time,283.473,309.061,257.884,2.09376,8.02901,True
1,T1,mean_session_length,83.4527,91.1197,75.7858,7.11419,13.0512,True
3,T1,mean_request_time,9.67193,10.4457,8.8982,0.000302269,0.000331504,True
0,T1,sessions,-0.616124,1.96221,-3.19446,1.09446,1.08772,False


# Sticky Artist

In [43]:
def color(value):
    """Return 'color:red;' for values below zero and 'color:green;' for anything else."""
    # NOTE(review): repeats the `color` definition from the earlier report cell.
    is_negative = value < 0
    if is_negative:
        return 'color:red;'
    else:
        return 'color:green;'

def background(value):
    """Return white-on-green CSS when value is truthy, white-on-red CSS when it is falsy."""
    # NOTE(review): repeats the `background` definition from the earlier report cell.
    if value:
        style = 'color:white;background-color:green'
    else:
        style = 'color:white;background-color:red'
    return style
        

# Columns shown in the report, in display order.
report_columns = [
    "treatment",
    "metric",
    "effect",
    "upper",
    "lower",
    "control_mean",
    "treatment_mean",
    "significant",
]

# Rows ordered by metric name, descending.
effects_by_metric = pd.DataFrame(effects)[report_columns].sort_values("metric", ascending=False)

# Sign-coloured text for the percentage columns, coloured fill for the flag.
(
    effects_by_metric.style
    .applymap(color, subset=["effect", "upper", "lower"])
    .applymap(background, subset=["significant"])
)

Unnamed: 0,treatment,metric,effect,upper,lower,control_mean,treatment_mean,significant
0,T1,sessions,1.52721,4.51445,-1.46003,1.10808,1.125,False
2,T1,mean_session_time,116.236,123.972,108.5,2.12317,4.59106,True
1,T1,mean_session_length,34.103,36.8276,31.3783,7.13841,9.57283,True
3,T1,mean_request_time,49.5863,50.2104,48.9623,0.000288044,0.000430874,True
