# A/B Testing Course

## Lesson 12. Ratio Metrics

### Homework

#### Import Libraries

In [1]:
import os
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from scipy.stats import ttest_ind

#### Import Data

In [3]:
# Experiment split: users with their `pilot` group flag.
df_exp_users = pd.read_csv('12_experiment_users.csv')

In [4]:
# Web logs: page-visit records with the visit timestamp and page load time.
df_web_logs = pd.read_csv('12_2022-04-13T12_df_web_logs.csv')

#### Data Description

df_exp_users - users split in the experiment:
- user_id - user identifier;
- pilot - test group flag.

df_web_logs - web logs information:
- user_id - user identifier;
- page - visited page;
- date - date and time of a page visit;
- load_time - page load time.

#### Checking Data

In [5]:
# Peek at the first rows of the experiment split.
df_exp_users.head()

Unnamed: 0,user_id,pilot
0,c36b2e,0
1,20336e,0
2,034652,0
3,e98e3b,0
4,3f1105,0


In [6]:
# Column dtypes and non-null counts of the experiment split.
df_exp_users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5456 entries, 0 to 5455
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   user_id  5456 non-null   object
 1   pilot    5456 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 85.4+ KB


In [7]:
# Peek at the first rows of the web logs.
df_web_logs.head()

Unnamed: 0,user_id,page,date,load_time
0,f25239,m,2022-02-03 23:45:37,80.8
1,06d6df,m,2022-02-03 23:49:56,70.5
2,06d6df,m,2022-02-03 23:51:16,89.7
3,f25239,m,2022-02-03 23:51:43,74.4
4,697870,m,2022-02-03 23:53:12,66.8


In [8]:
# Column dtypes and memory footprint of the web logs.
df_web_logs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2401709 entries, 0 to 2401708
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   user_id    object 
 1   page       object 
 2   date       object 
 3   load_time  float64
dtypes: float64(1), object(3)
memory usage: 73.3+ MB


In [9]:
# Count missing values in the metric column before it is aggregated.
df_web_logs.load_time.isna().sum()

0

#### Task 1. 

Previously, we evaluated the experiment assuming that all backend response time measurements were independent. Now we can evaluate it correctly even when measurements from the same user are dependent.

Evaluate the experiment using linearization, treating measurements from the same user as dependent.

The experiment was conducted from 2022-04-05 to 2022-04-12. The metric is the average request processing time.

Report the p-value rounded to 4 decimal places.

In [10]:
# Keep only events inside the experiment window. `date` holds strings, so the
# comparison is lexicographic; the right bound is exclusive, which leaves out
# every event stamped 2022-04-12.
# NOTE(review): the task text says "to 2022-04-12" - confirm the last day is
# meant to be excluded.
in_experiment_window = df_web_logs['date'].between('2022-04-05', '2022-04-12', inclusive='left')
df_filt = df_web_logs[in_experiment_window]

In [11]:
# Collapse the logs to one row per user, keeping all of that user's load times
# together as a list so the dependent measurements stay grouped.
data_agg = (
    df_filt
    .groupby('user_id', as_index=False)
    .agg(load_time=('load_time', list))
)

In [12]:
# Attach the group flag; the inner join keeps only users present in both tables.
data = data_agg.merge(df_exp_users, on='user_id', how='inner')

In [13]:
# Number of users per group after joining the logs with the experiment split.
data.pilot.value_counts()

pilot
1    2773
0    2683
Name: count, dtype: int64

In [14]:
def check_linearization(a, b):
    """Compare two groups on a ratio metric using linearization.

    Every user's observations are reduced to a numerator (sum of the values)
    and a denominator (number of values). The linearized per-user value is
    ``numerator - kappa * denominator``, where ``kappa`` is the overall ratio
    of the control group. The linearized values of both groups are then
    compared with ``ttest_ind``.

    a: sequence of per-user collections of observations, control group.
    b: sequence of per-user collections of observations, pilot group.

    return: (p-value, mean linearized value of pilot minus that of control).
    """
    def _totals_and_counts(users):
        # One numerator / denominator pair per user.
        totals = np.array([np.sum(values) for values in users])
        counts = np.array([len(values) for values in users])
        return totals, counts

    control_totals, control_counts = _totals_and_counts(a)
    pilot_totals, pilot_counts = _totals_and_counts(b)

    # kappa is estimated on the control group only and applied to both groups.
    kappa = control_totals.sum() / control_counts.sum()

    lin_control = control_totals - kappa * control_counts
    lin_pilot = pilot_totals - kappa * pilot_counts

    pvalue = ttest_ind(lin_control, lin_pilot)[1]
    delta = lin_pilot.mean() - lin_control.mean()

    return pvalue, delta

In [15]:
# Per-user lists of load times for each group (pilot == 0 is control).
control_data = data.loc[data['pilot'].eq(0), 'load_time'].values
test_data = data.loc[data['pilot'].eq(1), 'load_time'].values

pval, delta = check_linearization(control_data, test_data)

print(f'p-value: {pval:.4f}')

p-value: 0.0442


#### Task 2. 

Implement the `calculate_linearized_metrics` method of the `MetricsService` class.

In [16]:
import numpy as np
import pandas as pd


class MetricsService:
    """Computes linearized per-user values of a ratio metric."""

    def calculate_linearized_metrics(
        self, control_metrics, pilot_metrics, control_user_ids=None, pilot_user_ids=None
    ):
        """Calculates the linearized ratio metric for every user.

        For each user the numerator is the sum of their 'metric' values and the
        denominator is the number of their rows. The linearized value is
        ``numerator - kappa * denominator``, where ``kappa`` is the ratio of the
        total numerator to the total denominator over all rows of
        control_metrics. The same kappa is applied to both groups.

        :param control_metrics (pd.DataFrame): DataFrame with metric values for the control group.
        The 'user_id' column may contain non-unique values.
        Measurements for the same user_id are considered dependent, while different user_ids
        are considered independent.
        columns=['user_id', 'metric']
        :param pilot_metrics (pd.DataFrame): DataFrame with metric values for the pilot group.
        Same layout and assumptions as control_metrics.
        columns=['user_id', 'metric']
        :param control_user_ids (list-like or None): user IDs of the control group for which
        the metric should be returned, in this order. A list, numpy array or Series is accepted.
        If None, the users found in control_metrics are used.
        A user with no rows in control_metrics gets a linearized metric of zero.
        :param pilot_user_ids (list-like or None): same as control_user_ids, for the pilot group.

        :return lin_control_metrics, lin_pilot_metrics: DataFrames with columns=['user_id', 'metric']
        """
        control_per_user = self._aggregate_by_user(control_metrics)
        pilot_per_user = self._aggregate_by_user(pilot_metrics)

        # kappa is estimated on the control group only.
        kappa = control_per_user['total'].sum() / control_per_user['count'].sum()

        lin_control_metrics = self._linearize(control_per_user, kappa, control_user_ids)
        lin_pilot_metrics = self._linearize(pilot_per_user, kappa, pilot_user_ids)
        return lin_control_metrics, lin_pilot_metrics

    @staticmethod
    def _aggregate_by_user(metrics):
        """Returns one row per user with the metric total and the row count."""
        grouped = metrics.groupby('user_id')['metric']
        # NOTE(review): groupby sum skips NaN metric values while size still
        # counts those rows; inputs are assumed to contain no missing metrics.
        return pd.DataFrame({'total': grouped.sum(), 'count': grouped.size()}).reset_index()

    @staticmethod
    def _linearize(per_user, kappa, user_ids):
        """Builds the ['user_id', 'metric'] frame, optionally restricted to user_ids."""
        linearized = pd.DataFrame({
            'user_id': per_user['user_id'],
            'metric': per_user['total'] - kappa * per_user['count'],
        })
        # `is None` rather than `!= None`: comparing a numpy array or Series
        # with `!=` is element-wise and cannot be used as an `if` condition.
        if user_ids is None:
            return linearized

        requested = pd.DataFrame({'user_id': list(user_ids)})
        # A left join keeps the requested ids and their order; ids without any
        # observations come out as NaN and are set to zero.
        result = requested.merge(linearized, how='left', on='user_id')
        result['metric'] = result['metric'].fillna(0)
        return result
            


def _chech_df(df, df_ideal, sort_by, reindex=False, set_dtypes=False, decimal=None):
    """Assert that `df` matches the reference frame `df_ideal`.

    :param df: frame under test; must be a pd.DataFrame.
    :param df_ideal: reference frame; its columns define the expected columns and their order.
    :param sort_by: column name(s) both frames are sorted by before comparing.
    :param reindex: if True, both frames get a fresh 0..n-1 index after sorting.
    :param set_dtypes: if True, `df` columns are cast to the dtypes of `df_ideal`
        before the exact comparison. Not applied when `decimal` is given.
    :param decimal: if not None, values are compared as floats with
        np.testing.assert_almost_equal to this many decimals; otherwise the
        frames must be exactly equal.

    :raises AssertionError: if the frames differ.
    """
    assert isinstance(df, pd.DataFrame), 'The return of the function is not a pd.DataFrame.'
    assert len(df) == len(df_ideal), 'Wrong number of lines.'
    assert len(df.T) == len(df_ideal.T), 'Wrong number of columns.'
    columns = df_ideal.columns
    assert df.columns.isin(columns).sum() == len(df.columns), 'Wrong names of columns.'
    df = df[columns].sort_values(sort_by)
    df_ideal = df_ideal.sort_values(sort_by)
    if reindex:
        df_ideal = df_ideal.reset_index(drop=True)
        df = df.reset_index(drop=True)

    # `is not None` so that decimal=0 is honoured as a real tolerance.
    if decimal is not None:
        # Compare as floats without casting to the ideal dtypes first: a cast
        # to an integer dtype truncates (1.9999999 -> 1, 2.4 -> 2) and would
        # defeat the tolerant comparison in both directions.
        ideal_values = df_ideal.astype(float).values
        values = df.astype(float).values
        np.testing.assert_almost_equal(ideal_values, values, decimal=decimal)
        return

    if set_dtypes:
        df = df.astype(df_ideal.dtypes.to_dict())
    assert df_ideal.equals(df), 'The final dataframe does not match with an expected result.'


if __name__ == '__main__':
    # Hand-checkable case: kappa = (3 + 5 + 7) / 3 = 5, so
    # user 1 -> 8 - 5 * 2 = -2, user 2 -> 7 - 5 * 1 = 2, user 3 -> 9 - 5 * 2 = -1.
    control_metrics = pd.DataFrame({'user_id': [1, 1, 2], 'metric': [3, 5, 7]})
    pilot_metrics = pd.DataFrame({'user_id': [3, 3], 'metric': [3, 6]})
    ideal_lin_control_metrics = pd.DataFrame({'user_id': [1, 2], 'metric': [-2, 2]})
    ideal_lin_pilot_metrics = pd.DataFrame({'user_id': [3], 'metric': [-1]})

    metrics_service = MetricsService()
    lin_control_metrics, lin_pilot_metrics = metrics_service.calculate_linearized_metrics(
        control_metrics, pilot_metrics
    )

    _chech_df(
        lin_control_metrics, ideal_lin_control_metrics, ['user_id', 'metric'],
        reindex=True, set_dtypes=True, decimal=3,
    )
    _chech_df(
        lin_pilot_metrics, ideal_lin_pilot_metrics, ['user_id', 'metric'],
        reindex=True, set_dtypes=True, decimal=3,
    )
    print('simple test passed')

simple test passed
