# A/B Testing Course

## Lesson 3. MDE & Sample Size

### Homework

#### Import Libraries

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
from scipy.stats import norm

#### Import Data

In [3]:
URL_BASE = ''

def read_database(file_name):
    """Read the CSV file `file_name`, located under `URL_BASE`, into a DataFrame."""
    csv_path = os.path.join(URL_BASE, file_name)
    return pd.read_csv(csv_path)

In [4]:
# load the sales log and parse the purchase timestamps into datetime64
df_sales = read_database('2022-04-01T12_df_sales.csv').assign(
    date=lambda sales: pd.to_datetime(sales['date'])
)

#### Data Description

df_sales - information about purchases, one row represents one order:
- sale_id - purchase identifier;
- date - date of purchase;
- count_pizza - number of pizzas in the order;
- count_drink - number of drinks in the order;
- price - order price;
- user_id - user identifier.   

#### Checking Data

In [5]:
# preview the first rows to sanity-check that the file was parsed as expected
df_sales.head()

Unnamed: 0,sale_id,date,count_pizza,count_drink,price,user_id
0,1000001,2022-02-04 10:00:24,1,0,720,1c1543
1,1000002,2022-02-04 10:02:28,1,1,930,a9a6e8
2,1000003,2022-02-04 10:02:35,3,1,1980,23420a
3,1000004,2022-02-04 10:03:06,1,1,750,3e8ed5
4,1000005,2022-02-04 10:03:23,1,1,870,cbc468


In [6]:
# check column dtypes and non-null counts (confirms `date` was parsed as datetime)
df_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203847 entries, 0 to 203846
Data columns (total 6 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   sale_id      203847 non-null  int64         
 1   date         203847 non-null  datetime64[ns]
 2   count_pizza  203847 non-null  int64         
 3   count_drink  203847 non-null  int64         
 4   price        203847 non-null  int64         
 5   user_id      203847 non-null  object        
dtypes: datetime64[ns](1), int64(4), object(1)
memory usage: 9.3+ MB


#### Task 1. 

Suppose we want to conduct an experiment with customers who made a purchase during the experiment.

The metric is the average revenue per user during the experiment;  
Duration - one week;  
Significance level - 0.05;  
Allowable probability of type II error - 0.1;  
Expected effect - 20 units;    
Estimate the required group size based on purchase data for the week starting February 21, 2022 (from February 21 up to, but not including, February 28).  

As the answer, enter the required group size rounded to the nearest ten, using the `round(x, -1)` function.

In [7]:
# keep only purchases from the target week:
# start date inclusive, end date exclusive -> exactly seven days
purchase_date = df_sales['date']
in_target_week = (purchase_date >= '2022-02-21') & (purchase_date < '2022-02-28')
sales_filt = df_sales[in_target_week]

In [8]:
# revenue per user: one row per user_id holding the sum of that user's order prices
sales_by_user = sales_filt.groupby('user_id', as_index=False)['price'].sum()

In [9]:
def get_sample_size_abs(epsilon, std, alpha=0.05, beta=0.2):
    """Group size needed to detect an absolute change of the metric.

    Applies the normal-approximation formula
    n = (z_{1 - alpha/2} + z_{1 - beta})^2 * 2 * std^2 / epsilon^2
    and rounds the result up to a whole number.

    :param epsilon: expected absolute effect (same units as the metric).
    :param std: standard deviation of the metric.
    :param alpha: significance level (two-sided).
    :param beta: allowable probability of type II error.
    :return (int): required number of observations per group.
    """
    quantile_alpha = norm.ppf(1 - alpha / 2)
    quantile_beta = norm.ppf(1 - beta)
    doubled_variance = 2 * std ** 2
    required = (quantile_alpha + quantile_beta) ** 2 * doubled_variance / (epsilon ** 2)
    return int(np.ceil(required))

In [10]:
# experiment parameters for Task 1
alpha = 0.05  # significance level
beta = 0.1    # allowable probability of type II error
effect = 20   # expected absolute effect, in price units
std_ = sales_by_user['price'].std()  # pandas default: sample std (ddof=1)

required_group_size = get_sample_size_abs(epsilon=effect, std=std_, alpha=alpha, beta=beta)
ans = round(required_group_size, -1)

print(f'Recommended group size is: {ans:,} clients.')

Recommended group size is: 34,570 clients.


#### Task 2.

In the previous task, it turned out that the required sample size is larger than the available data for one week.  
What is the minimum effect that we can detect with the same error probabilities if the users who made a purchase in that same week (February 21 up to, but not including, February 28) are split evenly into two groups?  

Round the answer to the nearest integer.






In [11]:
def get_minimal_determinable_effect(std, sample_size, alpha=0.05, beta=0.2):
    """Minimum Detectable Effect (MDE) for a given group size.

    MDE = (z_{1 - alpha/2} + z_{1 - beta}) * sqrt(2 * std^2) / sqrt(sample_size)

    :param std: standard deviation of the metric.
    :param sample_size: number of observations in each group.
    :param alpha: significance level (two-sided).
    :param beta: allowable probability of type II error.
    :return: smallest absolute effect detectable with the given error rates.
    """
    quantile_sum = norm.ppf(1 - alpha / 2) + norm.ppf(1 - beta)
    std_of_difference = (2 * (std ** 2)) ** 0.5
    return quantile_sum * std_of_difference / np.sqrt(sample_size)

In [12]:
# calculating mde for our input: the week's users are split evenly into two groups
group_size = sales_by_user.shape[0] / 2
mde = get_minimal_determinable_effect(std=std_, sample_size=group_size, alpha=alpha, beta=beta)
ans = round(mde, 0)

print(f'Minimum Detectable Effect: {ans:,} units.')

Minimum Detectable Effect: 33.0 units.


#### Task 3. 

Write the method `estimate_sample_size` of the class `ExperimentsService`. The description of the method is given in the solution template below.

NB!:
- The effect size is specified in percentages.
- Use the `np.std` function with default parameters to calculate the standard deviation.

In [13]:
import numpy as np
import pandas as pd
from pydantic import BaseModel
from scipy.stats import norm


class Design(BaseModel):
    """Data class with a description of the experiment parameters.

    statistical_test - type of statistical test; 'ttest' is the only listed value
    effect - effect size in percentages of the metric mean (e.g. 3.0 means 3%)
    alpha - significance level
    beta - allowable probability of type II error
    """
    statistical_test: str
    effect: float
    alpha: float
    beta: float


class ExperimentsService:
    """Helpers for designing experiments (currently: sample size estimation)."""

    @staticmethod
    def _get_sample_size_abs(epsilon, std, alpha=0.05, beta=0.2):
        """Number of observations per group needed to detect an absolute effect.

        n = ceil((z_{1 - alpha/2} + z_{1 - beta})^2 * 2 * std^2 / epsilon^2)
        """
        t_alpha = norm.ppf(1 - alpha / 2, loc=0, scale=1)
        t_beta = norm.ppf(1 - beta, loc=0, scale=1)
        z_scores_sum_squared = (t_alpha + t_beta) ** 2
        return int(
            np.ceil(z_scores_sum_squared * (2 * std ** 2) / (epsilon ** 2))
        )

    def estimate_sample_size(self, metrics, design):
        """Estimate the sample size needed to test the hypothesis
        of equality of means.

        For metrics with a single value per user, the group size is calculated
        directly from the formula. For metrics with multiple values per user
        (e.g. response_time), the necessary amount of data is calculated
        and divided by the average number of values per user.
        For example, if there are 1000 observations in the metrics table and
        100 unique users, and the experiment requires 302 observations,
        then the group size will be 31, since on average there are
        10 observations per user, which gives a total of
        approximately 310 observations in the group.

        :param metrics (pd.DataFrame): DataFrame with metric values from MetricsService.
            columns=['user_id', 'metric']
        :param design (Design): An object that describes the parameters of the experiment.
            `design.effect` is a percentage of the metric mean.
        :return (int): The minimum required group size (number of users).
        :raises ValueError: if `metrics` has no observations or no user ids,
            or if the absolute effect (effect % of the metric mean) is zero
            or not finite, in which case no finite sample size exists.
        """
        n_observations = metrics.shape[0]
        n_users = metrics['user_id'].nunique()
        if n_observations == 0 or n_users == 0:
            raise ValueError(
                'metrics must contain at least one observation with a user_id'
            )

        # ddof=0 matches np.std with default parameters, as the task requires.
        std_ = metrics['metric'].std(ddof=0)
        mean_ = metrics['metric'].mean()

        # Convert the percentage effect to an absolute one directly;
        # going through (1 + p / 100) - 1 adds floating-point error.
        epsilon = design.effect / 100 * mean_
        if not np.isfinite(epsilon) or epsilon == 0:
            raise ValueError(
                'absolute effect must be finite and non-zero: '
                'check design.effect and the mean of the metric'
            )

        required_observations = self._get_sample_size_abs(
            epsilon, std_, alpha=design.alpha, beta=design.beta
        )

        # One value per user: observations and users coincide.
        if n_users == n_observations:
            return required_observations

        # Several values per user: convert observations into users.
        observations_per_user = n_observations / n_users
        return int(np.ceil(required_observations / observations_per_user))
        


if __name__ == '__main__':
    # Smoke test: ten users with a single observation each, metric values 0..9.
    metrics = pd.DataFrame({
        'user_id': list(map(str, range(10))),
        'metric': list(range(10)),
    })
    design = Design(statistical_test='ttest', effect=3., alpha=0.05, beta=0.1)
    ideal_sample_size = 9513

    experiments_service = ExperimentsService()
    sample_size = experiments_service.estimate_sample_size(metrics, design)
    assert sample_size == ideal_sample_size, 'Wrong!'
    print('simple test passed')

simple test passed
