<a href="https://colab.research.google.com/github/mohiteprathamesh1996/Analyzing-Website-conversion-rate-using-AB-tests/blob/main/AB_Experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Loading packages

In [35]:
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import datetime
import matplotlib.pyplot as plt
import seaborn as sns

import math
import statsmodels.stats.api as sms
import scipy.stats as st

import warnings
warnings.filterwarnings("ignore")

#### Load data

In [21]:
# Raw A/B test records, read straight from the CSV hosted in the project's GitHub repository.
AB_DATA_URL = "https://raw.githubusercontent.com/mohiteprathamesh1996/Analyzing-Website-conversion-rate-using-AB-tests/main/ab_data.csv"
df_conversion = pd.read_csv(AB_DATA_URL)

#### Data Cleaning
To ensure the data matches the planned experiment design, I dropped the mismatched rows: rows where the control group was shown the new landing page and, conversely, rows where the treatment group was shown the old landing page.

In [22]:
# Keep only the rows where the group and the page shown agree:
# control -> old_page, treatment -> new_page.
control_on_old_page = df_conversion[(df_conversion["group"] == "control") &
                                    (df_conversion["landing_page"] == "old_page")]
treatment_on_new_page = df_conversion[(df_conversion["group"] == "treatment") &
                                      (df_conversion["landing_page"] == "new_page")]

# Control rows are stacked first, then treatment rows; the index is rebuilt from 0.
df_conversion_cleaned = pd.concat([control_on_old_page, treatment_on_new_page]).reset_index(drop=True)

#### Drop duplicate users (keep the first record per `user_id`)

In [23]:
# A user_id that appears more than once keeps only its first occurrence;
# the index is rebuilt from 0 afterwards.
is_repeat_user = df_conversion_cleaned.duplicated(subset=["user_id"], keep="first")
df_conversion_cleaned = df_conversion_cleaned[~is_repeat_user].reset_index(drop=True)

#### Determine sample size required

In [41]:
def sample_size(converted_control, total_control, practical_significance, confidence_level, sensitivity):
  '''
  Returns the minimum sample size (per group) to run the AB experiment on.

  The baseline conversion rate is taken from the control group, converted to an
  effect size against (baseline + practical_significance), and fed to a
  two-sample normal power calculation with equal group sizes (ratio = 1).

  Parameters:
  -----------
      converted_control: int
          Number of converted data points in control group.

      total_control: int
          Total number of data points in control group. Must be positive.

      practical_significance: float
          Minimum change to the baseline rate that is beneficial to the business objective.

      confidence_level: float
          Significance level (alpha), i.e. the probability of rejecting the null
          hypothesis when it shouldn't be rejected (e.g. 0.05). Despite the
          parameter name, this is NOT 1 - alpha; it is passed to the power
          solver as `alpha`.

      sensitivity: float
          Statistical power (1 - beta), i.e. the probability of rejecting the null
          hypothesis when it should be rejected (e.g. 0.8). It is passed to the
          power solver as `power`.

  Returns:
  --------
      int
          Required number of observations, rounded UP so that the requested
          power is met rather than narrowly missed.

  Raises:
  -------
      ValueError
          If total_control is not positive (the baseline rate would be undefined).
  '''
  # Fail early with a clear message instead of an opaque ZeroDivisionError.
  if total_control <= 0:
    raise ValueError("total_control must be a positive number of observations")

  baseline_rate = converted_control / total_control

  effect_size = sms.proportion_effectsize(baseline_rate, baseline_rate + practical_significance)

  # Named `required_n` so the local does not shadow the function's own name.
  required_n = sms.NormalIndPower().solve_power(effect_size = effect_size,
                                                power = sensitivity,
                                                alpha = confidence_level,
                                                ratio = 1)

  # Always round up: rounding to nearest could drop below the size needed for the target power.
  return math.ceil(required_n)

In [65]:
# The baseline rate is estimated from the control group only.
control_rows = df_conversion_cleaned[df_conversion_cleaned["group"] == "control"]
control_conversions = control_rows[control_rows["converted"] == 1]

sample_size(converted_control=control_conversions.shape[0],
            total_control=control_rows.shape[0],
            practical_significance=0.02,
            confidence_level=0.05,
            sensitivity=0.8)

4444

In [66]:
# Number of users available in each experiment group after cleaning.
group_labels = df_conversion_cleaned["group"]
group_labels.value_counts()

treatment    145310
control      145274
Name: group, dtype: int64

#### Running AB experiment

In [67]:
def AB_significance_test(converted_control, converted_treatment, total_control, 
                         total_treatment, practical_significance, confidence_level):
  '''
  Prints an interval for the conversion lift (treatment rate minus control rate)
  together with a verdict, and returns nothing.

  The interval is built from the pooled standard error and a two-sided normal
  critical value. The verdict is "Reject Null Hypothesis!" only when the lower
  bound of the interval lies strictly above practical_significance; otherwise
  "Do not reject Null Hypothesis!" is printed.

  Parameters:
  -----------
      converted_control: int
          Number of converted data points in control group.

      converted_treatment: int
          Number of converted data points in treatment group.

      total_control: int
          Total number of data points in control group.

      total_treatment: int
          Total number of data points in treatment group.

      practical_significance: float
          Minimum change to the baseline rate that is beneficial to the business objective.
          Given as a fraction (0.02 is printed as 2.0 %).

      confidence_level: float
          Used as the two-sided significance level (alpha): the critical value is
          the normal quantile at 1 - confidence_level / 2, so 0.05 yields a 95 % interval.
  '''
  # Pooled conversion rate over both groups and the standard error derived from it
  all_conversions = converted_control + converted_treatment
  all_observations = total_control + total_treatment
  pooled_rate = all_conversions / all_observations
  pooled_variance = pooled_rate * (1 - pooled_rate) * (1 / total_control + 1 / total_treatment)
  pooled_std_error = math.sqrt(pooled_variance)

  # Two-sided critical value and the resulting half-width of the interval
  critical_z = st.norm.ppf(1 - confidence_level / 2)
  half_width = pooled_std_error * critical_z

  # Observed lift: treatment conversion rate minus control conversion rate
  observed_lift = (converted_treatment / total_treatment) - (converted_control / total_control)
  lower_bound = observed_lift - half_width
  upper_bound = observed_lift + half_width

  # All three figures are reported as percentages rounded to two decimals
  report_rows = (("Practical significance = ", practical_significance),
                 ("Lower Bound = ", lower_bound),
                 ("Upper Bound = ", upper_bound))
  for label, fraction in report_rows:
    print(label, round(fraction * 100, 2), "%")

  # Reject only when even the pessimistic end of the interval beats the threshold
  lift_clears_threshold = practical_significance < lower_bound
  print("Reject Null Hypothesis!" if lift_clears_threshold else "Do not reject Null Hypothesis!")

In [74]:
# Split the cleaned data by experiment group once, then count totals and conversions from each part.
control_rows = df_conversion_cleaned[df_conversion_cleaned["group"] == "control"]
treatment_rows = df_conversion_cleaned[df_conversion_cleaned["group"] == "treatment"]

AB_significance_test(converted_control=control_rows[control_rows["converted"] == 1].shape[0],
                     converted_treatment=treatment_rows[treatment_rows["converted"] == 1].shape[0],
                     total_control=control_rows.shape[0],
                     total_treatment=treatment_rows.shape[0],
                     practical_significance=0.02,
                     confidence_level=0.05)

Practical significance =  2.0 %
Lower Bound =  -0.39 %
Upper Bound =  0.08 %
Do not reject Null Hypothesis!
