In [1]:
# Packages imports
import os
from math import ceil

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportion_confint, proportions_ztest

%matplotlib inline

# Some plot styling preferences.
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases removed the old names, so prefer the new name when this
# install provides it and fall back to the legacy name on older versions.
whitegrid_style = 'seaborn-v0_8-whitegrid'
if whitegrid_style not in plt.style.available:
    whitegrid_style = 'seaborn-whitegrid'
plt.style.use(whitegrid_style)

# Default font applied to every figure in the notebook.
font = {'family' : 'Helvetica',
        'weight' : 'bold',
        'size'   : 14}

mpl.rc('font', **font)

In [16]:
# Standardized effect size for the difference between a 13% and a 15%
# conversion rate (the gap between the two proportions, scaled so it can be
# fed to a power calculation).
effect_size = sms.proportion_effectsize(0.13, 0.15)

# Solve for the sample size needed by a two-sample normal test:
#   power=0.8  -> probability of correctly rejecting a false H0
#   alpha=0.05 -> significance level
#   ratio=1    -> both groups have the same size (a different ratio would
#                 change the required sample size)
power_analysis = sms.NormalIndPower()
required_n = ceil(                       # round up to the next whole number
    power_analysis.solve_power(
        effect_size,
        power=0.8,
        alpha=0.05,
        ratio=1,
    )
)

print(required_n)


effect_size  -0.0576728617308947
4720


In [33]:

# Directory that contains ab_data.csv. Set the AB_DATA_DIR environment variable
# to run the notebook on another machine; the original local path is kept as
# the default so existing behavior is unchanged when the variable is unset.
file_path = os.environ.get(
    'AB_DATA_DIR',
    '/Users/mobin/Documents/quant finance Interview/My interviews/Kaggle/',
)
# os.path.join tolerates a directory given with or without a trailing slash.
df = pd.read_csv(os.path.join(file_path, 'ab_data.csv'))

# Column dtypes / null counts, then a preview of the first rows.
df.info()
df.head()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   user_id       294478 non-null  int64 
 1   timestamp     294478 non-null  object
 2   group         294478 non-null  object
 3   landing_page  294478 non-null  object
 4   converted     294478 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 11.2+ MB


Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


In [34]:
# Sanity check: control users should see the old page and treatment users the
# new page. Ideally the mismatched cells (1928 and 1965 in this data) would be
# zero, but they are small relative to the group sizes, so this is still OK.
pd.crosstab(index=df['group'], columns=df['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,1928,145274
treatment,145311,1965


In [38]:
# Count occurrences of each user_id
user_counts = df['user_id'].value_counts()

# Identify user_ids that appear more than once
duplicate_users = user_counts[user_counts > 1].index

# Report the number of distinct repeated users. (Using len(df) - nunique would
# instead count surplus rows, which over-counts users appearing 3+ times.)
print('There are ', len(duplicate_users), ' users that appear multiple times in the dataset')

# Drop every row belonging to a repeated user, not just the extra copies.
# NOTE: this rebinds `df`, so re-running the cell reports 0 repeated users.
df = df[~df['user_id'].isin(duplicate_users)]

# Invariant relied on by the sampling step: one row per user.
assert df['user_id'].is_unique


There are  3894  users that appear multiple times in the dataset


In [43]:
# Draw required_n rows from each group with a fixed seed so the sample is
# reproducible.
control_sample, treatment_sample = (
    df.loc[df['group'] == label].sample(n=required_n, random_state=22)
    for label in ('control', 'treatment')
)

# Stack the two samples (control first) and renumber the rows from 0.
ab_test = pd.concat([control_sample, treatment_sample], axis=0, ignore_index=True)
ab_test.info()
ab_test['group'].value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9440 entries, 0 to 9439
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   user_id       9440 non-null   int64 
 1   timestamp     9440 non-null   object
 2   group         9440 non-null   object
 3   landing_page  9440 non-null   object
 4   converted     9440 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 368.9+ KB


control      4720
treatment    4720
Name: group, dtype: int64

In [60]:
# Group the 'converted' column once and reuse it for every statistic.
converted_by_group = ab_test.groupby('group')['converted']

# Conversion rate: per-group mean of 'converted'.
conversion_rate = converted_by_group.mean()
print("conversion_rate ", conversion_rate)

# Standard deviation with ddof=0, i.e. the population formula (divide by n).
std_deviation = converted_by_group.std(ddof=0)
print("std_deviation ", std_deviation)

# Standard error of the proportion (std / sqrt(n)), also with ddof=0.
std_error = converted_by_group.apply(lambda values: stats.sem(values, ddof=0))

# One row per group, with 'group' turned back into a regular column.
conversion_rates = pd.DataFrame(
    {
        'conversion_rate': conversion_rate,
        'std_deviation': std_deviation,
        'std_error': std_error,
    }
).reset_index()
print("conversion_rates. ", conversion_rates)


conversion_rate  group
control      0.123305
treatment    0.125636
Name: converted, dtype: float64
std_deviation  group
control      0.328787
treatment    0.331438
Name: converted, dtype: float64
conversion_rates.         group  conversion_rate  std_deviation  std_error
0    control         0.123305       0.328787   0.004786
1  treatment         0.125636       0.331438   0.004824


In [61]:
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

# 'converted' values for each arm of the sampled experiment.
is_control = ab_test['group'] == 'control'
is_treatment = ab_test['group'] == 'treatment'
control_results = ab_test.loc[is_control, 'converted']
treatment_results = ab_test.loc[is_treatment, 'converted']

# Observations per group, ordered [control, treatment].
n_con, n_treat = control_results.count(), treatment_results.count()
nobs = [n_con, n_treat]
# Conversions per group, in the same [control, treatment] order.
successes = [control_results.sum(), treatment_results.sum()]

# Two-proportion z-test comparing the conversion rates of the two groups.
z_stat, pval = proportions_ztest(successes, nobs=nobs)

# 95% confidence interval for each group's conversion rate; the call returns
# the lower bounds and the upper bounds as two parallel sequences.
ci_lower, ci_upper = proportion_confint(successes, nobs=nobs, alpha=0.05)
lower_con, lower_treat = ci_lower
upper_con, upper_treat = ci_upper

print(
    f'z statistic: {z_stat:.2f}',
    f'p-value: {pval:.3f}',
    f'ci 95% for control group: [{lower_con:.3f}, {upper_con:.3f}]',
    f'ci 95% for treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]',
    sep='\n',
)

z statistic: -0.34
p-value: 0.732
ci 95% for control group: [0.114, 0.133]
ci 95% for treatment group: [0.116, 0.135]


In [71]:
# Two-proportion z-test on the full (de-duplicated) dataset rather than the
# power-sized sample. proportions_ztest / proportion_confint come from the
# notebook's import cell.

# Significance level, defined once and shared by the confidence intervals and
# the hypothesis test so they cannot drift apart.
alpha = 0.05
confidence_level = 1 - alpha  # 0.95 -> rendered as "95%" in the output labels

# Step 1: Split by group and collect sample sizes and conversion counts
control_group = df[df['group'] == 'control']
treatment_group = df[df['group'] == 'treatment']

n_control = len(control_group)
n_treatment = len(treatment_group)

conversions_control = control_group['converted'].sum()
conversions_treatment = treatment_group['converted'].sum()

# Step 2: Perform z-test using proportions_ztest
# Arrays are ordered [treatment, control], so the sign of the z-statistic
# reflects treatment minus control.
count = np.array([conversions_treatment, conversions_control])
nobs = np.array([n_treatment, n_control])

z_statistic, p_value = proportions_ztest(count, nobs, alternative='two-sided')

# Step 3: Calculate confidence intervals for each group's conversion rate
ci_control = proportion_confint(count=conversions_control, nobs=n_control, alpha=alpha)
ci_treatment = proportion_confint(count=conversions_treatment, nobs=n_treatment, alpha=alpha)

# Step 4: Print results
print(f"Control group {confidence_level:.0%} CI: ({ci_control[0]:.4f}, {ci_control[1]:.4f})")
print(f"Treatment group {confidence_level:.0%} CI: ({ci_treatment[0]:.4f}, {ci_treatment[1]:.4f})")
print(f"Z-statistic: {z_statistic:.4f}")
print(f"P-value: {p_value:.4f}")

# Step 5: Interpret results
if p_value < alpha:
    print("Reject the null hypothesis. There is a significant difference in conversion rates.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to conclude a significant difference in conversion rates.")

# Step 6: Check if confidence intervals overlap
# This is only a rough, conservative heuristic: two intervals can overlap while
# the difference is still significant, so the z-test above is the actual test.
if ci_control[1] < ci_treatment[0] or ci_treatment[1] < ci_control[0]:
    print("The confidence intervals do not overlap, suggesting a significant difference.")
else:
    print("The confidence intervals overlap, suggesting the difference may not be significant.")

Control group 95% CI: (0.1185, 0.1219)
Treatment group 95% CI: (0.1171, 0.1204)
Z-statistic: -1.1945
P-value: 0.2323
Fail to reject the null hypothesis. There is not enough evidence to conclude a significant difference in conversion rates.
The confidence intervals overlap, suggesting the difference may not be significant.
