In [1]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, pearsonr, spearmanr, kendalltau, f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest
import warnings
warnings.filterwarnings("ignore")

In [58]:
# Notebook-wide pandas display settings: no cap on displayed columns,
# at most 10 rows per printed frame, floats rendered with four decimals.
display_settings = {
    "display.max_columns": None,
    "display.max_rows": 10,
    "display.float_format": lambda number: "%.4f" % number,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

In [5]:
# Load the workbook without naming a sheet (pandas then reads its default,
# i.e. the first sheet).
df = pd.read_excel("ab_testing.xlsx")

In [9]:
df.head(10) # Impression: number of views, Earning: revenue earned

Unnamed: 0,Impression,Click,Purchase,Earning
0,82529.45927,6090.07732,665.21125,2311.27714
1,98050.45193,3382.86179,315.08489,1742.80686
2,82696.02355,4167.96575,458.08374,1797.82745
3,109914.4004,4910.88224,487.09077,1696.22918
4,108457.76263,5987.65581,441.03405,1543.72018
5,77773.6339,4462.20659,519.66966,2081.85185
6,95110.58627,3555.58067,512.92875,1815.00661
7,106649.18307,4358.02704,747.02012,1965.1004
8,122709.71659,5091.55896,745.98568,1651.66299
9,79498.24866,6653.84552,470.50137,2456.30424


In [13]:
# Read both experiment sheets in one call: given a list of sheet names,
# read_excel returns a dict of DataFrames keyed by those names.
group_sheets = pd.read_excel("ab_testing.xlsx", sheet_name=["Control Group", "Test Group"])
df_control = group_sheets["Control Group"]
df_test = group_sheets["Test Group"]

In [37]:
def check_dataframe(dataframe):
    """Print a quick structural summary of a DataFrame.

    Three sections are written to stdout, each preceded by a dashed
    divider line: the frame's shape, its per-column dtypes, and the
    per-column count of null values.

    Parameters
    ----------
    dataframe : object exposing ``shape``, ``dtypes`` and ``isnull()``
        In this notebook, a pandas DataFrame. It is only read.

    Returns
    -------
    None
        The summary is printed, not returned.
    """
    divider = "----------------"
    summary_sections = (
        dataframe.shape,
        dataframe.dtypes,
        dataframe.isnull().sum(),
    )
    for section in summary_sections:
        print(divider)
        print(section)

In [39]:
# Print shape, dtypes and null counts for the control-group frame.
check_dataframe(df_control)

----------------
(40, 4)
----------------
Impression    float64
Click         float64
Purchase      float64
Earning       float64
dtype: object
----------------
Impression    0
Click         0
Purchase      0
Earning       0
dtype: int64


In [41]:
# Print shape, dtypes and null counts for the test-group frame.
check_dataframe(df_test)

----------------
(40, 4)
----------------
Impression    float64
Click         float64
Purchase      float64
Earning       float64
dtype: object
----------------
Impression    0
Click         0
Purchase      0
Earning       0
dtype: int64


In [76]:
# Tag every row with the experiment arm it came from so the two frames can
# be told apart once they are combined.
for frame, arm in ((df_control, "control"), (df_test, "test")):
    frame["group"] = arm

In [78]:
# Stack the control and test observations into one long frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# half keeps its own row labels, so every label appears twice and any
# label-based lookup (df.loc[3]) would silently return two rows.
df = pd.concat([df_control, df_test], ignore_index=True)

In [80]:
# Preview the first ten rows of the combined frame, including the new
# "group" column.
df.head(10)

Unnamed: 0,Impression,Click,Purchase,Earning,group
0,82529.4593,6090.0773,665.2113,2311.2771,control
1,98050.4519,3382.8618,315.0849,1742.8069,control
2,82696.0235,4167.9657,458.0837,1797.8274,control
3,109914.4004,4910.8822,487.0908,1696.2292,control
4,108457.7626,5987.6558,441.034,1543.7202,control
5,77773.6339,4462.2066,519.6697,2081.8519,control
6,95110.5863,3555.5807,512.9287,1815.0066,control
7,106649.1831,4358.027,747.0201,1965.1004,control
8,122709.7166,5091.559,745.9857,1651.663,control
9,79498.2487,6653.8455,470.5014,2456.3042,control


#### Hipotez kurma

In [83]:
#H0: m1=m2
#H1: m1!=m2

In [87]:
# Mean Purchase per group, largest first (at most 20 rows displayed).
purchase_means = df.groupby("group").agg({"Purchase": "mean"})
purchase_means.sort_values("Purchase", ascending=False).head(20)

Unnamed: 0_level_0,Purchase
group,Unnamed: 1_level_1
test,582.1061
control,550.8941


#### Varsayım Testi

##### -Normal Varsayımı
##### -Varyans Homojenliği

In [70]:
#Normal Varsayım
#H0: Normal dağılım varsayımı sağlanmaktadır
#H1: Normal dağılım varsayımı sağlanmamaktadır
#p_value < 0.05 ise H0 reddedilir

In [89]:
# Shapiro-Wilk normality test on the control group's Purchase values.
control_purchase = df.loc[df["group"] == "control", "Purchase"]
test_stat, p_value = shapiro(control_purchase)

In [91]:
# Report the most recent test statistic and p-value, rounded to two decimals.
report_line = f"Test Stat: {test_stat:.2f}, p_value: {p_value:.2f}"
print(report_line)

Test Stat: 0.98, p_value: 0.59


In [93]:
# Shapiro-Wilk normality test on the test group's Purchase values.
test_purchase = df.loc[df["group"] == "test", "Purchase"]
test_stat, p_value = shapiro(test_purchase)

In [95]:
# Report the most recent test statistic and p-value, rounded to two decimals.
report_line = f"Test Stat: {test_stat:.2f}, p_value: {p_value:.2f}"
print(report_line)

Test Stat: 0.96, p_value: 0.15


#### H0 Reddedilmedi

In [97]:
#Varyans Homojenliği
#H0: Varyanslar homojendir
#H1: Varyanslar homojen değildir
#p_value < 0.05 H0 Red

In [99]:
# Levene's test for equal variances of Purchase between the two groups
# (control sample first, then test sample).
purchase_samples = [
    df.loc[df["group"] == arm, "Purchase"] for arm in ("control", "test")
]
test_stat, p_value = levene(*purchase_samples)

In [101]:
# Report the most recent test statistic and p-value, rounded to two decimals.
report_line = f"Test Stat: {test_stat:.2f}, p_value:{p_value:.2f}"
print(report_line)

Test Stat: 2.64, p_value:0.11


#### H0 Reddedilmedi

In [105]:
#Varsayımlar sağlandı, bağımsız iki örneklem t testi uygulayacağız

In [107]:
# Independent two-sample t-test on Purchase, control vs. test, with the
# equal-variance form requested explicitly via equal_var=True.
is_control = df["group"] == "control"
is_test = df["group"] == "test"
test_stat, p_value = ttest_ind(
    df.loc[is_control, "Purchase"],
    df.loc[is_test, "Purchase"],
    equal_var=True,
)

In [109]:
# Report the most recent test statistic and p-value, rounded to two decimals.
report_line = f"Test Stat: {test_stat:.2f}, p_value: {p_value:.2f}"
print(report_line)

Test Stat: -0.94, p_value: 0.35


In [111]:
#p_value < 0.05 H0 Red

### H0 REDDEDİLMEDİ
#### DEMEK Kİ GRUP ORTALAMALARI ARASINDA İSTATİSTİKSEL OLARAK ANLAMLI BİR FARK BULUNAMADI (M1=M2 REDDEDİLEMEDİ)