In [2]:
import pandas as pd
from scipy.stats import ttest_ind
# Load the website A/B test data from CSV into a DataFrame
df = pd.read_csv("website_ab_test.csv")

In [3]:
# Preview the loaded DataFrame (the rendered table elides the middle rows)
df

Unnamed: 0,Theme,Click Through Rate,Conversion Rate,Bounce Rate,Scroll_Depth,Age,Location,Session_Duration,Purchases,Added_to_Cart
0,Light Theme,0.054920,0.282367,0.405085,72.489458,25,Chennai,1535,No,Yes
1,Light Theme,0.113932,0.032973,0.732759,61.858568,19,Pune,303,No,Yes
2,Dark Theme,0.323352,0.178763,0.296543,45.737376,47,Chennai,563,Yes,Yes
3,Light Theme,0.485836,0.325225,0.245001,76.305298,58,Pune,385,Yes,No
4,Light Theme,0.034783,0.196766,0.765100,48.927407,25,New Delhi,1437,No,No
...,...,...,...,...,...,...,...,...,...,...
995,Dark Theme,0.282792,0.401605,0.200720,68.478822,25,Kolkata,321,Yes,Yes
996,Dark Theme,0.299917,0.026372,0.762641,73.019821,38,Chennai,1635,Yes,Yes
997,Light Theme,0.370254,0.019838,0.607136,33.963298,32,Bangalore,1237,No,Yes
998,Light Theme,0.095815,0.137953,0.458898,37.429284,24,Chennai,893,Yes,No


In [4]:
# Show the first five rows to check the column layout and value formats
df.head()

Unnamed: 0,Theme,Click Through Rate,Conversion Rate,Bounce Rate,Scroll_Depth,Age,Location,Session_Duration,Purchases,Added_to_Cart
0,Light Theme,0.05492,0.282367,0.405085,72.489458,25,Chennai,1535,No,Yes
1,Light Theme,0.113932,0.032973,0.732759,61.858568,19,Pune,303,No,Yes
2,Dark Theme,0.323352,0.178763,0.296543,45.737376,47,Chennai,563,Yes,Yes
3,Light Theme,0.485836,0.325225,0.245001,76.305298,58,Pune,385,Yes,No
4,Light Theme,0.034783,0.196766,0.7651,48.927407,25,New Delhi,1437,No,No


In [5]:
# Dimensions of the DataFrame as (rows, columns)
df.shape

(1000, 10)

In [6]:
# Number of missing values in each column
df.isnull().sum()

Theme                 0
Click Through Rate    0
Conversion Rate       0
Bounce Rate           0
Scroll_Depth          0
Age                   0
Location              0
Session_Duration      0
Purchases             0
Added_to_Cart         0
dtype: int64

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Click Through Rate,Conversion Rate,Bounce Rate,Scroll_Depth,Age,Session_Duration
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.256048,0.253312,0.505758,50.319494,41.528,924.999
std,0.139265,0.139092,0.172195,16.895269,14.114334,508.231723
min,0.010767,0.010881,0.20072,20.011738,18.0,38.0
25%,0.140794,0.131564,0.353609,35.655167,29.0,466.5
50%,0.253715,0.252823,0.514049,51.130712,42.0,931.0
75%,0.370674,0.37304,0.648557,64.666258,54.0,1375.25
max,0.499989,0.498916,0.799658,79.997108,65.0,1797.0


In [8]:
# Data type of each column
df.dtypes

Theme                  object
Click Through Rate    float64
Conversion Rate       float64
Bounce Rate           float64
Scroll_Depth          float64
Age                     int64
Location               object
Session_Duration        int64
Purchases              object
Added_to_Cart          object
dtype: object

In [9]:
## Convert the KPI columns to numeric dtypes
numeric_cols = ['Click Through Rate', 'Conversion Rate', 'Bounce Rate',
                'Scroll_Depth', 'Age', 'Session_Duration']

# errors='coerce' turns unparseable entries into NaN, and .mean() below skips
# NaN silently. Count missing values before and after the conversion so that
# any value lost to coercion is reported instead of quietly dropping out of
# the per-theme averages.
missing_before = df[numeric_cols].isnull().sum()
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')
newly_missing = df[numeric_cols].isnull().sum() - missing_before
if newly_missing.any():
    print("Values coerced to NaN per column:")
    print(newly_missing[newly_missing > 0])

## Mean of every KPI per theme, ordered by Conversion Rate (highest first)
theme_performance = df.groupby('Theme')[numeric_cols].mean()
theme_performance_sorted = theme_performance.sort_values(by='Conversion Rate', ascending=False)
print(theme_performance_sorted)



             Click Through Rate  Conversion Rate  Bounce Rate  Scroll_Depth  \
Theme                                                                         
Light Theme            0.247109         0.255459     0.499035     50.735232   
Dark Theme             0.264501         0.251282     0.512115     49.926404   

                   Age  Session_Duration  
Theme                                     
Light Theme  41.734568        930.833333  
Dark Theme   41.332685        919.482490  


In [10]:
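##From the above means, the Light Theme is slightly ahead on Conversion Rate, Scroll Depth and Session Duration and has the lower Bounce Rate, while the Dark Theme has the higher Click Through Rate; all of these differences are minor.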

In [11]:
##Let's start hypothesis testing
#For Conversion Rate
##Null Hypothesis: There is no difference in conversion rates between the light and dark themes
##Alternative Hypothesis: There is a difference in conversion rates between the light and dark themes


In [12]:
## Conversion Rate samples for each theme
conversion_rates_light = df.loc[df['Theme'] == 'Light Theme', 'Conversion Rate']
conversion_rates_dark = df.loc[df['Theme'] == 'Dark Theme', 'Conversion Rate']

# Two-sample t-test, Light vs Dark; equal_var=False selects Welch's variant,
# which does not assume the two groups share a variance.
t_stat, p_value = ttest_ind(
    conversion_rates_light,
    conversion_rates_dark,
    equal_var=False,
)
t_stat, p_value

(0.4748494462782632, 0.6349982678451778)

In [13]:
## Click Through Rate samples for both themes
click_trhough_rates_light = df.loc[df['Theme'] == 'Light Theme', 'Click Through Rate']
click_trhough_rates_dark = df.loc[df['Theme'] == 'Dark Theme', 'Click Through Rate']

# Two-sample t-test, Light vs Dark; equal_var=False selects Welch's variant,
# which does not assume the two groups share a variance.
t_stat_ctr, p_value_ctr = ttest_ind(
    click_trhough_rates_light,
    click_trhough_rates_dark,
    equal_var=False,
)
t_stat_ctr, p_value_ctr

(-1.9781708664172253, 0.04818435371010704)

In [14]:
# Bounce Rate samples for both themes
bounce_rates_light = df.loc[df['Theme'] == 'Light Theme', 'Bounce Rate']
bounce_rates_dark = df.loc[df['Theme'] == 'Dark Theme', 'Bounce Rate']

# Two-sample t-test, Light vs Dark; equal_var=False selects Welch's variant,
# which does not assume the two groups share a variance.
t_stat_bounce, p_value_bounce = ttest_ind(
    bounce_rates_light,
    bounce_rates_dark,
    equal_var=False,
)
t_stat_bounce, p_value_bounce

(-1.2018883310494073, 0.229692077505148)

In [16]:
# Scroll depth samples for both themes
scroll_depth_light = df.loc[df['Theme'] == 'Light Theme', 'Scroll_Depth']
scroll_depth_dark = df.loc[df['Theme'] == 'Dark Theme', 'Scroll_Depth']

# Two-sample t-test, Light vs Dark; equal_var=False selects Welch's variant,
# which does not assume the two groups share a variance.
t_stat_scroll, p_value_scroll = ttest_ind(
    scroll_depth_light,
    scroll_depth_dark,
    equal_var=False,
)
t_stat_scroll, p_value_scroll

(0.7562277864140986, 0.4496919249484911)

In [17]:
## Collect the four t-test results into one table, one row per metric
comparison_table = pd.DataFrame(
    [
        ('Click Through Rate', t_stat_ctr, p_value_ctr),
        ('Conversion Rate', t_stat, p_value),
        ('Bounce Rate', t_stat_bounce, p_value_bounce),
        ('Scroll Depth', t_stat_scroll, p_value_scroll),
    ],
    columns=['Metric', 'T-Statistic', 'P-Value'],
)

In [18]:
# Display the per-metric t-statistics and p-values
comparison_table

Unnamed: 0,Metric,T-Statistic,P-Value
0,Click Through Rate,-1.978171,0.048184
1,Conversion Rate,0.474849,0.634998
2,Bounce Rate,-1.201888,0.229692
3,Scroll Depth,0.756228,0.449692


In [19]:
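##Click Through Rate: The difference is statistically significant at the 5% level, but only marginally (P-Value = 0.048); the negative t-statistic (Light minus Dark) indicates the Dark Theme has the higher Click Through Rate. Because four metrics were tested without a multiple-comparison correction, treat this result with caution (a Bonferroni threshold of 0.05/4 = 0.0125 would not be met).
#Conversion Rate: No statistically significant difference was found (P-Value = 0.635).
#Bounce Rate: There is no statistically significant difference in Bounce Rates between the themes (P-Value = 0.230).
#Scroll Depth: Similarly, no statistically significant difference is observed in Scroll Depths (P-Value = 0.450).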

In [20]:
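##Therefore we conclude that the Dark Theme shows more user engagement than the Light Theme on Click Through Rate only (P-Value = 0.048, borderline); Conversion Rate, Bounce Rate and Scroll Depth show no statistically significant difference between the themes.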