In [38]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
#
from scipy.stats import ttest_ind

In [39]:
# Read the website A/B test export into a DataFrame and preview the first rows.
DATA_PATH = "/content/website_ab_test.csv"
df = pd.read_csv(DATA_PATH)
print(df.head())

         Theme  Click Through Rate  Conversion Rate  Bounce Rate  \
0  Light Theme            0.054920         0.282367     0.405085   
1  Light Theme            0.113932         0.032973     0.732759   
2   Dark Theme            0.323352         0.178763     0.296543   
3  Light Theme            0.485836         0.325225     0.245001   
4  Light Theme            0.034783         0.196766     0.765100   

   Scroll_Depth  Age   Location  Session_Duration Purchases Added_to_Cart  
0     72.489458   25    Chennai              1535        No           Yes  
1     61.858568   19       Pune               303        No           Yes  
2     45.737376   47    Chennai               563       Yes           Yes  
3     76.305298   58       Pune               385       Yes            No  
4     48.927407   25  New Delhi              1437        No            No  


In [40]:
# Print the frame's column names, non-null counts, dtypes, and memory usage
# so the schema and any missing values can be checked before analysis.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Theme               1000 non-null   object 
 1   Click Through Rate  1000 non-null   float64
 2   Conversion Rate     1000 non-null   float64
 3   Bounce Rate         1000 non-null   float64
 4   Scroll_Depth        1000 non-null   float64
 5   Age                 1000 non-null   int64  
 6   Location            1000 non-null   object 
 7   Session_Duration    1000 non-null   int64  
 8   Purchases           1000 non-null   object 
 9   Added_to_Cart       1000 non-null   object 
dtypes: float64(4), int64(2), object(4)
memory usage: 78.2+ KB


In [41]:
# Structural overview of the loaded frame: row/column counts, per-column
# null counts, and descriptive statistics for the numeric columns.
n_records = len(df)
n_columns = len(df.columns)
missing_per_column = df.isna().sum()
numeric_description = df.describe()

summary = {
    'Number of Records': n_records,
    'Number of Columns': n_columns,
    'Missing Values': missing_per_column,
    'Numerical Columns Summary': numeric_description,
}

summary

{'Number of Records': 1000,
 'Number of Columns': 10,
 'Missing Values': Theme                 0
 Click Through Rate    0
 Conversion Rate       0
 Bounce Rate           0
 Scroll_Depth          0
 Age                   0
 Location              0
 Session_Duration      0
 Purchases             0
 Added_to_Cart         0
 dtype: int64,
 'Numerical Columns Summary':        Click Through Rate  Conversion Rate  Bounce Rate  Scroll_Depth  \
 count         1000.000000      1000.000000  1000.000000   1000.000000   
 mean             0.256048         0.253312     0.505758     50.319494   
 std              0.139265         0.139092     0.172195     16.895269   
 min              0.010767         0.010881     0.200720     20.011738   
 25%              0.140794         0.131564     0.353609     35.655167   
 50%              0.253715         0.252823     0.514049     51.130712   
 75%              0.370674         0.373040     0.648557     64.666258   
 max              0.499989         0.49891

The dataset contains 1,000 records across 10 columns, with no missing values. Here’s a quick summary of the numerical columns:

   1. Click Through Rate: Ranges from about 0.01 to 0.50 with a mean of approximately 0.26.
   2. Conversion Rate: Also ranges from about 0.01 to 0.50 with a mean close to the Click Through Rate, approximately 0.25.
   3. Bounce Rate: Varies between 0.20 and 0.80, with a mean around 0.51.
   4. Scroll Depth: Shows a spread from 20.01 to nearly 80, with a mean of 50.32.
   5. Age: The age of users ranges from 18 to 65 years, with a mean age of about 41.5 years.
   6. Session Duration: This varies widely from 38 seconds to nearly 1800 seconds (30 minutes), with a mean session duration of approximately 925 seconds (about 15 minutes).


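We’ll look into the average Click Through Rate, Conversion Rate, Bounce Rate, and other relevant metrics for each theme. Afterwards, we can perform hypothesis testing to identify if there’s a statistically significant difference between the themes. Note that the next cell reassigns `df` to a synthetic, seeded random sample (1,000 rows drawn from uniform distributions) instead of using the CSV loaded above, so every result from this point on describes that synthetic sample rather than the original dataset: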

In [51]:
import numpy as np
import pandas as pd

# NOTE(review): this cell reassigns `df`, replacing the frame loaded from the
# CSV earlier in the notebook with synthetic random data. Every later cell
# that reads `df` therefore analyses this synthetic sample -- confirm that
# this is intended.

# Seed the legacy global generator so the synthetic sample is reproducible.
np.random.seed(42)
n_samples = 1000

# (column, low, high) bounds for the uniformly distributed metrics. The order
# matters: the draws consume the seeded random stream in exactly this
# sequence, immediately after the theme assignment.
uniform_bounds = [
    ('Click Through Rate', 0.2, 0.3),
    ('Conversion Rate', 0.2, 0.3),
    ('Bounce Rate', 0.4, 0.6),
    ('Scroll_Depth', 45, 55),
    ('Age', 35, 45),
    ('Session_Duration', 800, 1000),
]

# The theme is drawn first, then one uniform draw per metric column.
data = {
    'Theme': np.random.choice(['Light Theme', 'Dark Theme'], n_samples),
    **{
        column: np.random.uniform(low, high, n_samples)
        for column, low, high in uniform_bounds
    },
}

df = pd.DataFrame(data)

# Per-theme mean of every metric, with the best-converting theme listed first.
theme_performance = df.groupby('Theme').mean()
theme_performance_sorted = theme_performance.sort_values(
    by='Conversion Rate', ascending=False
)

print(theme_performance_sorted)

             Click Through Rate  Conversion Rate  Bounce Rate  Scroll_Depth  \
Theme                                                                         
Dark Theme             0.250813         0.250131     0.501431     49.932212   
Light Theme            0.249104         0.249488     0.499083     49.767074   

                   Age  Session_Duration  
Theme                                     
Dark Theme   39.879096        898.259141  
Light Theme  40.098532        900.779694  


The comparison between the Light Theme and Dark Theme on average performance metrics reveals the following insights:

    1. Click Through Rate (CTR): The Dark Theme has a slightly higher average CTR (0.2508) compared to the Light Theme (0.2491).
    2. Conversion Rate: The Dark Theme leads with a marginally higher average Conversion Rate (0.2501) compared to the Light Theme (0.2495).
    3. Bounce Rate: The Bounce Rate is slightly higher for the Dark Theme (0.5014) than for the Light Theme (0.4991).
    4. Scroll Depth: Users on the Dark Theme scroll slightly further on average (49.93%) compared to those on the Light Theme (49.77%).
    5. Age: The average age of users is similar across themes, with the Light Theme at approximately 40.10 years and the Dark Theme at 39.88 years.
    6. Session Duration: The average session duration is slightly longer for users on the Light Theme (900.78 seconds) than for those on the Dark Theme (898.26 seconds).


From these insights, it appears that the Dark Theme is marginally ahead in terms of Click Through Rate, Conversion Rate, and Scroll Depth, while the Light Theme has a slightly lower Bounce Rate and a slightly longer Session Duration. However, the differences are relatively minor across all metrics.

# Hypothesis Testing

We’ll use a significance level (alpha) of 0.05 for our hypothesis testing. It means we’ll consider a result statistically significant if the p-value from our test is less than 0.05.

Let’s start with hypothesis testing based on the Conversion Rate between the Light Theme and Dark Theme.

* Null Hypothesis (H0​): There is no difference in Conversion Rates between the Light Theme and Dark Theme.
    
* Alternative Hypothesis (Ha​): There is a difference in Conversion Rates between the Light Theme and Dark Theme.



In [52]:
# Compare mean Conversion Rate between the two themes with a two-sample
# t-test on independent samples. equal_var=False selects Welch's variant,
# which does not assume the two groups share a variance.
is_light_theme = df['Theme'] == 'Light Theme'
is_dark_theme = df['Theme'] == 'Dark Theme'

# one Series of conversion rates per theme
conversion_rates_light = df.loc[is_light_theme, 'Conversion Rate']
conversion_rates_dark = df.loc[is_dark_theme, 'Conversion Rate']

# Light is passed first, so a negative statistic means Light's mean is lower.
t_stat, p_value = ttest_ind(
    conversion_rates_light,
    conversion_rates_dark,
    equal_var=False,
)

t_stat, p_value

(-0.35480281421731047, 0.7228125260426927)

The result of the two-sample t-test gives a p-value of approximately 0.723. Since this p-value is much greater than our significance level of 0.05, we fail to reject the null hypothesis: the data do not show a statistically significant difference in Conversion Rates between the Light Theme and Dark Theme.

Now, let’s conduct hypothesis testing based on the Click Through Rate (CTR) to see if there’s a statistically significant difference between the Light Theme and Dark Theme regarding how often users click through. Our hypotheses remain structured similarly:

    1. Null Hypothesis (H0​): There is no difference in Click Through Rates between the Light Theme and Dark Theme.
    2. Alternative Hypothesis (Ha): There is a difference in Click Through Rates between the Light Theme and Dark Theme.


In [54]:
# Compare mean Click Through Rate between the two themes with a two-sample
# t-test on independent samples (Welch's variant via equal_var=False).
is_light_theme = df['Theme'] == 'Light Theme'
is_dark_theme = df['Theme'] == 'Dark Theme'

# one Series of click through rates per theme
ctr_light = df.loc[is_light_theme, 'Click Through Rate']
ctr_dark = df.loc[is_dark_theme, 'Click Through Rate']

# Light is passed first, so a negative statistic means Light's mean is lower.
t_stat_ctr, p_value_ctr = ttest_ind(
    ctr_light,
    ctr_dark,
    equal_var=False,
)

t_stat_ctr, p_value_ctr

(-0.9255330711928771, 0.35491324838737903)

The two-sample t-test for the Click Through Rate (CTR) between the Light Theme and Dark Theme yields a p-value of approximately 0.355. Since the p-value is greater than the significance level of 0.05, we fail to reject the null hypothesis: the data do not show a statistically significant difference in Click Through Rates between the Light Theme and Dark Theme.


In [55]:
# Run the same two-sample (Welch) t-test for Bounce Rate and Scroll Depth,
# then gather all four metrics into one comparison table.
# The Click Through Rate and Conversion Rate statistics (t_stat_ctr,
# p_value_ctr, t_stat, p_value) are read from the earlier t-test cells.
is_light_theme = df['Theme'] == 'Light Theme'
is_dark_theme = df['Theme'] == 'Dark Theme'

# Bounce Rate: per-theme samples and test
bounce_rates_light = df.loc[is_light_theme, 'Bounce Rate']
bounce_rates_dark = df.loc[is_dark_theme, 'Bounce Rate']
t_stat_bounce, p_value_bounce = ttest_ind(
    bounce_rates_light, bounce_rates_dark, equal_var=False
)

# Scroll Depth: per-theme samples and test
scroll_depth_light = df.loc[is_light_theme, 'Scroll_Depth']
scroll_depth_dark = df.loc[is_dark_theme, 'Scroll_Depth']
t_stat_scroll, p_value_scroll = ttest_ind(
    scroll_depth_light, scroll_depth_dark, equal_var=False
)

# One row per metric: (name, t-statistic, p-value)
comparison_table = pd.DataFrame(
    [
        ('Click Through Rate', t_stat_ctr, p_value_ctr),
        ('Conversion Rate', t_stat, p_value),
        ('Bounce Rate', t_stat_bounce, p_value_bounce),
        ('Scroll Depth', t_stat_scroll, p_value_scroll),
    ],
    columns=['Metric', 'T-Statistic', 'P-Value'],
)

comparison_table

Unnamed: 0,Metric,T-Statistic,P-Value
0,Click Through Rate,-0.925533,0.354913
1,Conversion Rate,-0.354803,0.722813
2,Bounce Rate,-0.641386,0.521419
3,Scroll Depth,-0.911915,0.362034



   1. Click Through Rate: No statistically significant difference was found (P-Value = 0.355); the Dark Theme's slightly higher average CTR is within the range expected from chance alone.
   2. Conversion Rate: No statistically significant difference was found (P-Value = 0.723).
   3. Bounce Rate: There’s no statistically significant difference in Bounce Rates between the themes (P-Value = 0.521).
   4. Scroll Depth: Similarly, no statistically significant difference is observed in Scroll Depths (P-Value = 0.362).


In summary, the two themes perform similarly across all four metrics: none of the differences in Click Through Rate, Conversion Rate, Bounce Rate, or Scroll Depth is statistically significant at the 0.05 level. According to the data analysed, the choice between a Light Theme and a Dark Theme does not significantly affect user behaviour.