In [9]:
# import libraries
import pandas as pd
import numpy as np
from scipy import stats

In [10]:
# Load the three-month FitTrack export into a DataFrame
csv_path = 'fittrack_data_3months.csv'
df = pd.read_csv(csv_path)

In [11]:
# Preview the first five rows to confirm the columns loaded as expected
df.head(n=5)

Unnamed: 0,Date,User_ID,Has_QuickLog,Logged_Workout
0,2024-01-31,1,0,0
1,2024-01-31,2,0,1
2,2024-01-31,3,0,1
3,2024-01-31,4,0,1
4,2024-01-31,5,0,0


In [16]:
# Number of rows for each distinct Has_QuickLog value
df['Has_QuickLog'].value_counts()

Has_QuickLog
0    450000
1    450000
Name: count, dtype: int64

In [17]:
# Number of rows for each distinct Logged_Workout value
df['Logged_Workout'].value_counts()

Logged_Workout
1    547683
0    352317
Name: count, dtype: int64

In [24]:
# Inspect the Python type of the first Date entry (before any conversion)
first_date = df['Date'][0]
type(first_date)

str

In [25]:
# Parse the Date column into pandas datetimes and store it back on the frame
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [26]:
# Re-inspect the type of the first Date entry after the conversion
first_date = df['Date'][0]
type(first_date)

pandas._libs.tslibs.timestamps.Timestamp

In [27]:
# Sum Logged_Workout for every (Date, Has_QuickLog) combination ...
workouts_by_day_and_flag = df.groupby(['Date', 'Has_QuickLog'])['Logged_Workout'].sum()
# ... then move the innermost index level (Has_QuickLog) into columns,
# leaving one row per date and one column per Has_QuickLog value
grouped = workouts_by_day_and_flag.unstack()

In [29]:
# Preview the first five dates of the per-day, per-group totals
grouped.head(n=5)

Has_QuickLog,0,1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-31,2370,2970
2024-02-01,2416,2985
2024-02-02,2506,3009
2024-02-03,1636,1992
2024-02-04,1633,2075


In [30]:
# Split the per-day totals into one Series per group, keyed by the
# Has_QuickLog column label of `grouped`
CONTROL_FLAG, TREATMENT_FLAG = 0, 1
results_a = grouped[CONTROL_FLAG]    # control group (without QuickLog)
results_b = grouped[TREATMENT_FLAG]  # treatment group (with QuickLog)

In [33]:
# Sanity check: first five daily totals for the control group
results_a.head(n=5)

Date
2024-01-31    2370
2024-02-01    2416
2024-02-02    2506
2024-02-03    1636
2024-02-04    1633
Name: 0, dtype: int64

In [34]:
# Sanity check: first five daily totals for the treatment group
results_b.head(n=5)

Date
2024-01-31    2970
2024-02-01    2985
2024-02-02    3009
2024-02-03    1992
2024-02-04    2075
Name: 1, dtype: int64

In [35]:
# Average of the per-day totals for each group
avg_dau_a, avg_dau_b = results_a.mean(), results_b.mean()

print(f"Average DAU for group A (without quick log): {avg_dau_a:.2f}")
print(f"Average DAU for group B (with quick log): {avg_dau_b:.2f}")

Average DAU for group A (without quick log): 2702.06
Average DAU for group B (with quick log): 3383.31


**Let's use statistics to evaluate:**

**t-statistic:** Measures the size of the difference relative to the variation in your sample data. It is used to determine whether there is a statistically significant difference between the means of the two groups.


**p-value:** Indicates the probability of observing such an extreme difference (or more extreme) if the null hypothesis were true. A lower p-value indicates stronger evidence against the null hypothesis, suggesting that the observed difference is statistically significant.

In [42]:
# Two-sample t-test comparing the per-day totals of the two groups.
# NOTE(review): ttest_ind defaults to equal_var=True (Student's t-test) and
# treats the two series as independent samples, although both are indexed by
# the same dates -- confirm these assumptions are acceptable for this analysis.
ttest_result = stats.ttest_ind(results_a, results_b)
t_statistic = ttest_result.statistic
p_value = ttest_result.pvalue

print(f"T-statistic: {t_statistic:.4f}")
print(f"P-value: {p_value:.4e}")

T-statistic: -7.2872
P-value: 9.9151e-12


**Interpretation:**

Given the **t-statistic of -7.2872** and the very small **p-value of 9.9151e-12**, we can conclude that there is a **statistically significant difference** between the means of results_a and results_b.

The negative sign of the t-statistic (-7.2872) indicates that the mean of results_a is lower than the mean of results_b; the very small p-value is what tells us this difference is statistically significant.

Therefore, based on these results, **we reject the null hypothesis and accept the alternative hypothesis** that there is indeed a significant difference between the groups represented by results_a and results_b.

In [43]:
# Turn the p-value and the two group averages into a plain-language verdict
alpha = 0.05  # significance threshold the p-value is compared against
is_significant = p_value < alpha

if not is_significant:
    print("The difference is not statistically significant.")
else:
    print("The difference is statistically significant.")
    # Direction of the effect: compare the treatment average with the control average
    verdict = (
        "The quick log button improved daily active users."
        if avg_dau_b > avg_dau_a
        else "The quick log button decreased daily active users."
    )
    print(verdict)

The difference is statistically significant.
The quick log button improved daily active users.


In [44]:
# Relative change of the treatment average versus the control average, in percent
dau_lift = avg_dau_b - avg_dau_a
percent_improvement = (dau_lift / avg_dau_a) * 100
print(f"Percentage improvement: {percent_improvement:.2f}%")

Percentage improvement: 25.21%
