In [5]:
import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import ttest_ind, kstest
from statsmodels.api import stats

# Q1. Advertising strategies

Suppose you conducted an experiment to investigate the impact of two different advertising strategies (A and B) on the click-through rates of a website.

After running the experiment, you found that the click-through count for Strategy A was 150, and for Strategy B, it was 200.

Perform a hypothesis test to determine if the difference in click-through rates between Strategy A and Strategy B is statistically significant. Use a significance level of 0.05. Assume 1000 users were exposed to each strategy.

In [6]:
# Number of users exposed to each strategy
n_A = n_B = 1000

# Number of users who clicked through for each strategy
clicks_A = 150
clicks_B = 200

# Proportions for each strategy
prop_A = clicks_A / n_A
prop_B = clicks_B / n_B


# Perform two-sample proportion z-test
z_stat, p_value = stats.proportions_ztest([clicks_A, clicks_B], [n_A, n_B], alternative='two-sided')

# Print the results
print(f"Z-statistic: {z_stat}")
print(f"P-value: {p_value}")

# Interpret the results
alpha = 0.05
if p_value < alpha:
   print("Reject the null hypothesis. There is a significant difference in click-through rates between Strategy A and Strategy B.")
else:
   print("Fail to reject the null hypothesis. There is no significant difference in click-through rates between Strategy A and Strategy B.")


Z-statistic: -2.9424494316825
P-value: 0.0032562696284120576
Reject the null hypothesis. There is a significant difference in click-through rates between Strategy A and Strategy B.


# Q2. Age group vs Coffee Type

A coffee shop wants to understand the relationship between the age group ('20-30','31-40', '41-50') and their preferred coffee type (Light Roast, Medium Roast, Dark Roast) based on data collected from a local Coffee Barista.

Dataset: [Data](./coffee.csv)

Sample data:

![](https://d2beiqkhq929f0.cloudfront.net/public_assets/assets/000/059/328/original/Screenshot_2023-12-13_at_6.46.05_PM.png?1702473375)


Do age groups and coffee types interact to influence the number of orders?

Conduct an appropriate hypothesis test to determine the interaction effects of the relationship at a 5% significance level.

In [3]:
df = pd.read_csv('coffee.csv')
df.head()

Unnamed: 0,Age_Group,Coffee_Type,Number_of_Orders
0,20-30,Light Roast,45
1,20-30,Medium Roast,60
2,20-30,Dark Roast,30
3,31-40,Light Roast,35
4,31-40,Medium Roast,40


In [4]:
# Two Way Anova

# fit an ols model on the data frame
# use 'fit()' to fit the linear model
# ols('dependent variable ~ C(independent variable1) * C(independent variable2)', data=df).fit()
test = ols('Number_of_Orders ~ C(Age_Group) * C(Coffee_Type)', data=df).fit()

# create a table for a 2-way ANOVA test
# Pass the linear model 'test'
# 'typ = 2' performs two-way ANOVA
anova_table = sm.stats.anova_lm(test, typ = 2)

# Display the results
# Significance level is 0.05
print(anova_table)

                                 sum_sq   df         F    PR(>F)
C(Age_Group)                 686.777778  2.0  5.104046  0.032992
C(Coffee_Type)               143.444444  2.0  1.066061  0.384141
C(Age_Group):C(Coffee_Type)  493.888889  4.0  1.835260  0.206332
Residual                     605.500000  9.0       NaN       NaN
