#  Case: Opportunity cost

## Setup

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st

import altair as alt

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Data

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/kirenz/datasets/master/opportunity_cost.csv")
df.head()

Unnamed: 0,group,decision
0,control,buy video
1,control,buy video
2,control,buy video
3,control,buy video
4,control,buy video


In [3]:
df_control = df[(df["group"] == "control")]
df_treatment = df[(df["group"] == "treatment")]

control_total = len(df_control)
treatment_total = len(df_treatment)

control_not_buy = len(df_control[df_control['decision']=="not buy video"])
treatment_not_buy = len(df_treatment[df_treatment['decision']=="not buy video"])

control_p = round(control_not_buy / control_total, 3)
treatment_p = round(treatment_not_buy / treatment_total, 3)

p_diff_ob = round(treatment_p - control_p, 3)

df_p_diff_ob = pd.DataFrame({'p_diff': [p_diff_ob] })
df_p_diff_ob

Unnamed: 0,p_diff
0,0.2


In [4]:
n1 = control_total
n2 = treatment_total
p1 = 0.2
p2 = 0.0

se = np.sqrt(((p1 * (1 - p1)) / n1) + ((p2 * (1 - p2)) / n2))
se

0.046188021535170064

The standard error, $SE = 0.046,$ is the equivalent of the model's standard deviation

## Z score hypothesis test

In [5]:
z = (p1 - p2) / se
z

4.330127018922194

Determine the right tail area

In [7]:
p_value = 1 - st.norm.cdf(z)
p_value

7.451167896244115e-06

In [12]:
f"{p_value:8f}"

'0.000007'

In [13]:
def significance(p_value):
    if p_value <= 0.05:
        return "Reject Null Hypothesis"
    else:
        return "Accept Null Hypothesis"

In [14]:
significance(p_value)

'Reject Null Hypothesis'

Using this area as the p-value, we see that the p-value is less than 0.05, we conclude that the treatment did indeed impact students’ spending
