# **Hypothesis Testing**

In [11]:
import numpy as np
import pandas as pd
from scipy import stats

## **Z-Test**

### **Z-Test with one mean**

In [12]:
pop_mean = 100
pop_sd = 15

visitors = np.random.randint(100, size=55)
visitors

array([79, 30, 69, 96, 24, 13,  9, 33, 71,  1, 44,  2, 27, 60, 24, 55, 45,
       11, 81,  4, 59, 40, 51, 56, 95, 87, 11, 82, 44, 52,  7, 10, 92, 75,
       16, 56, 43, 31, 80,  4, 66, 27, 50, 77, 15, 55, 13, 63, 91, 42, 27,
       30, 44, 11, 85], dtype=int32)

In [13]:
# Calculate the sameple mean and size
sample_mean = np.mean(visitors)
sample_size = len(visitors)

# Calculate z-xcore
z_score_calculated = (sample_mean - pop_mean)/(pop_sd / np.sqrt(sample_size))
print(z_score_calculated)

-27.28262110101253


In [14]:
alpha = 0.05
z_critical = stats.norm.ppf(1 - alpha)
print(z_critical)

1.6448536269514722


In [15]:
print("Calculated Z-Score:", z_score_calculated)
print("Critical Z-Score:", z_critical)

Calculated Z-Score: -27.28262110101253
Critical Z-Score: 1.6448536269514722


In [16]:
if(np.absolute(z_score_calculated) > np.absolute(z_critical)):
  print("Reject Null Hypothesis")
else:
  print("Failed to Reject Null Hypothesis")

Reject Null Hypothesis


**Conclusion:** Reject Null Hypothesis as the absolute value of calculated Z-Score is greater than absolute value of critical Z-Score.

### **Z-Test with two means**

In [17]:
camp1_data = np.random.randint(100, size=55)
camp2_data = np.random.randint(100, size=55)

# Calculate sample mean and standard deviation for campaign 1
camp1_mean = np.mean(camp1_data)
camp1_std = np.std(camp1_data)
n1 = len(camp1_data)

# Calculate sample mean and standard deviation for campaign 2
camp2_mean = np.mean(camp2_data)
camp2_std = np.std(camp2_data)
n2 = len(camp2_data)

# Calculate the z-score
z_score_calculated = (camp1_mean - camp2_mean) / np.sqrt(((camp1_std ** 2) / n1) + ((camp2_std ** 2) / n2))

alpha = 0.05

# Get the critical value
z_critical = stats.norm.ppf(1 - alpha)

print(f"Campaign 1 Mean: {camp1_mean}")
print(f"Campaign 2 Mean: {camp2_mean}")
print(f"Z-Score: {z_score_calculated}")
print(f"Critical Z-Value: {z_critical}")

# Conclusion based on comparing Z-score with critical Z-value
if np.absolute(z_score_calculated) > np.absolute(z_critical):
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")


Campaign 1 Mean: 51.27272727272727
Campaign 2 Mean: 47.92727272727273
Z-Score: 0.6733532329440863
Critical Z-Value: 1.6448536269514722
Fail to reject the null hypothesis.


**Conclusion:** We reject the null hypothesis is the absolute value of Z-Score is greater than the absolute critical value of Z-Score.

## **T-Test**

### **T-Test with single mean**

In [18]:
visitors = np.random.randint(100, size=69)
visitors

array([ 1,  3, 20, 16, 23,  2,  1, 19, 69, 57, 73, 65, 14, 82, 75, 71, 11,
       95, 67, 17, 43, 14, 17, 62, 20,  2, 58, 18, 97, 70, 81, 58, 38, 20,
        8,  9, 61, 73, 57, 23, 54, 58, 99, 66, 31, 22,  3, 42, 12, 34, 41,
       71, 52, 80, 58, 41, 60, 33, 89, 49, 90,  3, 57,  5, 37,  4,  1, 57,
       88], dtype=int32)

In [19]:
pop_mean = 150
sample_mean = np.mean(visitors)
sample_std = np.std(visitors, ddof=1)

n= len(visitors)
df = n-1

t_calc, _ = stats.ttest_1samp(visitors, pop_mean)

alpha = 0.05
t_critical = stats.t.ppf(1 - alpha, df)

In [20]:
print("Calculated T-Score:", t_calc)
print("Critical T-Score:", t_critical)

Calculated T-Score: -30.33627515587886
Critical T-Score: 1.667572280796708


In [21]:
if abs(t_calc) > t_critical:
  print("Reject Null Hypothesis")
else:
  print("Accept Null Hypothesis")

Reject Null Hypothesis


**Conclusion:** We reject the null hypothesis as the absolute calculated value of T-Score is greater than critical value of T-Score

### **T-Test with two means**

In [22]:
data = {
    'Gender': ['Female', 'Male', 'Female', 'Female', 'Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Male', 'Male',
               'Female', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Female', 'Female', 'Male', 'Male', 'Male',
               'Male', 'Female', 'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female', 'Female',
               'Male', 'Male', 'Female', 'Female', 'Male'],
    'Marks': [77, 89, 89, 91, 76, 85, 79, 78, 93, 88, 91, 77, 81, 88, 86, 80, 82, 95, 87, 83, 79, 94, 84, 73, 85, 85, 78, 88, 81, 82, 84, 86, 75, 83, 87, 80, 90, 90, 92, 92]
}
df = pd.DataFrame(data)
print(df)

# Separate the data into two groups
male_marks = df[df['Gender'] == 'Male']['Marks']
female_marks = df[df['Gender'] == 'Female']['Marks']

print(male_marks)
print(female_marks)

mean_male = male_marks.mean()
mean_female = female_marks.mean()

std_male = male_marks.std(ddof=1)
std_female = female_marks.std(ddof=1)

n_male = len(male_marks)
n_female = len(female_marks)

pooled_variance = ((std_male ** 2) / n_male) + ((std_female ** 2) / n_female)

t_statistic = (mean_male - mean_female) / np.sqrt(pooled_variance)

df = ((std_male ** 2 / n_male + std_female ** 2 / n_female) ** 2) / \
     ((std_male ** 2 / n_male) ** 2 / (n_male - 1) + (std_female ** 2 / n_female) ** 2 / (n_female - 1))

alpha = 0.05

t_critical = stats.t.ppf(1 - alpha / 2, df)

# Display results
print(f"Male Group Mean: {mean_male}")
print(f"Female Group Mean: {mean_female}")
print(f"T-Statistic: {t_statistic}")
print(f"Degrees of Freedom: {df}")
print(f"Critical T-Value: {t_critical}")

# Conclusion
if abs(t_statistic) > t_critical:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")


    Gender  Marks
0   Female     77
1     Male     89
2   Female     89
3   Female     91
4     Male     76
5     Male     85
6     Male     79
7     Male     78
8   Female     93
9   Female     88
10    Male     91
11    Male     77
12  Female     81
13    Male     88
14  Female     86
15  Female     80
16  Female     82
17    Male     95
18    Male     87
19  Female     83
20  Female     79
21    Male     94
22    Male     84
23    Male     73
24    Male     85
25  Female     85
26  Female     78
27    Male     88
28    Male     81
29    Male     82
30  Female     84
31  Female     86
32  Female     75
33  Female     83
34  Female     87
35    Male     80
36    Male     90
37  Female     90
38  Female     92
39    Male     92
1     89
4     76
5     85
6     79
7     78
10    91
11    77
13    88
17    95
18    87
21    94
22    84
23    73
24    85
27    88
28    81
29    82
35    80
36    90
39    92
Name: Marks, dtype: int64
0     77
2     89
3     91
8     93
9     88
12    81
14

**Conclusion**: We failed to reject the null hypothesis as the absolute calculated T-Score is less than absolute critical T-Score

## **F-Score**

In [23]:
import pandas as pd
from scipy.stats import f, f_oneway

data = {
    "Shipping Option": ["Standard", "Express", "Same-Day", "Standard", "Express", "Same-Day","Standard", "Express", "Same-Day"],
    "Purchase Amounts": [50, 70, 90, 55, 75, 85, 60, 80, 95]
}
df = pd.DataFrame(data)

standard_shipping = df[df["Shipping Option"] == "Standard"]["Purchase Amounts"]
express_shipping = df[df["Shipping Option"] == "Express"]["Purchase Amounts"]
same_day_shipping = df[df["Shipping Option"] == "Same-Day"]["Purchase Amounts"]


t = 3
n = len(df)

df_between = t - 1  
df_within = n - t  

f_statistic, _ = f_oneway(standard_shipping, express_shipping, same_day_shipping)
alpha = 0.05
f_critical = stats.f.ppf(1 - alpha, df_between, df_within)


print(f"F-Statistic: {f_statistic}")
print(f"F-Critical Value: {f_critical}")

if f_statistic > f_critical:
    print("Reject the null hypothesis: There are significant differences between the group means.")
else:
    print("Fail to reject the null hypothesis: There are no significant differences between the group means.")


F-Statistic: 37.00000000000006
F-Critical Value: 5.143252849784718
Reject the null hypothesis: There are significant differences between the group means.


**Conclusion**: We reject the null hypothesis concluding that there are significant differences between the group means.