In [None]:
# Import Libraries and Load Data

In [None]:
# Imports for the hypothesis tests below, followed by loading the dataset
import pandas as pd
from scipy import stats
from statsmodels.stats.weightstats import ztest
import numpy as np

# Read the customer CSV into `df`, the DataFrame used by the test cells below
df = pd.read_csv(filepath_or_buffer='synthetic_customer_data.csv')

In [None]:
# Z-Tests
# One-sample z-tests follow: Income > 50,000, Age ≠ 40, Spending Score Post < 100

In [None]:
# 🔍 Z-Test 1: One-tailed test to check if average Income > 50,000
# H0: mean Income = 50,000 vs. H1: mean Income > 50,000 (alternative='larger').
z_stat, p_val = ztest(df['Income'], value=50_000, alternative='larger')
print(f"Z-Test 1 (Income > 50k): Z = {z_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 Z-Test 2: Two-tailed test to check if average Age ≠ 40
# H0: mean Age = 40 vs. H1: mean Age ≠ 40 (alternative='two-sided').
z_stat, p_val = ztest(df['Age'], value=40, alternative='two-sided')
print(f"Z-Test 2 (Age ≠ 40): Z = {z_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 Z-Test 3: One-tailed test to check if average Spending Score Post < 100
# H0: mean Spending_Score_post = 100 vs. H1: mean < 100 (alternative='smaller').
z_stat, p_val = ztest(df['Spending_Score_post'], value=100, alternative='smaller')
print(f"Z-Test 3 (Spending Score Post < 100): Z = {z_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# One-Sample T-Tests
# One-sample t-tests follow: Age > 35, Income ≠ 60,000, Purchase Frequency < 20

In [None]:
# 🔍 T-Test 1: One-sample, one-tailed test to check if average Age > 35
# H0: mean Age = 35 vs. H1: mean Age > 35.
# Ask SciPy for the one-sided p-value directly (alternative='greater') rather
# than halving the two-sided one: halving is only valid when t has the
# hypothesised sign, and the decision below must use the same p-value that is
# printed (previously the printout used p/2 while the decision used p).
t_stat, p_val = stats.ttest_1samp(df['Age'], popmean=35, alternative='greater')
print(f"T-Test 1 (Age > 35): t = {t_stat:.2f}, p = {p_val:.4f}")

# Summary: Reject H0 if the one-sided p < 0.05, otherwise fail to reject.
if p_val < 0.05:
    print('➡️ Reject the null hypothesis.')
else:
    print('➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 T-Test 2: One-sample, two-tailed test to check if average Income ≠ 60,000
# H0: mean Income = 60,000 vs. H1: mean Income ≠ 60,000 (SciPy's default two-sided test).
t_stat, p_val = stats.ttest_1samp(df['Income'], popmean=60_000)
print(f"T-Test 2 (Income ≠ 60k): t = {t_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 T-Test 3: One-sample, one-tailed test to check if Purchase Frequency < 20
# H0: mean Purchase_Frequency = 20 vs. H1: mean Purchase_Frequency < 20.
# Ask SciPy for the one-sided p-value directly (alternative='less') rather
# than halving the two-sided one: halving is only valid when t is negative,
# and the decision below must use the same p-value that is printed
# (previously the printout used p/2 while the decision used p).
t_stat, p_val = stats.ttest_1samp(df['Purchase_Frequency'], popmean=20, alternative='less')
print(f"T-Test 3 (Purchase Frequency < 20): t = {t_stat:.2f}, p = {p_val:.4f}")

# Summary: Reject H0 if the one-sided p < 0.05, otherwise fail to reject.
if p_val < 0.05:
    print('➡️ Reject the null hypothesis.')
else:
    print('➡️ Fail to reject the null hypothesis.')

In [None]:
# Independent and Paired T-Tests
# Two-sample tests follow: Male vs Female Income, Pre vs Post Spending Score, Satisfaction Level 4 vs 1

In [None]:
# 🔍 T-Test 4: Independent two-sample, one-tailed test if Male Income > Female Income
# H0: mean male Income = mean female Income vs. H1: male mean > female mean.
male_income = df.loc[df['Gender'] == 'Male', 'Income']
female_income = df.loc[df['Gender'] == 'Female', 'Income']
# Ask SciPy for the one-sided p-value directly (alternative='greater': first
# sample's mean greater than the second's) rather than halving the two-sided
# one: halving is only valid when t is positive, and the decision below must
# use the same p-value that is printed (previously the printout used p/2
# while the decision used p).
t_stat, p_val = stats.ttest_ind(male_income, female_income, alternative='greater')
print(f"2-Sample T-Test 1 (Male > Female Income): t = {t_stat:.2f}, p = {p_val:.4f}")

# Summary: Reject H0 if the one-sided p < 0.05, otherwise fail to reject.
if p_val < 0.05:
    print('➡️ Reject the null hypothesis.')
else:
    print('➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 T-Test 5: Paired t-test to check if Spending Score changed (Pre vs Post)
# H0: mean paired difference (pre - post) = 0 vs. H1: it is non-zero (two-sided).
t_stat, p_val = stats.ttest_rel(
    df['Spending_Score_pre'],
    df['Spending_Score_post'],
)
print(f"Paired T-Test 2 (Pre vs Post Spending): t = {t_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 T-Test 6: Independent two-sample, two-tailed test between Satisfaction Level 4 vs 1
# Compares Income for rows with Customer_Satisfaction == 4 against rows with == 1.
s1 = df.loc[df['Customer_Satisfaction'] == 4, 'Income']
s2 = df.loc[df['Customer_Satisfaction'] == 1, 'Income']
t_stat, p_val = stats.ttest_ind(s1, s2)
print(f"2-Sample T-Test 3 (Satisfaction 4 vs 1 Income): t = {t_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# Extra Independent T-Test: Young vs Senior
# Compares Purchase Frequency between the Young and Senior age groups

In [None]:
# 🔍 T-Test 7: Independent two-sample, two-tailed test between Young vs Senior Purchase Frequency
# Bucket Age into three labelled groups; the new 'Age_Group' column is stored on df.
df['Age_Group'] = pd.cut(
    df['Age'],
    bins=[0, 30, 50, 100],
    labels=['Young', 'Middle', 'Senior'],
)
young_group = df.loc[df['Age_Group'] == 'Young', 'Purchase_Frequency']
senior_group = df.loc[df['Age_Group'] == 'Senior', 'Purchase_Frequency']
# equal_var=False: do not assume the two groups share a variance (Welch's t-test).
t_stat, p_val = stats.ttest_ind(young_group, senior_group, equal_var=False)
print(f"2-Sample T-Test 4 (Young vs Senior Purchase Frequency): t = {t_stat:.2f}, p = {p_val:.4f}")

# Decision at the 5% significance level: reject H0 only when p < 0.05.
print('➡️ Reject the null hypothesis.' if p_val < 0.05 else '➡️ Fail to reject the null hypothesis.')

In [None]:
# 🔍 ANOVA 3: Test if Income differs across Customer Satisfaction levels
# Chi-Square Tests