In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from statsmodels.stats.weightstats import ztest


from method import hypothesis_result, val_print

In [2]:
df = pd.read_csv('synthetic_customer_data.csv')
df

Unnamed: 0,Customer_ID,Age,Gender,Income,Education_Level,Spending_Score_pre,Spending_Score_post,Customer_Satisfaction,Purchase_Frequency
0,1,56,Male,49504,PhD,-68.815035,16.846098,4,3
1,2,46,Male,63209,PhD,225.243581,131.759754,2,26
2,3,32,Male,58315,High School,98.176549,-100.654257,1,1
3,4,60,Male,56685,PhD,-32.483138,113.987856,2,11
4,5,25,Male,69212,PhD,-249.940571,131.711507,1,16
...,...,...,...,...,...,...,...,...,...
495,496,37,Male,66851,Master's,32.579632,166.147017,3,29
496,497,41,Male,68546,Master's,19.438430,2.088649,2,6
497,498,29,Female,66715,PhD,-35.316629,23.070091,3,16
498,499,52,Male,69640,Bachelor's,33.848384,-126.016477,5,27


In [3]:
import pandas as pd

# Dictionary
p_val_dict = {
    "p > 0.10": "No or very weak evidence",
    "0.05 < p ≤ 0.10": "Weak evidence",
    "0.01 < p ≤ 0.05": "Moderate evidence",
    "0.001 < p ≤ 0.01": "Strong evidence",
    "p ≤ 0.001": "Very strong or extreme evidence"
}

# Convert dictionary to a DataFrame
p_val_df = pd.DataFrame(list(p_val_dict.items()), columns=["P-Value Range", "Evidence Strength"])

# Display DataFrame
p_val_df


Unnamed: 0,P-Value Range,Evidence Strength
0,p > 0.10,No or very weak evidence
1,0.05 < p ≤ 0.10,Weak evidence
2,0.01 < p ≤ 0.05,Moderate evidence
3,0.001 < p ≤ 0.01,Strong evidence
4,p ≤ 0.001,Very strong or extreme evidence


## Z-Tests (Population Mean Known or Large Sample)

#### You are testing whether the average customer income is greater than $50,000.


##### ------------------------------- Income Senerio ----------------------------------------------------------------

In [4]:
import pprint

In [5]:
def refrence(key):
    cal_reference = {
                        "z_test":{
                                    "parameters" :"sample_mean, std_dev,n(sample_data len) ",
                                    "z_value" : "(sample_mean - pop_mean) / (std_dev / np.sqrt(n))",
                                    "p_value_two_tailed" : "2 * (1 - stats.norm.cdf(abs(z_value))) [H₁: μ ≠ 30]",
                                    "p_value_right_tailed" : "1 - stats.norm.cdf(z_value)  [H₁: μ > 30]",
                                    "p_value_left_tailed" : "stats.norm.cdf(z_value)  [H₁: μ < 30]"
                                }
                            
                    }
    # print (cal_reference)
    return cal_reference.get(key)
refrence('z_test')

{'parameters': 'sample_mean, std_dev,n(sample_data len) ',
 'z_value': '(sample_mean - pop_mean) / (std_dev / np.sqrt(n))',
 'p_value_two_tailed': '2 * (1 - stats.norm.cdf(abs(z_value))) [H₁: μ ≠ 30]',
 'p_value_right_tailed': '1 - stats.norm.cdf(z_value)  [H₁: μ > 30]',
 'p_value_left_tailed': 'stats.norm.cdf(z_value)  [H₁: μ < 30]'}

In [6]:
## Z-Test 1.1: One-tailed test to check if average Income > 50,000

Ho = "Income > 50,000"

z_stat, p_val = ztest(df['Income'], value=50000, alternative='larger')
print(f"z_stat = {z_stat:.2f}, p_val = {p_val:.4f}")

hypothesis_result(p=p_val, Ho=Ho)


z_stat = 16.02, p_val = 0.0000
Very strong/extreme evidence Highly significant — very strong support for rejecting H₀ [Income > 50,000]
Very strong support for H₁ (p <= 0.001)


In [7]:
Ho = "Income > 50,000"

sample_data = df['Income']
sample_mean = sample_data.mean()
std = sample_data.std()
pop_mean = 50000
n = len(sample_data)

z_value = (sample_mean - pop_mean) / (std/np.sqrt(n))
z_value

# greater than > 50000
right_tail = 1 - stats.norm.cdf(z_value)
right_tail
val_print(z_value,right_tail)
hypothesis_result(right_tail,Ho=Ho)

stat: 16.02, p_val: 0.0000
Very strong/extreme evidence Highly significant — very strong support for rejecting H₀ [Income > 50,000]
Very strong support for H₁ (p <= 0.001)


In [8]:
## Z-Test 1.2: One-tailed test to check if average Income < 50,000

Ho = "Income < 50,000"

z_stat, p_val = ztest(df['Income'], value=50000, alternative='smaller')
print(f"z_stat = {z_stat:.2f}, p_val = {p_val:.4f}")

hypothesis_result(p=p_val, Ho=Ho)


z_stat = 16.02, p_val = 1.0000
No evidence: Fail to reject H₀ [Income < 50,000] — the result is not statistically significant
No support for H₁ (p > 0.10:)


In [9]:
Ho = "Income < 50,000"
sample_data = df['Income']
sample_mean = sample_data.mean()
std = sample_data.std()
pop_mean = 50000
n = len(sample_data)

z_value = (sample_mean - pop_mean) / (std/np.sqrt(n))


# less than 50000
left_tail = stats.norm.cdf(z_value)

val_print(z_value, left_tail)
hypothesis_result(left_tail, Ho=Ho)

stat: 16.02, p_val: 1.0000
No evidence: Fail to reject H₀ [Income < 50,000] — the result is not statistically significant
No support for H₁ (p > 0.10:)


In [10]:
## Z-Test 1.3: Two-tailed test to check if average Income <> 50,000

Ho = "Income <> 50,000"

z_stat, p_val = ztest(df['Income'], value=50000, alternative='two-sided')
print(f"z_stat = {z_stat:.2f}, p_val = {p_val:.4f}")

hypothesis_result(p=p_val, Ho=Ho)


z_stat = 16.02, p_val = 0.0000
Very strong/extreme evidence Highly significant — very strong support for rejecting H₀ [Income <> 50,000]
Very strong support for H₁ (p <= 0.001)


In [11]:
Ho = "Income <>  50,000"
sample_data = df['Income']
sample_mean = sample_data.mean()
std = sample_data.std()
pop_mean = 50000
n = len(sample_data)

z_value = (sample_mean - pop_mean) / (std/np.sqrt(n))


# less than <> 50000
two_tail = 2 * (1 - stats.norm.cdf(abs(z_value)))

val_print(z_value, two_tail)
hypothesis_result(two_tail, Ho=Ho)

stat: 16.02, p_val: 0.0000
Very strong/extreme evidence Highly significant — very strong support for rejecting H₀ [Income <>  50,000]
Very strong support for H₁ (p <= 0.001)


In [12]:
## Z-Test 2: Two-tailed test to check if average Age ≠ 40

Ho = "Avg Age <> 40"
z_stat, p_val =  ztest(df['Age'], value =40, alternative='two-sided')
val_print(z_stat,p_val)

hypothesis_result(p=p_val, Ho=Ho)

stat: 2.13, p_val: 0.0328
Moderate evidence: Reject H₀ [Avg Age <> 40] — statistically significant at 5 percent level
Moderate support for H₁ (0.01 < p <= 0.05)


In [13]:
Ho = "Avg Age <> 40"
sample_data = df['Age']
sample_mean = sample_data.mean()
std = sample_data.std()
pop_mean = 40
n = len(sample_data)

z_value = (sample_mean - pop_mean)/ (std/np.sqrt(n))

smaller_tail = stats.norm.cdf(z_value)

greater_tail = 1 - smaller_tail

two_tail = 2 * (1 - (stats.norm.cdf(abs(z_value))))

hypothesis_result(p=two_tail,Ho= Ho )

Moderate evidence: Reject H₀ [Avg Age <> 40] — statistically significant at 5 percent level
Moderate support for H₁ (0.01 < p <= 0.05)


# T-Test

### # One-Sample T-Test: Check if average Income is significantly different from 60,000

In [14]:
Ho = "Average Income is <> 60000"
sample_data = df['Income']
pop_mean = 60000

t_stat, p_val = stats.ttest_1samp(sample_data, pop_mean)
val_print(t_stat,p_val)

hypothesis_result(p_val,Ho=Ho)

stat: 0.88, p_val: 0.3785
No evidence: Fail to reject H₀ [Average Income is <> 60000] — the result is not statistically significant
No support for H₁ (p > 0.10:)


#### In a two-sample independent T-test, we'll compare the mean Income between Male and Female customers to see if there's a significant difference.


In [15]:
Ho = "income comaprision between Male and Female"
male_income = df[df['Gender']=='Male']['Income']

female_income = df[df['Gender']=='Female']['Income']

t_stat , p_value = stats.ttest_ind(male_income,female_income)

val_print(t_stat,p_val)
hypothesis_result(p_val, Ho)


stat: -1.88, p_val: 0.3785
No evidence: Fail to reject H₀ [income comaprision between Male and Female] — the result is not statistically significant
No support for H₁ (p > 0.10:)


#### In a paired T-test, we compare the pre- and post-Spending_Score for each customer to see if there's a significant difference in spending scores before and after some intervention.

In [22]:
df

Unnamed: 0,Customer_ID,Age,Gender,Income,Education_Level,Spending_Score_pre,Spending_Score_post,Customer_Satisfaction,Purchase_Frequency
0,1,56,Male,49504,PhD,-68.815035,16.846098,4,3
1,2,46,Male,63209,PhD,225.243581,131.759754,2,26
2,3,32,Male,58315,High School,98.176549,-100.654257,1,1
3,4,60,Male,56685,PhD,-32.483138,113.987856,2,11
4,5,25,Male,69212,PhD,-249.940571,131.711507,1,16
...,...,...,...,...,...,...,...,...,...
495,496,37,Male,66851,Master's,32.579632,166.147017,3,29
496,497,41,Male,68546,Master's,19.438430,2.088649,2,6
497,498,29,Female,66715,PhD,-35.316629,23.070091,3,16
498,499,52,Male,69640,Bachelor's,33.848384,-126.016477,5,27


In [24]:
Ho = "Singnificane between pre and post spending"
pre_spending = df['Spending_Score_pre']
post_spending = df['Spending_Score_post']

t_stat, p_val = stats.ttest_rel(pre_spending,post_spending)
val_print(t_stat,p_val)

hypothesis_result(p_val,Ho)

stat: 0.98, p_val: 0.3295
No evidence: Fail to reject H₀ [Singnificane between pre and post spending] — the result is not statistically significant
No support for H₁ (p > 0.10:)


# Chi-Square Test Used for categorical data to test relationships between variables.

##### Are customer satisfaction and education level independent?

In [16]:
df.head()

Unnamed: 0,Customer_ID,Age,Gender,Income,Education_Level,Spending_Score_pre,Spending_Score_post,Customer_Satisfaction,Purchase_Frequency
0,1,56,Male,49504,PhD,-68.815035,16.846098,4,3
1,2,46,Male,63209,PhD,225.243581,131.759754,2,26
2,3,32,Male,58315,High School,98.176549,-100.654257,1,1
3,4,60,Male,56685,PhD,-32.483138,113.987856,2,11
4,5,25,Male,69212,PhD,-249.940571,131.711507,1,16


In [35]:
Ho = "Variable are Independent"
Customer_Satisfaction = df['Customer_Satisfaction']
Education_Level = df['Education_Level']
contingency = pd.crosstab(Customer_Satisfaction, Education_Level)
chi2, p_val , dof, expected = stats.chi2_contingency(contingency)
print(f"Chi-Square:{chi2:.2f}\nPvalue: {p_val:.4f}\nDegree od freedom: {dof}\nExpected Value\n{expected}")

hypothesis_result(p_val,Ho=Ho)

Chi-Square:20.40
Pvalue: 0.0598
Degree od freedom: 12
Expected Value
[[29.92  28.38  24.42  27.28 ]
 [25.568 24.252 20.868 23.312]
 [29.376 27.864 23.976 26.784]
 [25.568 24.252 20.868 23.312]
 [25.568 24.252 20.868 23.312]]
Weak evidence: Borderline result  H₀ [Variable are Independent] — might warrant further investigation
Weak support for H₁ (0.05 < p <= 0.10)


##### Are Gender and education level independent?

In [42]:
Ho = "Gender and education are not dependent"
Gender = df['Gender']
Education_Level = df['Education_Level']

contingency = pd.crosstab(Gender, Education_Level)

chi2, p_val, dof, expected = stats.chi2_contingency(contingency)
val_print(chi2,p_val)
hypothesis_result(p_val, Ho=Ho)

stat: 5.00, p_val: 0.1720
No evidence: Fail to reject H₀ [Gender and education are not dependent] — the result is not statistically significant
No support for H₁ (p > 0.10:)


# ANOVA (Analysis of Variance)
Used to compare 3 or more groups and see if there's a difference in means.

#### 1. Is there a difference in purchase frequency across different education levels?

In [None]:
Ho = "No difference in purchase frequency"


## Neumerical column
# [group[df.select_dtypes(include=(float,int)).columns] for name, group in df.groupby('Education_Level')]

## For specific Required Column
education_purchase = [group['Purchase_Frequency'] for name , group in df.groupby('Education_Level')]
education_purchase

[5      18
 8      10
 11     19
 12      4
 15     24
        ..
 479    24
 493     1
 494    22
 498    27
 499    20
 Name: Purchase_Frequency, Length: 136, dtype: int64,
 2       1
 10      6
 14     13
 19      6
 22     24
        ..
 485     7
 486     8
 487    13
 488     6
 490     2
 Name: Purchase_Frequency, Length: 129, dtype: int64,
 6      16
 9      18
 13     27
 24     14
 32      6
        ..
 481    27
 483     8
 491     8
 495    29
 496     6
 Name: Purchase_Frequency, Length: 111, dtype: int64,
 0       3
 1      26
 3      11
 4      16
 7      23
        ..
 478    25
 484    29
 489    18
 492    11
 497    16
 Name: Purchase_Frequency, Length: 124, dtype: int64]

In [70]:
f_stat, p_val = stats.f_oneway(*education_purchase)
val_print(f_stat,p_val)
hypothesis_result(p_val, Ho=Ho)

stat: 0.59, p_val: 0.6190
No evidence: Fail to reject H₀ [No difference in purchase frequency] — the result is not statistically significant
No support for H₁ (p > 0.10:)


## A/B Testing Data Type:
Usually compares proportions or means between two groups.