In [3]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.datasets import load_iris
import seaborn as sns
import matplotlib.pyplot as plt

# Load the Iris dataset
iris = sns.load_dataset('iris')
print(iris.head())

   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa


In [4]:
# Z-Test
sample = iris['sepal_length']
mu = 5.8  # Hypothetical population mean
sigma = sample.std(ddof=0)  # Population std
n = len(sample)
sample_mean = sample.mean()
z = (sample_mean - mu) / (sigma / np.sqrt(n))
p = 2 * (1 - stats.norm.cdf(abs(z)))
print(f"Z-Statistic = {z:.4f}, p-value = {p:.4f}")

Z-Statistic = 0.6431, p-value = 0.5202


In [5]:
# T-Test (One-Sample)
t_stat, p_val = stats.ttest_1samp(iris['sepal_width'], 3.0)
print(f"One-Sample T-Test:\nT-Statistic = {t_stat:.4f}, p-value = {p_val:.4f}")

One-Sample T-Test:
T-Statistic = 1.6110, p-value = 0.1093


In [6]:
# T-Test (Two-Sample)
setosa = iris[iris['species'] == 'setosa']['petal_length']
versicolor = iris[iris['species'] == 'versicolor']['petal_length']
t_stat, p_val = stats.ttest_ind(setosa, versicolor)
print(f"Two-Sample T-Test:\nT-Statistic = {t_stat:.4f}, p-value = {p_val:.4f}")

Two-Sample T-Test:
T-Statistic = -39.4927, p-value = 0.0000


In [7]:
# ANOVA
grouped = [group['sepal_width'].values for name, group in iris.groupby('species')]
f_stat, p_val = stats.f_oneway(*grouped)
print(f"ANOVA:\nF-Statistic = {f_stat:.4f}, p-value = {p_val:.4f}")

ANOVA:
F-Statistic = 49.1600, p-value = 0.0000


In [9]:
# Chi-Square Test
#Convert petal_length into categories
iris['petal_length_cat'] = pd.cut(iris['petal_length'], bins=3, labels=['Short', 'Medium', 'Long'])
#Create contigency table
contingency_table = pd.crosstab(iris['species'], iris['petal_length_cat'])
#Chi-Square Test
chi2, p, dof, expected = stats.chi2_contingency(contingency_table)
print(f"Chi-Square Test:\nChi2 = {chi2:.4f}, p-value = {p:.4f}, Degrees of Freedom = {dof}")

Chi-Square Test:
Chi2 = 256.5217, p-value = 0.0000, Degrees of Freedom = 4


In [10]:
# F-Test
setosa_var = iris[iris['species'] == 'setosa']['sepal_length']
virginica_var = iris[iris['species'] == 'virginica']['sepal_length']
f = np.var(setosa_var, ddof=1) / np.var(virginica_var, ddof=1)
df1 = len(setosa_var) - 1
df2 = len(virginica_var) - 1
p = 1 - stats.f.cdf(f, df1, df2)
print(f"F-Test:\nF-Ratio = {f:.4f}, p-value = {p:.4f}")

F-Test:
F-Ratio = 0.3073, p-value = 1.0000
