In [3]:
import numpy as np
import pandas as pd
from scipy.stats import shapiro, kstest, anderson, normaltest, skew, kurtosis

# Generate normally distributed age data
age = np.random.normal(loc=40, scale=10, size=1000)

# Generate normally distributed savings_amount data
savings_amount = np.random.normal(loc=50000, scale=10000, size=1000)

# Create a DataFrame
df = pd.DataFrame({
    'Age': age,
    'SavingsAmount': savings_amount
})

df


Unnamed: 0,Age,SavingsAmount
0,42.079630,46745.969581
1,63.790518,36531.299916
2,23.799048,59862.622681
3,26.203016,50119.359177
4,55.488219,51942.992305
...,...,...
995,41.045186,44243.445209
996,50.303197,41593.025575
997,42.753055,42804.080771
998,34.426948,36341.481447


In [5]:
# Shapiro-Wilk Test
stat, p = shapiro(df['Age'])
print('Shapiro-Wilk Test: Statistics=%.3f, p=%.3f' % (stat, p))

# Kolmogorov-Smirnov Test
stat, p = kstest(df['Age'], 'norm')
print('Kolmogorov-Smirnov Test: Statistics=%.3f, p=%.3f' % (stat, p))

# Anderson-Darling Test
result = anderson(df['Age'])
print('Anderson-Darling Test: Statistics=%.3f' % (result.statistic))

# D'Agostino and Pearson's Test
stat, p = normaltest(df['Age'])
print('D\'Agostino and Pearson\'s Test: Statistics=%.3f, p=%.3f' % (stat, p))

# Skewness and Kurtosis
s = skew(df['Age'])
k = kurtosis(df['Age'])
print('Skewness=%.3f, Kurtosis=%.3f' % (s, k))


Shapiro-Wilk Test: Statistics=0.999, p=0.909
Kolmogorov-Smirnov Test: Statistics=1.000, p=0.000
Anderson-Darling Test: Statistics=0.238
D'Agostino and Pearson's Test: Statistics=0.990, p=0.609
Skewness=-0.076, Kurtosis=-0.028


In [None]:
# NULL Hypothesis: The data is normally distributed 
# In the above tests, if the p-value is < 0.05, we can reject the NULL Hypothesis. 