# Perform hypothesis tests at the 95% confidence level (alpha = 0.05)

## KDDCUP99 XGB vs MLP

In [42]:
import numpy as np
from scipy.stats import ttest_ind,  mannwhitneyu
# Significance level used as the rejection threshold for both tests in this cell.
alpha = 0.05

# Hand-entered scores for the two models; the original labels are kept as-is.
# NOTE(review): presumably one AUC-PRC value per run/fold - confirm.
# normal = rs_tuned 1
kdd_xgb_aucprc = np.array([0.9942, 0.9946, 0.9947, 0.9951, 0.9946, 0.9940, 0.9951, 0.9946, 0.9951, 0.9946])

# ros
kdd_mlp_aucprc = np.array([0.9841, 0.9864, 0.9866, 0.9873, 0.9851, 0.9855, 0.9868, 0.9866, 0.9854, 0.9839])

print("t-test:")
# NOTE(review): ttest_ind is called with its defaults (independent samples).
# If both models were scored on the same folds, a paired test may be the
# better fit - confirm with the experiment setup.
t_stat, p_value = ttest_ind(kdd_xgb_aucprc, kdd_mlp_aucprc)
# Build the numeric summary once so both branches report identical fields.
t_test_details = f"p = {p_value}, alpha = {alpha}, t_stat = {t_stat}"
if p_value < alpha:
    t_test_message = f"Reject the null hypothesis: There is a significant difference between the models. {t_test_details}"
else:
    t_test_message = f"There is no significant difference between the models. {t_test_details}"
print(t_test_message)

print("\nmannwhitneyu:")

# Perform Mann-Whitney U test
u_stat, p_value_mannwhitney = mannwhitneyu(kdd_xgb_aucprc, kdd_mlp_aucprc, alternative='two-sided')
# One details string for both outcomes keeps the parentheses balanced and the
# u_stat formatting the same regardless of which branch is taken.
mannwhitney_details = f"p = {p_value_mannwhitney}, u_stat = {u_stat:.2f}, n = {len(kdd_mlp_aucprc)}"
if p_value_mannwhitney < alpha:
    mannwhitney_message = f"Reject the null hypothesis: There is a significant difference between the models ({mannwhitney_details})"
else:
    mannwhitney_message = f"There is no significant difference between the models ({mannwhitney_details})"
print(mannwhitney_message)

t-test:
Reject the null hypothesis: There is a significant difference between the models. p = 8.081915570690456e-15, alpha = 0.05, t_stat = 23.06951871198654

mannwhitneyu:
Reject the null hypothesis: There is a significant difference between the models (p = 0.00016780051625813792, u_stat = 100.00, n = 10))


## Credit Card XGB vs MLP

In [43]:
import numpy as np
from scipy.stats import ttest_ind, mannwhitneyu

# Significance level used as the rejection threshold for both tests in this cell.
alpha = 0.05


def describe_outcome(p, details):
    """Return the verdict sentence for a test with p-value ``p``.

    The null hypothesis is rejected when ``p`` is below the cell-level
    ``alpha``. ``details`` is the already-formatted numeric summary; it is
    wrapped in exactly one pair of parentheses for either outcome.
    """
    if p < alpha:
        return f"Reject the null hypothesis: There is a significant difference between the models ({details})"
    return f"There is no significant difference between the models ({details})"


# Hand-entered scores for the two models; the original labels are kept as-is.
# NOTE(review): presumably one AUC-PRC value per run/fold - confirm.
# normal = rs_tuned 1
cc_xgb_aucprc = np.array(
    [0.809, 0.797, 0.8706, 0.836, 0.8944, 0.8203, 0.9429, 0.9163, 0.775, 0.9342]
)

# ros
cc_mlp_aucprc = np.array(
    [0.7958, 0.808, 0.829, 0.7729, 0.8489, 0.7772, 0.9161, 0.8855, 0.7484, 0.8998]
)

print("t-test:")
# NOTE(review): ttest_ind is called with its defaults (independent samples).
# If both models were scored on the same folds, a paired test may be the
# better fit - confirm with the experiment setup.
t_stat, p_value = ttest_ind(cc_xgb_aucprc, cc_mlp_aucprc)
# ".2g" keeps two significant digits, so a small p-value is not flattened to
# "0.00" the way a fixed two-decimal format would render it.
t_test_details = f"p = {p_value:.2g}, alpha = {alpha}, t_stat = {t_stat:.2f}, n = {len(cc_mlp_aucprc)}"
print(describe_outcome(p_value, t_test_details))

print("\nmannwhitneyu:")

# Perform Mann-Whitney U test
u_stat, p_value_mannwhitney = mannwhitneyu(cc_xgb_aucprc, cc_mlp_aucprc, alternative='two-sided')
# Full-precision p and U are reported for both outcomes.
mannwhitney_details = f"p = {p_value_mannwhitney}, u_stat = {u_stat}, alpha = {alpha}, n = {len(cc_mlp_aucprc)}"
print(describe_outcome(p_value_mannwhitney, mannwhitney_details))

t-test:
There is no significant difference between the models (p = 0.25, alpha = 0.05, t_stat = 1.19, n = 10)

mannwhitneyu:
There is no significant difference between the models (p = 0.21229383619233166, u_stat = 67.0, alpha = 0.05, n = 10)
