In [4]:
import numpy as np
from scipy import stats

tables = {
    # DQ Issues
    "Inconsistent Learning": np.array([[5, 0], [24, 1]]),
    "Reduced Model Capacity": np.array([[4, 0], [24, 2]]),
    "Gradient Instability": np.array([[7, 0], [20, 3]]),
    "Gradient Skewness": np.array([[7, 0], [20, 23]]),
    "Overfitting": np.array([[4, 0], [24, 2]]),
    "Vanishing Gradients": np.array([[5, 0], [22, 3]]),
    "Poor Optimization": np.array([[8, 0], [18, 4]]),
    "Slow Convergence": np.array([[10, 0], [18, 2]]),
    "Incorrect Learning": np.array([[13, 0], [15, 2]]),
    # Preprocessing Issues
    "Abnormal Weight Distribution": np.array([[2, 0], [26, 2]]),
    "Sparse Parameter Updates": np.array([[7, 0], [21, 2]]),
    "Numerical Instability": np.array([[10, 0], [17, 3]]),
    "Skewed Bias Distributions": np.array([[13, 0], [14, 3]]),
    "Exploding Gradients": np.array([[8, 0], [18, 4]]),
    "High Variance in Weight Distribution": np.array([[13, 0], [13, 4]])
}

def calculate_statistics(table):
    #
    ep = 0.01
    b = table[0, 1] + ep
    c = table[1, 0] + ep

    statistic = (abs(b - c) - 1)**2 / (b + c)
    p_value = stats.chi2.sf(statistic, df=1)

    # Odds ratio
    a = table[0, 0] + ep
    d = table[1, 1]  + ep
    odds_ratio = (a/b) / (c/d)

    return statistic, p_value, odds_ratio

print("Statistical Test for DQ and Preprocessing Issues:")
print("-" * 80)
print(f"{'Scenario':<35} {'McNemar Stat':>15} {'p-value':>15} {'Odds Ratio':>15}")
print("-" * 80)

for name, table in tables.items():
    statistic, p_value, odds_ratio = calculate_statistics(table)
    print(f"{name:<35} {statistic:>15.4f} {p_value:>15.4f} {odds_ratio:>15.4f}")

Statistical Test for DQ and Preprocessing Issues:
--------------------------------------------------------------------------------
Scenario                               McNemar Stat         p-value      Odds Ratio
--------------------------------------------------------------------------------
Inconsistent Learning                       22.0233          0.0000         21.0750
Reduced Model Capacity                      22.0233          0.0000         33.5698
Gradient Instability                        18.0320          0.0000        105.4478
Gradient Skewness                           18.0320          0.0000        806.0975
Overfitting                                 22.0233          0.0000         33.5698
Vanishing Gradients                         20.0272          0.0000         68.5148
Poor Optimization                           16.0377          0.0001        178.3459
Slow Convergence                            16.0377          0.0001        111.7163
Incorrect Learning              