# Compare results of linear regression model and machine learning model on set data

In [1]:
import numpy as np
import scipy.stats

In [2]:
# All three arrays are ordered by set size: (6, 8, 10) colors.
# Sample count for each set size
n = np.array([10347, 10371, 1705])
# Linear regression model: accuracy for each set size
p1 = np.array([0.53861, 0.54758, 0.54604])
# Machine learning model: accuracy for each set size
p2 = np.array([0.58246, 0.57541, 0.57470])

In [3]:
def calc_p_value(n, p1, p2):
    """Print the z statistic and one-sided p value for two proportions.

    Hypotheses:
        H0: p1 == p2
        H1: p2 > p1

    The formula follows table C1 of
    https://doi.org/10.1148/radiol.2263011500 and uses the same sample
    count ``n`` for both proportions.

    Args:
        n: Number of samples behind each proportion.
        p1: First proportion (here, accuracy of the first model).
        p2: Second proportion (here, accuracy of the second model).

    Returns:
        None. The z statistic and the p value are written to stdout.
    """
    # Common proportion under H0: the plain mean of the two proportions.
    pooled = (p1 + p2) / 2
    # Standard error of the difference p1 - p2 when both share `pooled`.
    std_err = np.sqrt(2 * pooled * (1 - pooled) / n)
    z = (p1 - p2) / std_err
    # H1 is p2 > p1, so evidence against H0 lies in the lower tail of z.
    lower_tail = scipy.stats.norm.cdf(z)
    print("      z:", z)
    print("p value:", lower_tail)

In [4]:
# Six colors: first entry of the (6, 8, 10) arrays
calc_p_value(
    n=n[0],
    p1=p1[0],
    p2=p2[0],
)

      z: -6.354747668885115
p value: 1.0438451511418363e-10


In [5]:
# Eight colors: second entry of the (6, 8, 10) arrays
calc_p_value(
    n=n[1],
    p1=p1[1],
    p2=p2[1],
)

      z: -4.038762397198655
p value: 2.6866980401601672e-05


In [6]:
# Ten colors: third entry of the (6, 8, 10) arrays
calc_p_value(
    n=n[2],
    p1=p1[2],
    p2=p2[2],
)

      z: -1.6859406461735216
p value: 0.04590361602742372


In [7]:
# Across all set lengths: weight each accuracy by its sample count,
# then compare the two weighted means using the total sample count.
p1_avg = (n * p1).sum() / n.sum()
p2_avg = (n * p2).sum() / n.sum()
calc_p_value(n=n.sum(), p1=p1_avg, p2=p2_avg)

      z: -7.528535765495648
p value: 2.5656231847287956e-14
