In [10]:
import pickle
import numpy as np
from scipy.stats import norm

# Short burst dataset

Test whether:

* the samples come from different distributions, or
* the samples come from the same distribution.

In [11]:
results_file = "./outputs/results_dataset_grb180703949.pkl"
# NOTE(security): unpickling can execute arbitrary code; only load result
# files produced by a trusted run.
# The context manager closes the file handle as soon as the payload is read,
# instead of leaving it open until garbage collection.
with open(results_file, 'rb') as results_fh:
    results = pickle.load(results_fh)
fluences = results["fluences"]
# One label per algorithm stored under results["true"].
labels = list(results["true"].keys())

In [12]:
# Maps each algorithm label to (number of positive-significance entries,
# total number of entries).
detection_summary = {}

for label in labels:
    # Boolean mask, computed once: True where the recorded significance is
    # strictly positive.
    detected = np.asarray(results["true"][label]["significance"]) > 0.
    # Plain Python ints keep the displayed summary independent of the
    # NumPy scalar repr.
    total_correct = int(np.count_nonzero(detected))
    tests_num = detected.size
    detection_summary[label] = (total_correct, tests_num)

In [13]:
# Display the (total_correct, tests_num) pair stored for each label.
detection_summary

{'Exhaustive': (19024, 30000),
 'FOCuS': (18877, 30000),
 'FOCuS-AES': (18913, 30000),
 'GBM': (16400, 30000),
 'BATSE': (14100, 30000)}

We consider the hypothesis test in which:

* Null hypothesis: the two binomial distributions are the same, i.e. the two algorithms have the same detection probability.
* Alternative hypothesis: the two binomial distributions are different.

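Under the normal approximation to the binomial distribution (justified here by the large sample size), the test statistic is:

$$z = \frac{|\hat{p}_1 - \hat{p}_2|}{\sqrt{\hat{p}\,(1 - \hat{p})\left(\frac{1}{n_1} + \frac{1}{n_2}\right)}}, \qquad \hat{p} = \frac{n_1 \hat{p}_1 + n_2 \hat{p}_2}{n_1 + n_2},$$

where $\hat{p}_i = x_i / n_i$ is the fraction of detections in sample $i$.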

In [14]:
def test_statistics(x1, n1, x2, n2):
    p1 = x1 / n1
    p2 = x2 / n2
    p = (n1*p1 + n2*p2) / (n1 + n2)
    ts = np.abs(p1 - p2)/np.sqrt(p * (1 - p) * (1/n1 + 1/n2))
    return ts

In [15]:
label_1 = 'FOCuS-AES'
label_2 = 'Exhaustive'
z = test_statistics(
    *detection_summary[label_1],
    *detection_summary[label_2]
)
# Two-sided p-value P(|Z| >= z) = 2 * sf(z). Taking it straight from the
# survival function avoids the cancellation in 1 - (sf(-z) - sf(z)), which
# collapses to exactly 0 once z is large.
pvalue = 2 * norm.sf(z)
print("test statistic value between {} and {}: {:.2f}".format(label_1, label_2, z))
print("p value is {:.5f}".format(pvalue))

test statistic value between FOCuS-AES and Exhaustive: 0.94
p value is 0.34732


We cannot reject the null hypothesis at the $95\%$ confidence level since:

$$z < z_{\alpha/2} = 1.96.$$

In [16]:
label_1 = 'FOCuS'
label_2 = 'Exhaustive'
z = test_statistics(
    *detection_summary[label_1],
    *detection_summary[label_2]
)
# Two-sided p-value P(|Z| >= z) = 2 * sf(z). Taking it straight from the
# survival function avoids the cancellation in 1 - (sf(-z) - sf(z)), which
# collapses to exactly 0 once z is large.
pvalue = 2 * norm.sf(z)
print("test statistic value between {} and {}: {:.2f}".format(label_1, label_2, z))
print("p value is {:.5f}".format(pvalue))

test statistic value between FOCuS and Exhaustive: 1.24
p value is 0.21344


# Long burst dataset

In [17]:
# NOTE(review): `folder` is not referenced by any cell visible here; kept in
# case something outside this view relies on it.
folder ="20230413_160258_long/"
results_file = "./outputs/results_dataset_grb120707800.pkl"
# NOTE(security): unpickling can execute arbitrary code; only load result
# files produced by a trusted run.
# The context manager closes the file handle as soon as the payload is read,
# instead of leaving it open until garbage collection.
with open(results_file, 'rb') as results_fh:
    results = pickle.load(results_fh)
fluences = results["fluences"]
# One label per algorithm stored under results["true"].
labels = list(results["true"].keys())

In [18]:
# Maps each algorithm label to (number of positive-significance entries,
# total number of entries).
detection_summary = {}

for label in labels:
    # Boolean mask, computed once: True where the recorded significance is
    # strictly positive.
    detected = np.asarray(results["true"][label]["significance"]) > 0.
    # Plain Python ints keep the displayed summary independent of the
    # NumPy scalar repr.
    total_correct = int(np.count_nonzero(detected))
    tests_num = detected.size
    detection_summary[label] = (total_correct, tests_num)

In [19]:
# Display the (total_correct, tests_num) pair stored for each label.
detection_summary

{'Exhaustive': (25001, 30000),
 'FOCuS': (25072, 30000),
 'FOCuS-AES': (21626, 30000),
 'GBM': (18668, 30000),
 'BATSE': (13360, 30000)}

In [20]:
def test_statistics(x1, n1, x2, n2):
    p1 = x1 / n1
    p2 = x2 / n2
    p = (n1*p1 + n2*p2) / (n1 + n2)
    ts = np.abs(p1 - p2)/np.sqrt(p * (1 - p) * (1/n1 + 1/n2))
    return ts

FOCuS-AES and Exhaustive performed very differently on the long GRB dataset.

In [21]:
label_1 = 'FOCuS-AES'
label_2 = 'Exhaustive'
z = test_statistics(
    *detection_summary[label_1],
    *detection_summary[label_2]
)
# Two-sided p-value P(|Z| >= z) = 2 * sf(z). Taking it straight from the
# survival function avoids the cancellation in 1 - (sf(-z) - sf(z)), which
# collapses to exactly 0 once z is large.
pvalue = 2 * norm.sf(z)
print("test statistic value between {} and {}: {:.2f}".format(label_1, label_2, z))
print("p value is {:.5f}".format(pvalue))

test statistic value between FOCuS-AES and Exhaustive: 33.11
p value is 0.00000


The same cannot be said for FOCuS with the true background estimate, whose detection rate is statistically compatible with that of Exhaustive.

In [22]:
# Compare FOCuS against Exhaustive using the detection counts currently held
# in `detection_summary`.
label_1, label_2 = 'FOCuS', 'Exhaustive'
z = test_statistics(*detection_summary[label_1], *detection_summary[label_2])
print("test statistic value between {} and {}: {:.2f}".format(label_1, label_2, z))

test statistic value between FOCuS and Exhaustive: 0.78
