-
Notifications
You must be signed in to change notification settings - Fork 224
/
chi_squared_goodness_of_fit.js
103 lines (94 loc) · 4.78 KB
/
chi_squared_goodness_of_fit.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import chiSquaredDistributionTable from "./chi_squared_distribution_table.js";
import mean from "./mean.js";
/**
 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
 * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
 * (that is, counts of observations), each squared and divided by the number of observations expected given the
 * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution
 * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
 * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
 * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
 * cells and `c` is the number of estimated parameters for the distribution.
 *
 * Classes whose expected frequency is below three are merged into a neighbouring class (the preceding one, or the
 * following one for the very first class) before the statistic is computed. Observed values for which the
 * hypothesized distribution has no entry are treated as having an expected frequency of zero, so they are merged too.
 *
 * @param {Array<number>} data observed outcomes; each value is used as an array index when building the histogram,
 * so values are expected to be non-negative integers
 * @param {Function} distributionType a function that returns a point in a distribution:
 * for instance, binomial, bernoulli, or poisson. It is called with the mean of `data` and its result is read as a
 * mapping from outcome to probability.
 * @param {number} significance key used to look up the comparison value in `chiSquaredDistributionTable`
 * for the computed degrees of freedom
 * @returns {boolean} `true` when the table entry for the computed degrees of freedom and `significance` is
 * smaller than the χ2 statistic (the hypothesized distribution is rejected), `false` otherwise. If the table row
 * exists but has no entry for `significance`, the comparison is against `undefined` and the result is `false`.
 * @throws {TypeError} if `chiSquaredDistributionTable` has no row for the computed degrees of freedom
 * (number of classes after merging, minus 2)
 * @example
 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980).
 * var data1019 = [
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 *     2, 2, 2, 2, 2, 2, 2, 2, 2,
 *     3, 3, 3, 3
 * ];
 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05); //= false
 */
function chiSquaredGoodnessOfFit(data, distributionType, significance) {
    // Estimate from the sample data, a weighted mean.
    const inputMean = mean(data);
    // Calculated value of the χ2 statistic.
    let chiSquared = 0;
    // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test.
    // Lose one degree of freedom for estimating `lambda` from the sample data.
    const c = 1;
    // Generate the hypothesized distribution.
    const hypothesizedDistribution = distributionType(inputMean);
    const observedFrequencies = [];
    const expectedFrequencies = [];

    // Create an array holding a histogram from the sample data, of
    // the form `{ value: numberOfOccurrences }`
    for (let i = 0; i < data.length; i++) {
        if (observedFrequencies[data[i]] === undefined) {
            observedFrequencies[data[i]] = 0;
        }
        observedFrequencies[data[i]]++;
    }

    // The histogram we created might be sparse - there might be gaps
    // between values. So we iterate through the histogram, making
    // sure that instead of undefined, gaps have 0 values.
    for (let i = 0; i < observedFrequencies.length; i++) {
        if (observedFrequencies[i] === undefined) {
            observedFrequencies[i] = 0;
        }
    }

    // Create an array holding a histogram of expected data given the
    // sample size and hypothesized distribution.
    for (const k in hypothesizedDistribution) {
        if (k in observedFrequencies) {
            expectedFrequencies[+k] = hypothesizedDistribution[k] * data.length;
        }
    }

    // The hypothesized distribution may not have an entry for every observed
    // value. Give those classes an expected frequency of 0 so both histograms
    // have the same length and the statistic never divides by `undefined`;
    // the collapsing step below then folds them into a neighbouring class.
    for (let k = 0; k < observedFrequencies.length; k++) {
        if (expectedFrequencies[k] === undefined) {
            expectedFrequencies[k] = 0;
        }
    }

    // Working backward through the expected frequencies, collapse classes
    // if less than three observations are expected for a class.
    // This transformation is applied to the observed frequencies as well.
    // The collapsed class itself is removed with `splice(k, 1)`: removing the
    // last element instead would discard an unrelated class whenever the
    // sparse class is not the final one.
    for (let k = expectedFrequencies.length - 1; k > 0; k--) {
        if (expectedFrequencies[k] < 3) {
            expectedFrequencies[k - 1] += expectedFrequencies[k];
            expectedFrequencies.splice(k, 1);
            observedFrequencies[k - 1] += observedFrequencies[k];
            observedFrequencies.splice(k, 1);
        }
    }

    // The first class has no predecessor, so if it is still too sparse it is
    // folded forward into the class that follows it (when there is one).
    if (expectedFrequencies.length > 1 && expectedFrequencies[0] < 3) {
        expectedFrequencies[1] += expectedFrequencies[0];
        expectedFrequencies.shift();
        observedFrequencies[1] += observedFrequencies[0];
        observedFrequencies.shift();
    }

    // Iterate through the squared differences between observed & expected
    // frequencies, accumulating the `chiSquared` statistic.
    for (let k = 0; k < observedFrequencies.length; k++) {
        chiSquared +=
            Math.pow(observedFrequencies[k] - expectedFrequencies[k], 2) /
            expectedFrequencies[k];
    }

    // Calculate degrees of freedom for this test and look it up in the
    // `chiSquaredDistributionTable` in order to
    // accept or reject the goodness-of-fit of the hypothesized distribution.
    // Degrees of freedom, calculated as (number of class intervals -
    // number of hypothesized distribution parameters estimated - 1)
    const degreesOfFreedom = observedFrequencies.length - c - 1;
    return (
        chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared
    );
}
export default chiSquaredGoodnessOfFit;