-
Notifications
You must be signed in to change notification settings - Fork 0
/
friedman.jl
148 lines (122 loc) · 5.54 KB
/
friedman.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
export average_ranks, ChisqFriedmanTest, FDistFriedmanTest, FriedmanTest
using HypothesisTests: HypothesisTest, tiedrank_adj
# Opening paragraph shared by the docstrings of `FriedmanTest` and of its subtypes, into
# which it is interpolated. Declared `const` so the module-level binding is concretely
# typed and cannot be reassigned by accident.
const FRIEDMAN_DOC = """
Test the null hypothesis that `n` repeated observations of a set of `k` treatments
have the same distribution across all treatments. These observations are arranged
in `k` vectors `x_i` of `n` observations each or in an `(n, k)`-shaped matrix `X`.
"""
# Keyword-argument section shared by the docstrings of `FriedmanTest` and of its subtypes,
# into which it is interpolated. Declared `const` so the module-level binding is concretely
# typed and cannot be reassigned by accident.
const FRIEDMAN_KWARGS_DOC = """
# Keyword arguments
- `maximize_outcome=false` specifies whether the ranks represent a maximization or a
minimization of the outcomes.
"""
"""
FriedmanTest(x_1, x_2, ..., x_k; kwargs...) = FDistFriedmanTest(x_1, x_2, ..., x_k; kwargs...)
FriedmanTest(X; kwargs...) = FDistFriedmanTest(X; kwargs...)
$(FRIEDMAN_DOC)
The default version of this test, the `FDistFriedmanTest`, uses an F-distributed statistic.
**See also:** `FDistFriedmanTest`, `ChisqFriedmanTest`
$(FRIEDMAN_KWARGS_DOC)
"""
abstract type FriedmanTest <: HypothesisTest end
"""
ChisqFriedmanTest(x_1, x_2, ..., x_k; kwargs...)
ChisqFriedmanTest(X; kwargs...)
$(FRIEDMAN_DOC)
This version of the `FriedmanTest` uses a χ²-distributed statistic.
$(FRIEDMAN_KWARGS_DOC)
"""
struct ChisqFriedmanTest <: FriedmanTest
F::Float64 # test statistic
df::Int # number of degrees of freedom
r::Vector{Float64} # average ranks
n::Int # number of observations per treatment
k::Int # number of treatments
maximize_outcome::Bool # which optimization the ranks represent
end
"""
FDistFriedmanTest(x_1, x_2, ..., x_k; kwargs...)
FDistFriedmanTest(X; kwargs...)
$(FRIEDMAN_DOC)
This version of the `FriedmanTest` uses an F-distributed statistic.
$(FRIEDMAN_KWARGS_DOC)
"""
struct FDistFriedmanTest <: FriedmanTest
F::Float64 # test statistic
chisq::ChisqFriedmanTest
df_1::Int # degrees of freedom (first dimension)
df_2::Int # second dimension
end
# Calling the abstract type builds an `FDistFriedmanTest`; positional and keyword
# arguments are passed on unchanged.

# Matrix form.
function FriedmanTest(X::AbstractMatrix{T}; kwargs...) where {T<:Real}
    return FDistFriedmanTest(X; kwargs...)
end

# Vector form; all vectors must share the element type `T`.
function FriedmanTest(x::AbstractVector{T}...; kwargs...) where {T<:Real}
    return FDistFriedmanTest(x...; kwargs...)
end
# Matrix form of the χ² Friedman test. Rows of `X` are observations and columns are
# treatments. Each row is ranked separately, the ranks are averaged per column, and the
# statistic is computed from those average ranks.
#
# Throws an `ArgumentError` when `X` has fewer than 3 columns.
function ChisqFriedmanTest(
    X::AbstractMatrix{T}; maximize_outcome::Bool=false
) where {T<:Real}
    n, k = size(X)
    k >= 3 || throw(
        ArgumentError("The Friedman test requires at least 3 treatments; only $k were given")
    )

    ranks = rank_with_average_ties(X, maximize_outcome)
    r = mean(ranks; dims=1)  # 1×k row matrix: average rank of each treatment

    midrank = (k + 1) / 2  # mean of the ranks 1, ..., k
    statistic = 12 * n / (k * (k + 1)) * sum((r .- midrank) .^ 2)

    # `vec` turns the row matrix into a plain vector; the statistic has k-1 degrees of freedom
    return ChisqFriedmanTest(statistic, k - 1, vec(r), n, k, maximize_outcome)
end
# Vector form: each vector becomes one column of a matrix, which is then handed to the
# matrix form together with all keyword arguments.
function ChisqFriedmanTest(x::AbstractVector{T}...; kwargs...) where {T<:Real}
    X = hcat(x...)
    return ChisqFriedmanTest(X; kwargs...)
end
# Matrix form of the F-distributed Friedman test. The χ² variant is computed first (all
# keyword arguments are forwarded to it) and its statistic is then transformed into a
# statistic with k-1 and (k-1)*(n-1) degrees of freedom.
function FDistFriedmanTest(X::AbstractMatrix{T}; kwargs...) where {T<:Real}
    chisq = ChisqFriedmanTest(X; kwargs...)
    n, k, chi2 = chisq.n, chisq.k, chisq.F

    statistic = (n - 1) * chi2 / (n * (k - 1) - chi2)
    return FDistFriedmanTest(statistic, chisq, k - 1, (k - 1) * (n - 1))
end
# Vector form: each vector becomes one column of a matrix, which is then handed to the
# matrix form. Keyword arguments (e.g. `maximize_outcome`) are accepted and forwarded, as
# the docstring of `FDistFriedmanTest` advertises and as the `ChisqFriedmanTest` vector
# form already does; without them, `FriedmanTest(x_1, ..., x_k; maximize_outcome=true)`
# would fail with a `MethodError`.
FDistFriedmanTest(x::AbstractVector{T}...; kwargs...) where {T<:Real} =
    FDistFriedmanTest(hcat(x...); kwargs...)
# Rank the entries of every row of `X` separately and return the ranks as a `Float64`
# matrix with the same size as `X`. Ranks come from `tiedrank_adj`, whose second return
# value is discarded. With `maximize_outcome == true` each row is negated before ranking.
function rank_with_average_ties(X::AbstractMatrix{T}, maximize_outcome::Bool) where {T<:Real}
    ranks = zeros(size(X))
    @inbounds for row in 1:size(X, 1)
        observation = X[row, :]
        oriented = maximize_outcome ? -1 .* observation : observation
        ranks[row, :] = first(tiedrank_adj(oriented))
    end
    return ranks
end
"""
average_ranks(x) where x <: FriedmanTest
Return the average ranks of methods in the `FriedmanTest`.
"""
average_ranks(x::FDistFriedmanTest) = x.chisq.r
average_ranks(x::ChisqFriedmanTest) = x.r
# p-value of the F variant: right tail of an F distribution with (df_1, df_2) degrees
# of freedom, evaluated at the test statistic.
function HypothesisTests.pvalue(x::FDistFriedmanTest)
    reference = FDist(x.df_1, x.df_2)
    return pvalue(reference, x.F; tail=:right)
end

# p-value of the χ² variant: right tail of a χ² distribution with `df` degrees of
# freedom, evaluated at the test statistic.
function HypothesisTests.pvalue(x::ChisqFriedmanTest)
    reference = Chisq(x.df)
    return pvalue(reference, x.F; tail=:right)
end
# Human-readable names of the two test variants.
function HypothesisTests.testname(::FDistFriedmanTest)
    return "Friedman test with F-statistic"
end

function HypothesisTests.testname(::ChisqFriedmanTest)
    return "Friedman test with χ²-statistic"
end
# Describe the tested parameter as the tuple (name, value under h0, point estimate).
# The point estimate is always reported as `NaN`.
function HypothesisTests.population_param_of_interest(x::FriedmanTest)
    name = "Average ranks of treatments"
    value_under_h0 = "all equal"
    point_estimate = NaN
    return (name, value_under_h0, point_estimate)
end
# Every Friedman test variant uses the right tail by default.
function HypothesisTests.default_tail(test::FriedmanTest)
    return :right
end
# Print the parameters of an F-distributed Friedman test to `io`, one per line, each line
# prefixed with `indent`. Everything except the F statistic and its degrees of freedom is
# read from the wrapped χ² test.
function HypothesisTests.show_params(io::IO, x::FDistFriedmanTest, indent)
    chisq = x.chisq
    target = chisq.maximize_outcome ? "maximization" : "minimization"

    println(io, indent, "number of treatments: ", chisq.k)
    println(io, indent, "number of observations per treatment: ", chisq.n)
    println(io, indent, "F-statistic: ", x.F)
    println(io, indent, "optimization target of the ranks: ", target)

    # `show` (rather than `print`) so that the vector is rendered with the settings of `io`
    print(io, indent, "average ranks: ")
    show(io, chisq.r)
    println(io)

    println(io, indent, "number of degrees of freedom: ", x.df_1, ", ", x.df_2)
    return nothing
end
# Print the parameters of a χ² Friedman test to `io`, one per line, each line prefixed
# with `indent`.
function HypothesisTests.show_params(io::IO, x::ChisqFriedmanTest, indent)
    target = x.maximize_outcome ? "maximization" : "minimization"

    println(io, indent, "number of treatments: ", x.k)
    println(io, indent, "number of observations per treatment: ", x.n)
    println(io, indent, "χ²-statistic: ", x.F)
    println(io, indent, "optimization target of the ranks: ", target)

    # `show` (rather than `print`) so that the vector is rendered with the settings of `io`
    print(io, indent, "average ranks: ")
    show(io, x.r)
    println(io)

    println(io, indent, "number of degrees of freedom: ", x.df)
    return nothing
end