-
Notifications
You must be signed in to change notification settings - Fork 4
/
ResamplingUnderTheNullHypothesis.jl
111 lines (78 loc) · 2.17 KB
/
ResamplingUnderTheNullHypothesis.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# RESAMPLING UNDER THE NULL HYPOTHESIS
## In this tutorial
# - Seeding the pseudo-random number generator for reproducible results
# - Creating a DataFrame
# - Sampling from a set of categorical values using StatsBase
# - Sampling from a normal distribution using Distributions
# - Making sub-DataFrames using the filter function (conditional selection)
# - Extracting vectors from DataFrames
# - Summary statistics of a continuous numerical variable using StatsBase
# - Histogram using PlotlyJS
# - Shuffle a vector
# - Calculate a p-value using HypothesisTests
## Packages
using DataFrames
using Random
using Distributions
using StatsBase
using PlotlyJS
using HypothesisTests
## Data
# Simulated data set: one row per subject with a sequential ID, a group label
# sampled from {"I", "II"}, and a mass drawn from Normal(100, 10).
n = 1000 # Sample size
Random.seed!(12) # Fixed seed so that every run draws the same values
# Columns are given as `name => values` pairs. The group labels are sampled
# before the masses, so the order in which the RNG is consumed is fixed.
df = DataFrames.DataFrame(
    :ID => 1:n,
    :Group => StatsBase.sample(["I", "II"], n),
    :Mass => rand(Distributions.Normal(100, 10), n),
)
## Making sub-DataFrames
# Conditional selection: keep only the rows whose Group column equals the label.
# `==("I")` is the partially applied comparison `x -> x == "I"`.
group_I = filter(:Group => ==("I"), df)
group_II = filter(:Group => ==("II"), df)
## Extracting vectors from a DataFrame
# `df[:, :Mass]` returns a fresh copy of the column, so the vectors below can be
# used without touching the DataFrames they came from.
mass = df[:, :Mass]
mass_I = group_I[:, :Mass]
mass_II = group_II[:, :Mass]
# Group sizes: one mass value per row of each sub-DataFrame.
n_I = DataFrames.nrow(group_I)
n_II = DataFrames.nrow(group_II)
## Descriptive statistics
# Print the summary statistics of the mass in group I, then in group II.
foreach(StatsBase.describe, (mass_I, mass_II))
## Data visualization
# Overlaid, semi-transparent histograms of Mass, one per Group, so both
# distributions remain visible where they overlap.
PlotlyJS.plot(
    df,
    Layout(title = "Histogram of mass per group", barmode = "overlay");
    kind = "histogram",
    x = :Mass,
    color = :Group,
    marker = attr(opacity = 0.5),
)
## Difference in means (test statistic)
# Observed test statistic: mean mass of group I minus mean mass of group II.
difference_in_means = let mean_I = mean(mass_I), mean_II = mean(mass_II)
    mean_I - mean_II
end
## Reassignment under the null hypothesis
# Build the null distribution of the test statistic. Under the null hypothesis
# the group labels are exchangeable, so each resample shuffles all masses,
# treats the first n_I values as "group I" and the rest as "group II", and
# records the resulting difference in means.
resamples = 5000
# Concretely typed vector (instead of an untyped `[]`, which is a Vector{Any}).
means = Float64[]
sizehint!(means, resamples) # The final length is known up front
for _ in 1:resamples
    shuffled_mass = Random.shuffle(mass)
    new_group_I = shuffled_mass[1:n_I]
    # Parentheses make the range explicit: elements n_I + 1 through the last.
    new_group_II = shuffled_mass[(n_I + 1):end]
    # push! adds a single value; append! is meant for adding a collection.
    push!(means, mean(new_group_I) - mean(new_group_II))
end
# Histogram of the resampled differences in means (the null distribution).
PlotlyJS.plot(
    [histogram(x = means, opacity = 0.7)],
    Layout(title = "Distribution of means"),
)
## Probability of difference in means
# Two-sided resampling p-value: the fraction of resampled differences that are
# at least as extreme, in absolute value, as the observed difference.
# Comparing absolute values keeps the result correct whatever the sign of the
# observed difference; counting `means .< d` plus `means .> -d` is only a tail
# probability when d is negative, and approaches 2 when d is positive.
# Dividing by length(means) ties the denominator to the values actually counted.
count(x -> abs(x) >= abs(difference_in_means), means) / length(means)
# For comparison: p-value of the equal-variance two-sample t-test on the same data.
HypothesisTests.pvalue(EqualVarianceTTest(mass_I, mass_II))