This repository has been archived by the owner on Jul 24, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 298
/
moments.lean
370 lines (310 loc) · 15.4 KB
/
moments.lean
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
/-
Copyright (c) 2022 Rémy Degenne. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Rémy Degenne
-/
import probability.variance
/-!
# Moments and moment generating function
> THIS FILE IS SYNCHRONIZED WITH MATHLIB4.
> Any changes to this file require a corresponding PR to mathlib4.
## Main definitions
* `probability_theory.moment X p μ`: `p`th moment of a real random variable `X` with respect to
measure `μ`, `μ[X^p]`
* `probability_theory.central_moment X p μ`:`p`th central moment of `X` with respect to measure `μ`,
`μ[(X - μ[X])^p]`
* `probability_theory.mgf X μ t`: moment generating function of `X` with respect to measure `μ`,
`μ[exp(t*X)]`
* `probability_theory.cgf X μ t`: cumulant generating function, logarithm of the moment generating
function
## Main results
* `probability_theory.indep_fun.mgf_add`: if two real random variables `X` and `Y` are independent
and their mgf are defined at `t`, then `mgf (X + Y) μ t = mgf X μ t * mgf Y μ t`
* `probability_theory.indep_fun.cgf_add`: if two real random variables `X` and `Y` are independent
and their mgf are defined at `t`, then `cgf (X + Y) μ t = cgf X μ t + cgf Y μ t`
* `probability_theory.measure_ge_le_exp_cgf` and `probability_theory.measure_le_le_exp_cgf`:
Chernoff bound on the upper (resp. lower) tail of a random variable. For `t` nonnegative such that
the cgf exists, `ℙ(ε ≤ X) ≤ exp(- t*ε + cgf X ℙ t)`. See also
`probability_theory.measure_ge_le_exp_mul_mgf` and
`probability_theory.measure_le_le_exp_mul_mgf` for versions of these results using `mgf` instead
of `cgf`.
-/
open measure_theory filter finset real
noncomputable theory
open_locale big_operators measure_theory probability_theory ennreal nnreal
namespace probability_theory
variables {Ω ι : Type*} {m : measurable_space Ω} {X : Ω → ℝ} {p : ℕ} {μ : measure Ω}
include m
/-- Moment of a real random variable, `μ[X ^ p]`. -/
def moment (X : Ω → ℝ) (p : ℕ) (μ : measure Ω) : ℝ := μ[X ^ p]
/-- Central moment of a real random variable, `μ[(X - μ[X]) ^ p]`. -/
def central_moment (X : Ω → ℝ) (p : ℕ) (μ : measure Ω) : ℝ := μ[(X - (λ x, μ[X])) ^ p]
@[simp] lemma moment_zero (hp : p ≠ 0) : moment 0 p μ = 0 :=
by simp only [moment, hp, zero_pow', ne.def, not_false_iff, pi.zero_apply, integral_const,
algebra.id.smul_eq_mul, mul_zero]
@[simp] lemma central_moment_zero (hp : p ≠ 0) : central_moment 0 p μ = 0 :=
by simp only [central_moment, hp, pi.zero_apply, integral_const, algebra.id.smul_eq_mul,
mul_zero, zero_sub, pi.pow_apply, pi.neg_apply, neg_zero, zero_pow', ne.def, not_false_iff]
lemma central_moment_one' [is_finite_measure μ] (h_int : integrable X μ) :
central_moment X 1 μ = (1 - (μ set.univ).to_real) * μ[X] :=
begin
simp only [central_moment, pi.sub_apply, pow_one],
rw integral_sub h_int (integrable_const _),
simp only [sub_mul, integral_const, algebra.id.smul_eq_mul, one_mul],
end
@[simp] lemma central_moment_one [is_probability_measure μ] : central_moment X 1 μ = 0 :=
begin
by_cases h_int : integrable X μ,
{ rw central_moment_one' h_int,
simp only [measure_univ, ennreal.one_to_real, sub_self, zero_mul], },
{ simp only [central_moment, pi.sub_apply, pow_one],
have : ¬ integrable (λ x, X x - integral μ X) μ,
{ refine λ h_sub, h_int _,
have h_add : X = (λ x, X x - integral μ X) + (λ x, integral μ X),
{ ext1 x, simp, },
rw h_add,
exact h_sub.add (integrable_const _), },
rw integral_undef this, },
end
lemma central_moment_two_eq_variance [is_finite_measure μ] (hX : mem_ℒp X 2 μ) :
central_moment X 2 μ = variance X μ :=
by { rw hX.variance_eq, refl, }
section moment_generating_function
variables {t : ℝ}
/-- Moment generating function of a real random variable `X`: `λ t, μ[exp(t*X)]`. -/
def mgf (X : Ω → ℝ) (μ : measure Ω) (t : ℝ) : ℝ := μ[λ ω, exp (t * X ω)]
/-- Cumulant generating function of a real random variable `X`: `λ t, log μ[exp(t*X)]`. -/
def cgf (X : Ω → ℝ) (μ : measure Ω) (t : ℝ) : ℝ := log (mgf X μ t)
@[simp] lemma mgf_zero_fun : mgf 0 μ t = (μ set.univ).to_real :=
by simp only [mgf, pi.zero_apply, mul_zero, exp_zero, integral_const, algebra.id.smul_eq_mul,
mul_one]
@[simp] lemma cgf_zero_fun : cgf 0 μ t = log (μ set.univ).to_real :=
by simp only [cgf, mgf_zero_fun]
@[simp] lemma mgf_zero_measure : mgf X (0 : measure Ω) t = 0 :=
by simp only [mgf, integral_zero_measure]
@[simp] lemma cgf_zero_measure : cgf X (0 : measure Ω) t = 0 :=
by simp only [cgf, log_zero, mgf_zero_measure]
@[simp] lemma mgf_const' (c : ℝ) : mgf (λ _, c) μ t = (μ set.univ).to_real * exp (t * c) :=
by simp only [mgf, integral_const, algebra.id.smul_eq_mul]
@[simp] lemma mgf_const (c : ℝ) [is_probability_measure μ] : mgf (λ _, c) μ t = exp (t * c) :=
by simp only [mgf_const', measure_univ, ennreal.one_to_real, one_mul]
@[simp] lemma cgf_const' [is_finite_measure μ] (hμ : μ ≠ 0) (c : ℝ) :
cgf (λ _, c) μ t = log (μ set.univ).to_real + t * c :=
begin
simp only [cgf, mgf_const'],
rw log_mul _ (exp_pos _).ne',
{ rw log_exp _, },
{ rw [ne.def, ennreal.to_real_eq_zero_iff, measure.measure_univ_eq_zero],
simp only [hμ, measure_ne_top μ set.univ, or_self, not_false_iff], },
end
@[simp] lemma cgf_const [is_probability_measure μ] (c : ℝ) : cgf (λ _, c) μ t = t * c :=
by simp only [cgf, mgf_const, log_exp]
@[simp] lemma mgf_zero' : mgf X μ 0 = (μ set.univ).to_real :=
by simp only [mgf, zero_mul, exp_zero, integral_const, algebra.id.smul_eq_mul, mul_one]
@[simp] lemma mgf_zero [is_probability_measure μ] : mgf X μ 0 = 1 :=
by simp only [mgf_zero', measure_univ, ennreal.one_to_real]
@[simp] lemma cgf_zero' : cgf X μ 0 = log (μ set.univ).to_real :=
by simp only [cgf, mgf_zero']
@[simp] lemma cgf_zero [is_probability_measure μ] : cgf X μ 0 = 0 :=
by simp only [cgf_zero', measure_univ, ennreal.one_to_real, log_one]
lemma mgf_undef (hX : ¬ integrable (λ ω, exp (t * X ω)) μ) : mgf X μ t = 0 :=
by simp only [mgf, integral_undef hX]
lemma cgf_undef (hX : ¬ integrable (λ ω, exp (t * X ω)) μ) : cgf X μ t = 0 :=
by simp only [cgf, mgf_undef hX, log_zero]
lemma mgf_nonneg : 0 ≤ mgf X μ t :=
begin
refine integral_nonneg _,
intro ω,
simp only [pi.zero_apply],
exact (exp_pos _).le,
end
lemma mgf_pos' (hμ : μ ≠ 0) (h_int_X : integrable (λ ω, exp (t * X ω)) μ) : 0 < mgf X μ t :=
begin
simp_rw mgf,
have : ∫ (x : Ω), exp (t * X x) ∂μ = ∫ (x : Ω) in set.univ, exp (t * X x) ∂μ,
{ simp only [measure.restrict_univ], },
rw [this, set_integral_pos_iff_support_of_nonneg_ae _ _],
{ have h_eq_univ : function.support (λ (x : Ω), exp (t * X x)) = set.univ,
{ ext1 x,
simp only [function.mem_support, set.mem_univ, iff_true],
exact (exp_pos _).ne', },
rw [h_eq_univ, set.inter_univ _],
refine ne.bot_lt _,
simp only [hμ, ennreal.bot_eq_zero, ne.def, measure.measure_univ_eq_zero, not_false_iff], },
{ refine eventually_of_forall (λ x, _),
rw pi.zero_apply,
exact (exp_pos _).le, },
{ rwa integrable_on_univ, },
end
lemma mgf_pos [is_probability_measure μ] (h_int_X : integrable (λ ω, exp (t * X ω)) μ) :
0 < mgf X μ t :=
mgf_pos' (is_probability_measure.ne_zero μ) h_int_X
lemma mgf_neg : mgf (-X) μ t = mgf X μ (-t) :=
by simp_rw [mgf, pi.neg_apply, mul_neg, neg_mul]
lemma cgf_neg : cgf (-X) μ t = cgf X μ (-t) := by simp_rw [cgf, mgf_neg]
/-- This is a trivial application of `indep_fun.comp` but it will come up frequently. -/
lemma indep_fun.exp_mul {X Y : Ω → ℝ} (h_indep : indep_fun X Y μ) (s t : ℝ) :
indep_fun (λ ω, exp (s * X ω)) (λ ω, exp (t * Y ω)) μ :=
begin
have h_meas : ∀ t, measurable (λ x, exp (t * x)) := λ t, (measurable_id'.const_mul t).exp,
change indep_fun ((λ x, exp (s * x)) ∘ X) ((λ x, exp (t * x)) ∘ Y) μ,
exact indep_fun.comp h_indep (h_meas s) (h_meas t),
end
lemma indep_fun.mgf_add {X Y : Ω → ℝ} (h_indep : indep_fun X Y μ)
(hX : ae_strongly_measurable (λ ω, exp (t * X ω)) μ)
(hY : ae_strongly_measurable (λ ω, exp (t * Y ω)) μ) :
mgf (X + Y) μ t = mgf X μ t * mgf Y μ t :=
begin
simp_rw [mgf, pi.add_apply, mul_add, exp_add],
exact (h_indep.exp_mul t t).integral_mul hX hY,
end
lemma indep_fun.mgf_add' {X Y : Ω → ℝ} (h_indep : indep_fun X Y μ)
(hX : ae_strongly_measurable X μ) (hY : ae_strongly_measurable Y μ) :
mgf (X + Y) μ t = mgf X μ t * mgf Y μ t :=
begin
have A : continuous (λ (x : ℝ), exp (t * x)), by continuity,
have h'X : ae_strongly_measurable (λ ω, exp (t * X ω)) μ :=
A.ae_strongly_measurable.comp_ae_measurable hX.ae_measurable,
have h'Y : ae_strongly_measurable (λ ω, exp (t * Y ω)) μ :=
A.ae_strongly_measurable.comp_ae_measurable hY.ae_measurable,
exact h_indep.mgf_add h'X h'Y
end
lemma indep_fun.cgf_add {X Y : Ω → ℝ} (h_indep : indep_fun X Y μ)
(h_int_X : integrable (λ ω, exp (t * X ω)) μ)
(h_int_Y : integrable (λ ω, exp (t * Y ω)) μ) :
cgf (X + Y) μ t = cgf X μ t + cgf Y μ t :=
begin
by_cases hμ : μ = 0,
{ simp [hμ], },
simp only [cgf, h_indep.mgf_add h_int_X.ae_strongly_measurable h_int_Y.ae_strongly_measurable],
exact log_mul (mgf_pos' hμ h_int_X).ne' (mgf_pos' hμ h_int_Y).ne',
end
lemma ae_strongly_measurable_exp_mul_add {X Y : Ω → ℝ}
(h_int_X : ae_strongly_measurable (λ ω, exp (t * X ω)) μ)
(h_int_Y : ae_strongly_measurable (λ ω, exp (t * Y ω)) μ) :
ae_strongly_measurable (λ ω, exp (t * (X + Y) ω)) μ :=
begin
simp_rw [pi.add_apply, mul_add, exp_add],
exact ae_strongly_measurable.mul h_int_X h_int_Y,
end
lemma ae_strongly_measurable_exp_mul_sum {X : ι → Ω → ℝ} {s : finset ι}
(h_int : ∀ i ∈ s, ae_strongly_measurable (λ ω, exp (t * X i ω)) μ) :
ae_strongly_measurable (λ ω, exp (t * (∑ i in s, X i) ω)) μ :=
begin
classical,
induction s using finset.induction_on with i s hi_notin_s h_rec h_int,
{ simp only [pi.zero_apply, sum_apply, sum_empty, mul_zero, exp_zero],
exact ae_strongly_measurable_const, },
{ have : ∀ (i : ι), i ∈ s → ae_strongly_measurable (λ (ω : Ω), exp (t * X i ω)) μ,
from λ i hi, h_int i (mem_insert_of_mem hi),
specialize h_rec this,
rw sum_insert hi_notin_s,
apply ae_strongly_measurable_exp_mul_add (h_int i (mem_insert_self _ _)) h_rec }
end
lemma indep_fun.integrable_exp_mul_add {X Y : Ω → ℝ} (h_indep : indep_fun X Y μ)
(h_int_X : integrable (λ ω, exp (t * X ω)) μ)
(h_int_Y : integrable (λ ω, exp (t * Y ω)) μ) :
integrable (λ ω, exp (t * (X + Y) ω)) μ :=
begin
simp_rw [pi.add_apply, mul_add, exp_add],
exact (h_indep.exp_mul t t).integrable_mul h_int_X h_int_Y,
end
lemma Indep_fun.integrable_exp_mul_sum [is_probability_measure μ]
{X : ι → Ω → ℝ} (h_indep : Indep_fun (λ i, infer_instance) X μ) (h_meas : ∀ i, measurable (X i))
{s : finset ι} (h_int : ∀ i ∈ s, integrable (λ ω, exp (t * X i ω)) μ) :
integrable (λ ω, exp (t * (∑ i in s, X i) ω)) μ :=
begin
classical,
induction s using finset.induction_on with i s hi_notin_s h_rec h_int,
{ simp only [pi.zero_apply, sum_apply, sum_empty, mul_zero, exp_zero],
exact integrable_const _, },
{ have : ∀ (i : ι), i ∈ s → integrable (λ (ω : Ω), exp (t * X i ω)) μ,
from λ i hi, h_int i (mem_insert_of_mem hi),
specialize h_rec this,
rw sum_insert hi_notin_s,
refine indep_fun.integrable_exp_mul_add _ (h_int i (mem_insert_self _ _)) h_rec,
exact (h_indep.indep_fun_finset_sum_of_not_mem h_meas hi_notin_s).symm, },
end
lemma Indep_fun.mgf_sum [is_probability_measure μ]
{X : ι → Ω → ℝ} (h_indep : Indep_fun (λ i, infer_instance) X μ) (h_meas : ∀ i, measurable (X i))
(s : finset ι) :
mgf (∑ i in s, X i) μ t = ∏ i in s, mgf (X i) μ t :=
begin
classical,
induction s using finset.induction_on with i s hi_notin_s h_rec h_int,
{ simp only [sum_empty, mgf_zero_fun, measure_univ, ennreal.one_to_real, prod_empty], },
{ have h_int' : ∀ (i : ι), ae_strongly_measurable (λ (ω : Ω), exp (t * X i ω)) μ,
from λ i, ((h_meas i).const_mul t).exp.ae_strongly_measurable,
rw [sum_insert hi_notin_s, indep_fun.mgf_add
(h_indep.indep_fun_finset_sum_of_not_mem h_meas hi_notin_s).symm (h_int' i)
(ae_strongly_measurable_exp_mul_sum (λ i hi, h_int' i)),
h_rec, prod_insert hi_notin_s] }
end
lemma Indep_fun.cgf_sum [is_probability_measure μ]
{X : ι → Ω → ℝ} (h_indep : Indep_fun (λ i, infer_instance) X μ) (h_meas : ∀ i, measurable (X i))
{s : finset ι} (h_int : ∀ i ∈ s, integrable (λ ω, exp (t * X i ω)) μ) :
cgf (∑ i in s, X i) μ t = ∑ i in s, cgf (X i) μ t :=
begin
simp_rw cgf,
rw ← log_prod _ _ (λ j hj, _),
{ rw h_indep.mgf_sum h_meas },
{ exact (mgf_pos (h_int j hj)).ne', },
end
/-- **Chernoff bound** on the upper tail of a real random variable. -/
lemma measure_ge_le_exp_mul_mgf [is_finite_measure μ] (ε : ℝ) (ht : 0 ≤ t)
(h_int : integrable (λ ω, exp (t * X ω)) μ) :
(μ {ω | ε ≤ X ω}).to_real ≤ exp (- t * ε) * mgf X μ t :=
begin
cases ht.eq_or_lt with ht_zero_eq ht_pos,
{ rw ht_zero_eq.symm,
simp only [neg_zero, zero_mul, exp_zero, mgf_zero', one_mul],
rw ennreal.to_real_le_to_real (measure_ne_top μ _) (measure_ne_top μ _),
exact measure_mono (set.subset_univ _), },
calc (μ {ω | ε ≤ X ω}).to_real
= (μ {ω | exp (t * ε) ≤ exp (t * X ω)}).to_real :
begin
congr' with ω,
simp only [exp_le_exp, eq_iff_iff],
exact ⟨λ h, mul_le_mul_of_nonneg_left h ht_pos.le, λ h, le_of_mul_le_mul_left h ht_pos⟩,
end
... ≤ (exp (t * ε))⁻¹ * μ[λ ω, exp (t * X ω)] :
begin
have : exp (t * ε) * (μ {ω | exp (t * ε) ≤ exp (t * X ω)}).to_real
≤ μ[λ ω, exp (t * X ω)],
from mul_meas_ge_le_integral_of_nonneg (λ x, (exp_pos _).le) h_int _,
rwa [mul_comm (exp (t * ε))⁻¹, ← div_eq_mul_inv, le_div_iff' (exp_pos _)],
end
... = exp (- t * ε) * mgf X μ t : by { rw [neg_mul, exp_neg], refl, },
end
/-- **Chernoff bound** on the lower tail of a real random variable. -/
lemma measure_le_le_exp_mul_mgf [is_finite_measure μ] (ε : ℝ) (ht : t ≤ 0)
(h_int : integrable (λ ω, exp (t * X ω)) μ) :
(μ {ω | X ω ≤ ε}).to_real ≤ exp (- t * ε) * mgf X μ t :=
begin
rw [← neg_neg t, ← mgf_neg, neg_neg, ← neg_mul_neg (-t)],
refine eq.trans_le _ (measure_ge_le_exp_mul_mgf (-ε) (neg_nonneg.mpr ht) _),
{ congr' with ω,
simp only [pi.neg_apply, neg_le_neg_iff], },
{ simp_rw [pi.neg_apply, neg_mul_neg],
exact h_int, },
end
/-- **Chernoff bound** on the upper tail of a real random variable. -/
lemma measure_ge_le_exp_cgf [is_finite_measure μ] (ε : ℝ) (ht : 0 ≤ t)
(h_int : integrable (λ ω, exp (t * X ω)) μ) :
(μ {ω | ε ≤ X ω}).to_real ≤ exp (- t * ε + cgf X μ t) :=
begin
refine (measure_ge_le_exp_mul_mgf ε ht h_int).trans _,
rw exp_add,
exact mul_le_mul le_rfl (le_exp_log _) mgf_nonneg (exp_pos _).le,
end
/-- **Chernoff bound** on the lower tail of a real random variable. -/
lemma measure_le_le_exp_cgf [is_finite_measure μ] (ε : ℝ) (ht : t ≤ 0)
(h_int : integrable (λ ω, exp (t * X ω)) μ) :
(μ {ω | X ω ≤ ε}).to_real ≤ exp (- t * ε + cgf X μ t) :=
begin
refine (measure_le_le_exp_mul_mgf ε ht h_int).trans _,
rw exp_add,
exact mul_le_mul le_rfl (le_exp_log _) mgf_nonneg (exp_pos _).le,
end
end moment_generating_function
end probability_theory