/
bernstein.lean
307 lines (270 loc) · 12.5 KB
/
bernstein.lean
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
/-
Copyright (c) 2021 Scott Morrison. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Scott Morrison
-/
import algebra.order.field.basic
import ring_theory.polynomial.bernstein
import topology.continuous_function.polynomial
import topology.continuous_function.compact
/-!
# Bernstein approximations and Weierstrass' theorem
We prove that the Bernstein approximations
```
∑ k : fin (n+1), f (k/n : ℝ) * n.choose k * x^k * (1-x)^(n-k)
```
for a continuous function `f : C([0,1], ℝ)` converge uniformly to `f` as `n` tends to infinity.
Our proof follows [Richard Beals' *Analysis, an introduction*][beals-analysis], §7D.
The original proof, due to [Bernstein](bernstein1912) in 1912, is probabilistic,
and relies on Bernoulli's theorem,
which gives bounds for how quickly the observed frequencies in a
Bernoulli trial approach the underlying probability.
The proof here does not directly rely on Bernoulli's theorem,
but can also be given a probabilistic account.
* Consider a weighted coin which with probability `x` produces heads,
and with probability `1-x` produces tails.
* The value of `bernstein n k x` is the probability that
such a coin gives exactly `k` heads in a sequence of `n` tosses.
* If such an appearance of `k` heads results in a payoff of `f(k / n)`,
the `n`-th Bernstein approximation for `f` evaluated at `x` is the expected payoff.
* The main estimate in the proof bounds the probability that
the observed frequency of heads differs from `x` by more than some `δ`,
obtaining a bound of `(4 * n * δ^2)⁻¹`, irrespective of `x`.
* This ensures that for `n` large, the Bernstein approximation is (uniformly) close to the
payoff function `f`.
(You don't need to think in these terms to follow the proof below: it's a giant `calc` block!)
This result proves Weierstrass' theorem that polynomials are dense in `C([0,1], ℝ)`,
although we defer an abstract statement of this until later.
-/
noncomputable theory
open_locale classical
open_locale big_operators
open_locale bounded_continuous_function
open_locale unit_interval
/--
The Bernstein polynomials, as continuous functions on `[0,1]`.
-/
def bernstein (n ν : ℕ) : C(I, ℝ) :=
(bernstein_polynomial ℝ n ν).to_continuous_map_on I
@[simp] lemma bernstein_apply (n ν : ℕ) (x : I) :
bernstein n ν x = n.choose ν * x^ν * (1-x)^(n-ν) :=
begin
dsimp [bernstein, polynomial.to_continuous_map_on, polynomial.to_continuous_map,
bernstein_polynomial],
simp,
end
lemma bernstein_nonneg {n ν : ℕ} {x : I} :
0 ≤ bernstein n ν x :=
begin
simp only [bernstein_apply],
exact mul_nonneg
(mul_nonneg (nat.cast_nonneg _) (pow_nonneg (by unit_interval) _))
(pow_nonneg (by unit_interval) _),
end
/-!
We now give a slight reformulation of `bernstein_polynomial.variance`.
-/
namespace bernstein
/--
Send `k : fin (n+1)` to the equally spaced points `k/n` in the unit interval.
-/
def z {n : ℕ} (k : fin (n+1)) : I :=
⟨(k : ℝ) / n,
begin
cases n,
{ norm_num },
{ have h₁ : 0 < (n.succ : ℝ) := by exact_mod_cast (nat.succ_pos _),
have h₂ : ↑k ≤ n.succ := by exact_mod_cast (fin.le_last k),
rw [set.mem_Icc, le_div_iff h₁, div_le_iff h₁],
norm_cast,
simp [h₂], },
end⟩
local postfix `/ₙ`:90 := z
lemma probability (n : ℕ) (x : I) :
∑ k : fin (n+1), bernstein n k x = 1 :=
begin
have := bernstein_polynomial.sum ℝ n,
apply_fun (λ p, polynomial.aeval (x : ℝ) p) at this,
simp [alg_hom.map_sum, finset.sum_range] at this,
exact this,
end
lemma variance {n : ℕ} (h : 0 < (n : ℝ)) (x : I) :
∑ k : fin (n+1), (x - k/ₙ : ℝ)^2 * bernstein n k x = x * (1-x) / n :=
begin
have h' : (n : ℝ) ≠ 0 := ne_of_gt h,
apply_fun (λ x : ℝ, x * n) using group_with_zero.mul_right_injective h',
apply_fun (λ x : ℝ, x * n) using group_with_zero.mul_right_injective h',
dsimp,
conv_lhs { simp only [finset.sum_mul, z], },
conv_rhs { rw div_mul_cancel _ h', },
have := bernstein_polynomial.variance ℝ n,
apply_fun (λ p, polynomial.aeval (x : ℝ) p) at this,
simp [alg_hom.map_sum, finset.sum_range, ←polynomial.nat_cast_mul] at this,
convert this using 1,
{ congr' 1, funext k,
rw [mul_comm _ (n : ℝ), mul_comm _ (n : ℝ), ←mul_assoc, ←mul_assoc],
congr' 1,
field_simp [h],
ring, },
{ ring, },
end
end bernstein
open bernstein
local postfix `/ₙ`:2000 := z
/--
The `n`-th approximation of a continuous function on `[0,1]` by Bernstein polynomials,
given by `∑ k, f (k/n) * bernstein n k x`.
-/
def bernstein_approximation (n : ℕ) (f : C(I, ℝ)) : C(I, ℝ) :=
∑ k : fin (n+1), f k/ₙ • bernstein n k
/-!
We now set up some of the basic machinery of the proof that the Bernstein approximations
converge uniformly.
A key player is the set `S f ε h n x`,
for some function `f : C(I, ℝ)`, `h : 0 < ε`, `n : ℕ` and `x : I`.
This is the set of points `k` in `fin (n+1)` such that
`k/n` is within `δ` of `x`, where `δ` is the modulus of uniform continuity for `f`,
chosen so `|f x - f y| < ε/2` when `|x - y| < δ`.
We show that if `k ∉ S`, then `1 ≤ δ^-2 * (x - k/n)^2`.
-/
namespace bernstein_approximation
@[simp] lemma apply (n : ℕ) (f : C(I, ℝ)) (x : I) :
bernstein_approximation n f x = ∑ k : fin (n+1), f k/ₙ * bernstein n k x :=
by simp [bernstein_approximation]
/--
The modulus of (uniform) continuity for `f`, chosen so `|f x - f y| < ε/2` when `|x - y| < δ`.
-/
def δ (f : C(I, ℝ)) (ε : ℝ) (h : 0 < ε) : ℝ := f.modulus (ε/2) (half_pos h)
lemma δ_pos {f : C(I, ℝ)} {ε : ℝ} {h : 0 < ε} : 0 < δ f ε h := f.modulus_pos
/--
The set of points `k` so `k/n` is within `δ` of `x`.
-/
def S (f : C(I, ℝ)) (ε : ℝ) (h : 0 < ε) (n : ℕ) (x : I) : finset (fin (n+1)) :=
{ k : fin (n+1) | dist k/ₙ x < δ f ε h }.to_finset
/--
If `k ∈ S`, then `f(k/n)` is close to `f x`.
-/
lemma lt_of_mem_S
{f : C(I, ℝ)} {ε : ℝ} {h : 0 < ε} {n : ℕ} {x : I} {k : fin (n+1)} (m : k ∈ S f ε h n x) :
|f k/ₙ - f x| < ε/2 :=
begin
apply f.dist_lt_of_dist_lt_modulus (ε/2) (half_pos h),
simpa [S] using m,
end
/--
If `k ∉ S`, then as `δ ≤ |x - k/n|`, we have the inequality `1 ≤ δ^-2 * (x - k/n)^2`.
This particular formulation will be helpful later.
-/
lemma le_of_mem_S_compl
{f : C(I, ℝ)} {ε : ℝ} {h : 0 < ε} {n : ℕ} {x : I} {k : fin (n+1)} (m : k ∈ (S f ε h n x)ᶜ) :
(1 : ℝ) ≤ (δ f ε h)^(-2 : ℤ) * (x - k/ₙ) ^ 2 :=
begin
simp only [finset.mem_compl, not_lt, set.mem_to_finset, set.mem_set_of_eq, S] at m,
rw [zpow_neg, ← div_eq_inv_mul, zpow_two, ←pow_two, one_le_div (pow_pos δ_pos 2), sq_le_sq,
abs_of_pos δ_pos],
rwa [dist_comm] at m
end
end bernstein_approximation
open bernstein_approximation
open bounded_continuous_function
open filter
open_locale topological_space
/--
The Bernstein approximations
```
∑ k : fin (n+1), f (k/n : ℝ) * n.choose k * x^k * (1-x)^(n-k)
```
for a continuous function `f : C([0,1], ℝ)` converge uniformly to `f` as `n` tends to infinity.
This is the proof given in [Richard Beals' *Analysis, an introduction*][beals-analysis], §7D,
and reproduced on wikipedia.
-/
theorem bernstein_approximation_uniform (f : C(I, ℝ)) :
tendsto (λ n : ℕ, bernstein_approximation n f) at_top (𝓝 f) :=
begin
simp only [metric.nhds_basis_ball.tendsto_right_iff, metric.mem_ball, dist_eq_norm],
intros ε h,
let δ := δ f ε h,
have nhds_zero := tendsto_const_div_at_top_nhds_0_nat (2 * ‖f‖ * δ ^ (-2 : ℤ)),
filter_upwards [nhds_zero.eventually (gt_mem_nhds (half_pos h)), eventually_gt_at_top 0]
with n nh npos',
have npos : 0 < (n:ℝ) := by exact_mod_cast npos',
-- Two easy inequalities we'll need later:
have w₁ : 0 ≤ 2 * ‖f‖ := mul_nonneg (by norm_num) (norm_nonneg f),
have w₂ : 0 ≤ 2 * ‖f‖ * δ^(-2 : ℤ) := mul_nonneg w₁ pow_minus_two_nonneg,
-- As `[0,1]` is compact, it suffices to check the inequality pointwise.
rw (continuous_map.norm_lt_iff _ h),
intro x,
-- The idea is to split up the sum over `k` into two sets,
-- `S`, where `x - k/n < δ`, and its complement.
let S := S f ε h n x,
calc
|(bernstein_approximation n f - f) x|
= |bernstein_approximation n f x - f x|
: rfl
... = |bernstein_approximation n f x - f x * 1|
: by rw mul_one
... = |bernstein_approximation n f x - f x * (∑ k : fin (n+1), bernstein n k x)|
: by rw bernstein.probability
... = |∑ k : fin (n+1), (f k/ₙ - f x) * bernstein n k x|
: by simp [bernstein_approximation, finset.mul_sum, sub_mul]
... ≤ ∑ k : fin (n+1), |(f k/ₙ - f x) * bernstein n k x|
: finset.abs_sum_le_sum_abs _ _
... = ∑ k : fin (n+1), |f k/ₙ - f x| * bernstein n k x
: by simp_rw [abs_mul, abs_eq_self.mpr bernstein_nonneg]
... = ∑ k in S, |f k/ₙ - f x| * bernstein n k x +
∑ k in Sᶜ, |f k/ₙ - f x| * bernstein n k x
: (S.sum_add_sum_compl _).symm
-- We'll now deal with the terms in `S` and the terms in `Sᶜ` in separate calc blocks.
... < ε/2 + ε/2 : add_lt_add_of_le_of_lt _ _
... = ε : add_halves ε,
{ -- We now work on the terms in `S`: uniform continuity and `bernstein.probability`
-- quickly give us a bound.
calc ∑ k in S, |f k/ₙ - f x| * bernstein n k x
≤ ∑ k in S, ε/2 * bernstein n k x
: finset.sum_le_sum
(λ k m, (mul_le_mul_of_nonneg_right (le_of_lt (lt_of_mem_S m))
bernstein_nonneg))
... = ε/2 * ∑ k in S, bernstein n k x
: by rw finset.mul_sum
-- In this step we increase the sum over `S` back to a sum over all of `fin (n+1)`,
-- so that we can use `bernstein.probability`.
... ≤ ε/2 * ∑ k : fin (n+1), bernstein n k x
: mul_le_mul_of_nonneg_left
(finset.sum_le_univ_sum_of_nonneg (λ k, bernstein_nonneg))
(le_of_lt (half_pos h))
... = ε/2 : by rw [bernstein.probability, mul_one] },
{ -- We now turn to working on `Sᶜ`: we control the difference term just using `‖f‖`,
-- and then insert a `δ^(-2) * (x - k/n)^2` factor
-- (which is at least one because we are not in `S`).
calc ∑ k in Sᶜ, |f k/ₙ - f x| * bernstein n k x
≤ ∑ k in Sᶜ, (2 * ‖f‖) * bernstein n k x
: finset.sum_le_sum
(λ k m, mul_le_mul_of_nonneg_right (f.dist_le_two_norm _ _)
bernstein_nonneg)
... = (2 * ‖f‖) * ∑ k in Sᶜ, bernstein n k x
: by rw finset.mul_sum
... ≤ (2 * ‖f‖) * ∑ k in Sᶜ, δ^(-2 : ℤ) * (x - k/ₙ)^2 * bernstein n k x
: mul_le_mul_of_nonneg_left
(finset.sum_le_sum (λ k m, begin
conv_lhs { rw ←one_mul (bernstein _ _ _), },
exact mul_le_mul_of_nonneg_right
(le_of_mem_S_compl m) bernstein_nonneg,
end)) w₁
-- Again enlarging the sum from `Sᶜ` to all of `fin (n+1)`
... ≤ (2 * ‖f‖) * ∑ k : fin (n+1), δ^(-2 : ℤ) * (x - k/ₙ)^2 * bernstein n k x
: mul_le_mul_of_nonneg_left
(finset.sum_le_univ_sum_of_nonneg
(λ k, mul_nonneg
(mul_nonneg pow_minus_two_nonneg (sq_nonneg _))
bernstein_nonneg)) w₁
... = (2 * ‖f‖) * δ^(-2 : ℤ) * ∑ k : fin (n+1), (x - k/ₙ)^2 * bernstein n k x
: by conv_rhs
{ rw [mul_assoc, finset.mul_sum], simp only [←mul_assoc], }
-- `bernstein.variance` and `x ∈ [0,1]` gives the uniform bound
... = (2 * ‖f‖) * δ^(-2 : ℤ) * x * (1-x) / n
: by { rw variance npos, ring, }
... ≤ (2 * ‖f‖) * δ^(-2 : ℤ) / n
: (div_le_div_right npos).mpr $
by refine mul_le_of_le_of_le_one' (mul_le_of_le_one_right w₂ _) _ _ w₂; unit_interval
... < ε/2 : nh, }
end