Skip to content
This repository was archived by the owner on Jul 24, 2024. It is now read-only.

Commit d4cb237

Browse files
committed
feat(algebra/module): define ordered semimodules and generalize convexity of functions (#3728)
Co-authored-by: Frédéric Dupuis <31101893+dupuisf@users.noreply.github.com>
1 parent bc72d90 commit d4cb237

File tree

3 files changed

+170
-50
lines changed

3 files changed

+170
-50
lines changed

src/algebra/module/ordered.lean

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/-
2+
Copyright (c) 2020 Frédéric Dupuis. All rights reserved.
3+
Released under Apache 2.0 license as described in the file LICENSE.
4+
Authors: Frédéric Dupuis
5+
-/
6+
7+
import algebra.module.basic
8+
import algebra.ordered_group
9+
10+
/-!
11+
# Ordered semimodules
12+
13+
In this file we define
14+
15+
* `ordered_semimodule R M` : an ordered additive commutative monoid `M` is an `ordered_semimodule`
16+
over an `ordered_semiring` `R` if the scalar product respects the order relation on the
17+
monoid and on the ring.
18+
19+
## Implementation notes
20+
21+
* We choose to define `ordered_semimodule` so that it extends `semimodule` only, as is done
22+
for semimodules themselves.
23+
* To get ordered modules and ordered vector spaces, it suffices to replace the
24+
`ordered_add_comm_monoid` and the `ordered_semiring` as desired.
25+
26+
## TODO
27+
28+
* Connect this with convex cones: show that a convex cone defines an order on the vector space
29+
and vice-versa.
30+
31+
## References
32+
33+
* https://en.wikipedia.org/wiki/Ordered_vector_space
34+
35+
## Tags
36+
37+
ordered semimodule, ordered module, ordered vector space
38+
-/
39+
40+
41+
set_option default_priority 100 -- see Note [default priority]
42+
43+
/--
44+
An ordered semimodule is an ordered additive commutative monoid
45+
with a partial order in which the scalar multiplication is compatible with the order.
46+
-/
47+
@[protect_proj, ancestor semimodule]
48+
class ordered_semimodule (R β : Type*)
49+
[ordered_semiring R] [ordered_add_comm_monoid β] extends semimodule R β :=
50+
(smul_lt_smul_of_pos : ∀ {a b : β}, ∀ {c : R}, a < b → 0 < c → c • a < c • b)
51+
(lt_of_smul_lt_smul_of_nonneg : ∀ {a b : β}, ∀ {c : R}, c • a < c • b → 0 ≤ c → a < b)
52+
53+
variable {R : Type*}
54+
55+
instance linear_ordered_ring.to_ordered_semimodule [linear_ordered_ring R] :
56+
ordered_semimodule R R :=
57+
{ smul_lt_smul_of_pos := ordered_semiring.mul_lt_mul_of_pos_left,
58+
lt_of_smul_lt_smul_of_nonneg := λ _ _ _, lt_of_mul_lt_mul_left }
59+
60+
variables {β : Type*} [ordered_semiring R] [ordered_add_comm_monoid β] [ordered_semimodule R β]
61+
{a b : β} {c : R}
62+
63+
lemma smul_lt_smul_of_pos : a < b → 0 < c → c • a < c • b := ordered_semimodule.smul_lt_smul_of_pos
64+
65+
lemma smul_le_smul_of_nonneg (h₁ : a ≤ b) (h₂ : 0 ≤ c) : c • a ≤ c • b :=
66+
begin
67+
by_cases H₁ : c = 0,
68+
{ simp [H₁, zero_smul] },
69+
{ by_cases H₂ : a = b,
70+
{ rw H₂ },
71+
{ exact le_of_lt
72+
(smul_lt_smul_of_pos (lt_of_le_of_ne h₁ H₂) (lt_of_le_of_ne h₂ (ne.symm H₁))), } }
73+
end

src/analysis/convex/basic.lean

Lines changed: 93 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import data.set.intervals.ord_connected
77
import data.set.intervals.image_preimage
88
import data.complex.module
99
import linear_algebra.affine_space.basic
10+
import algebra.module.ordered
11+
1012

1113
/-!
1214
# Convex sets and functions on real vector spaces
@@ -15,10 +17,10 @@ In a real vector space, we define the following objects and properties.
1517
1618
* `segment x y` is the closed segment joining `x` and `y`.
1719
* A set `s` is `convex` if for any two points `x y ∈ s` it includes `segment x y`;
18-
* A function `f` is `convex_on` a set `s` if `s` is itself a convex set, and for any two points
19-
`x y ∈ s` the segment joining `(x, f x)` to `(y, f y)` is (non-strictly) above the graph of `f`;
20-
equivalently, `convex_on f s` means that the epigraph `{p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2}`
21-
is a convex set;
20+
* A function `f : E → β` is `convex_on` a set `s` if `s` is itself a convex set, and for any two
21+
points `x y ∈ s` the segment joining `(x, f x)` to `(y, f y)` is (non-strictly) above the graph
22+
of `f`; equivalently, `convex_on f s` means that the epigraph
23+
`{p : E × β | p.1 ∈ s ∧ f p.1 ≤ p.2}` is a convex set;
2224
* Center mass of a finite set of points with prescribed weights.
2325
* Convex hull of a set `s` is the minimal convex set that includes `s`.
2426
* Standard simplex `std_simplex ι [fintype ι]` is the intersection of the positive quadrant with
@@ -27,6 +29,9 @@ In a real vector space, we define the following objects and properties.
2729
We also provide various equivalent versions of the definitions above, prove that some specific sets
2830
are convex, and prove Jensen's inequality.
2931
32+
Note: To define convexity for functions `f : E → β`, we need `β` to be an ordered vector space,
33+
defined using the instance `ordered_semimodule ℝ β`.
34+
3035
## Notations
3136
3237
We use the following local notations:
@@ -462,26 +467,28 @@ end sets
462467

463468
section functions
464469

470+
variables {β : Type*} [ordered_add_comm_monoid β] [ordered_semimodule ℝ β]
471+
465472
local notation `[`x `, ` y `]` := segment x y
466473

467474
/-! ### Convex functions -/
468475

469476
/-- Convexity of functions -/
470-
def convex_on (s : set E) (f : E → ℝ) : Prop :=
477+
def convex_on (s : set E) (f : E → β) : Prop :=
471478
convex s ∧
472479
∀ ⦃x y : E⦄, x ∈ s → y ∈ s → ∀ ⦃a b : ℝ⦄, 0 ≤ a → 0 ≤ b → a + b = 1 →
473-
f (a • x + b • y) ≤ a * f x + b * f y
480+
f (a • x + b • y) ≤ a • f x + b • f y
474481

475482
lemma convex_on_id {s : set ℝ} (hs : convex s) : convex_on s id := ⟨hs, by { intros, refl }⟩
476483

477-
lemma convex_on_const (c : ℝ) (hs : convex s) : convex_on s (λ x:E, c) :=
478-
⟨hs, by { intros, simp only [← add_mul, *, one_mul] }⟩
484+
lemma convex_on_const (c : β) (hs : convex s) : convex_on s (λ x:E, c) :=
485+
⟨hs, by { intros, simp only [← add_smul, *, one_smul] }⟩
479486

480-
variables {t : set E} {f g : E → ℝ}
487+
variables {t : set E}
481488

482-
lemma convex_on_iff_div:
489+
lemma convex_on_iff_div {f : E → β} :
483490
convex_on s f ↔ convex s ∧ ∀ ⦃x y : E⦄, x ∈ s → y ∈ s → ∀ ⦃a b : ℝ⦄, 0 ≤ a → 0 ≤ b → 0 < a + b →
484-
f ((a/(a+b)) • x + (b/(a+b)) • y) ≤ (a/(a+b)) * f x + (b/(a+b)) * f y :=
491+
f ((a/(a+b)) • x + (b/(a+b)) • y) ≤ (a/(a+b)) • f x + (b/(a+b)) • f y :=
485492
and_congr iff.rfl
486493
begin
487494
intros h x y hx hy a b ha hb hab,
@@ -495,19 +502,19 @@ begin
495502
end
496503

497504
/-- For a function on a convex set in a linear ordered space, in order to prove that it is convex
498-
it suffices to verify the inequality `f (a • x + b • y) ≤ a * f x + b * f y` only for `x < y`
505+
it suffices to verify the inequality `f (a • x + b • y) ≤ a • f x + b • f y` only for `x < y`
499506
and positive `a`, `b`. The main use case is `E = ℝ` however one can apply it, e.g., to `ℝ^n` with
500507
lexicographic order. -/
501-
lemma linear_order.convex_on_of_lt [linear_order E] (hs : convex s)
508+
lemma linear_order.convex_on_of_lt {f : E → β} [linear_order E] (hs : convex s)
502509
(hf : ∀ ⦃x y : E⦄, x ∈ s → y ∈ s → x < y → ∀ ⦃a b : ℝ⦄, 0 < a → 0 < b → a + b = 1 →
503-
f (a • x + b • y) ≤ a * f x + b * f y) : convex_on s f :=
510+
f (a • x + b • y) ≤ a • f x + b • f y) : convex_on s f :=
504511
begin
505512
use hs,
506513
intros x y hx hy a b ha hb hab,
507514
wlog hxy : x<=y using [x y a b, y x b a],
508515
{ exact le_total _ _ },
509516
{ cases eq_or_lt_of_le hxy with hxy hxy,
510-
by { subst y, rw [← add_smul, ← add_mul, hab, one_smul, one_mul] },
517+
by { subst y, rw [← add_smul, ← add_smul, hab, one_smul, one_smul] },
511518
cases eq_or_lt_of_le ha with ha ha,
512519
by { subst a, rw [zero_add] at hab, subst b, simp },
513520
cases eq_or_lt_of_le hb with hb hb,
@@ -544,36 +551,42 @@ begin
544551
convert this; symmetry; simp only [div_eq_iff (ne_of_gt B), y]; ring
545552
end
546553

547-
lemma convex_on.subset (h_convex_on : convex_on t f) (h_subset : s ⊆ t) (h_convex : convex s) :
548-
convex_on s f :=
554+
lemma convex_on.subset {f : E → β} (h_convex_on : convex_on t f)
555+
(h_subset : s ⊆ t) (h_convex : convex s) : convex_on s f :=
549556
begin
550557
apply and.intro h_convex,
551558
intros x y hx hy,
552559
exact h_convex_on.2 (h_subset hx) (h_subset hy),
553560
end
554561

555-
lemma convex_on.add (hf : convex_on s f) (hg : convex_on s g) : convex_on s (λx, f x + g x) :=
562+
lemma convex_on.add {f g : E → β} (hf : convex_on s f) (hg : convex_on s g) :
563+
convex_on s (λx, f x + g x) :=
556564
begin
557565
apply and.intro hf.1,
558566
intros x y hx hy a b ha hb hab,
559567
calc
560-
f (a • x + b • y) + g (a • x + b • y) ≤ (a * f x + b * f y) + (a * g x + b * g y)
568+
f (a • x + b • y) + g (a • x + b • y) ≤ (a • f x + b • f y) + (a • g x + b • g y)
561569
: add_le_add (hf.2 hx hy ha hb hab) (hg.2 hx hy ha hb hab)
562-
... = a * f x + a * g x + b * f y + b * g y : by linarith
563-
... = a * (f x + g x) + b * (f y + g y) : by simp [mul_add, add_assoc]
570+
... = a • f x + a • g x + b • f y + b • g y : by abel
571+
... = a • (f x + g x) + b • (f y + g y) : by simp [smul_add, add_assoc]
564572
end
565573

566-
lemma convex_on.smul {c : ℝ} (hc : 0 ≤ c) (hf : convex_on s f) : convex_on s (λx, c * f x) :=
574+
lemma convex_on.smul {f : E → β} {c : ℝ} (hc : 0 ≤ c) (hf : convex_on s f) :
575+
convex_on s (λx, c • f x) :=
567576
begin
568577
apply and.intro hf.1,
569578
intros x y hx hy a b ha hb hab,
570579
calc
571-
c * f (a • x + b • y) ≤ c * (a * f x + b * f y)
572-
: mul_le_mul_of_nonneg_left (hf.2 hx hy ha hb hab) hc
573-
... = a * (c * f x) + b * (c * f y) : by rw mul_add; ac_refl
580+
c • f (a • x + b • y) ≤ c • (a • f x + b • f y)
581+
: smul_le_smul_of_nonneg (hf.2 hx hy ha hb hab) hc
582+
... = a • (c • f x) + b • (c • f y) : by simp only [smul_add, smul_comm]
574583
end
575584

576-
lemma convex_on.le_on_segment' {x y : E} {a b : ℝ}
585+
/--
586+
A convex function on a segment is upper-bounded by the max of its values at the endpoints.
587+
Note: This cannot be generalized to E → β because it needs a linear order.
588+
-/
589+
lemma convex_on.le_on_segment' {f : E → ℝ} {x y : E} {a b : ℝ}
577590
(hf : convex_on s f) (hx : x ∈ s) (hy : y ∈ s) (ha : 0 ≤ a) (hb : 0 ≤ b) (hab : a + b = 1) :
578591
f (a • x + b • y) ≤ max (f x) (f y) :=
579592
calc
@@ -582,32 +595,66 @@ calc
582595
add_le_add (mul_le_mul_of_nonneg_left (le_max_left _ _) ha) (mul_le_mul_of_nonneg_left (le_max_right _ _) hb)
583596
... ≤ max (f x) (f y) : by rw [←add_mul, hab, one_mul]
584597

585-
lemma convex_on.le_on_segment (hf : convex_on s f) {x y z : E}
598+
/--
599+
A convex function on a segment is upper-bounded by the max of its values at the endpoints.
600+
Note: This cannot be generalized to E → β because it needs a linear order.
601+
-/
602+
lemma convex_on.le_on_segment {f : E → ℝ} (hf : convex_on s f) {x y z : E}
586603
(hx : x ∈ s) (hy : y ∈ s) (hz : z ∈ [x, y]) :
587604
f z ≤ max (f x) (f y) :=
588605
let ⟨a, b, ha, hb, hab, hz⟩ := hz in hz ▸ hf.le_on_segment' hx hy ha hb hab
589606

590-
lemma convex_on.convex_le (hf : convex_on s f) (r : ℝ) : convex {x ∈ s | f x ≤ r} :=
607+
lemma convex_on.convex_le {f : E → β} (hf : convex_on s f) (r : β) : convex {x ∈ s | f x ≤ r} :=
591608
convex_iff_segment_subset.2 $ λ x y hx hy z hz,
592-
⟨hf.1.segment_subset hx.1 hy.1 hz,
593-
le_trans (hf.le_on_segment hx.1 hy.1 hz) $ max_le hx.2 hy.2⟩
609+
begin
610+
refine ⟨hf.1.segment_subset hx.1 hy.1 hz,_⟩,
611+
rcases hz with ⟨za,zb,hza,hzb,hzazb,H⟩,
612+
rw ←H,
613+
calc
614+
f (za • x + zb • y) ≤ za • (f x) + zb • (f y) : hf.2 hx.1 hy.1 hza hzb hzazb
615+
... ≤ za • r + zb • r
616+
: add_le_add (smul_le_smul_of_nonneg hx.2 hza)
617+
(smul_le_smul_of_nonneg hy.2 hzb)
618+
... ≤ r : by simp [←add_smul, hzazb]
619+
end
594620

595-
lemma convex_on.convex_lt (hf : convex_on s f) (r : ℝ) : convex {x ∈ s | f x < r} :=
596-
convex_iff_segment_subset.2 $ λ x y hx hy z hz,
597-
⟨hf.1.segment_subset hx.1 hy.1 hz,
598-
lt_of_le_of_lt (hf.le_on_segment hx.1 hy.1 hz) $ max_lt hx.2 hy.2⟩
621+
lemma convex_on.convex_lt {γ : Type*} [ordered_cancel_add_comm_monoid γ] [ordered_semimodule ℝ γ]
622+
{f : E → γ} (hf : convex_on s f) (r : γ) : convex {x ∈ s | f x < r} :=
623+
begin
624+
intros a b as bs xa xb hxa hxb hxaxb,
625+
refine ⟨hf.1 as.1 bs.1 hxa hxb hxaxb,_⟩,
626+
dsimp,
627+
by_cases H : xa = 0,
628+
{ have H' : xb = 1 := by rwa [H, zero_add] at hxaxb,
629+
rw [H, H', zero_smul, one_smul, zero_add],
630+
exact bs.2 },
631+
{ calc
632+
f (xa • a + xb • b) ≤ xa • (f a) + xb • (f b) : hf.2 as.1 bs.1 hxa hxb hxaxb
633+
... < xa • r + xb • (f b)
634+
: (add_lt_add_iff_right (xb • (f b))).mpr
635+
(smul_lt_smul_of_pos as.2
636+
(lt_of_le_of_ne hxa (ne.symm H)))
637+
... ≤ xa • r + xb • r
638+
: (add_le_add_iff_left (xa • r)).mpr
639+
(smul_le_smul_of_nonneg (le_of_lt bs.2) hxb)
640+
... = r
641+
: by simp only [←add_smul, hxaxb, one_smul] }
642+
end
599643

600-
lemma convex_on.convex_epigraph (hf : convex_on s f) :
601-
convex {p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
644+
lemma convex_on.convex_epigraph {γ : Type*} [ordered_add_comm_group γ] [ordered_semimodule ℝ γ]
645+
{f : E → γ} (hf : convex_on s f) :
646+
convex {p : E × γ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
602647
begin
603648
rintros ⟨x, r⟩ ⟨y, t⟩ ⟨hx, hr⟩ ⟨hy, ht⟩ a b ha hb hab,
604649
refine ⟨hf.1 hx hy ha hb hab, _⟩,
605-
calc f (a • x + b • y) ≤ a * f x + b * f y : hf.2 hx hy ha hb hab
606-
... ≤ a * r + b * t : add_le_add (mul_le_mul_of_nonneg_left hr ha)
607-
(mul_le_mul_of_nonneg_left ht hb)
650+
calc f (a • x + b • y) ≤ a • f x + b • f y : hf.2 hx hy ha hb hab
651+
... ≤ a • r + b • t : add_le_add (smul_le_smul_of_nonneg hr ha)
652+
(smul_le_smul_of_nonneg ht hb)
608653
end
609654

610-
lemma convex_on_iff_convex_epigraph : convex_on s f ↔ convex {p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
655+
lemma convex_on_iff_convex_epigraph {γ : Type*} [ordered_add_comm_group γ] [ordered_semimodule ℝ γ]
656+
{f : E → γ} :
657+
convex_on s f ↔ convex {p : E × γ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
611658
begin
612659
refine ⟨convex_on.convex_epigraph, λ h, ⟨_, _⟩⟩,
613660
{ assume x y hx hy a b ha hb hab,
@@ -617,30 +664,30 @@ begin
617664
end
618665

619666
/-- If a function is convex on s, it remains convex when precomposed by an affine map -/
620-
lemma convex_on.comp_affine_map {f : F → ℝ} (g : affine_map ℝ E F) {s : set F}
667+
lemma convex_on.comp_affine_map {f : F → β} (g : affine_map ℝ E F) {s : set F}
621668
(hf : convex_on s f) : convex_on (g ⁻¹' s) (f ∘ g) :=
622669
begin
623670
refine ⟨hf.1.affine_preimage _,_⟩,
624671
intros x y xs ys a b ha hb hab,
625672
calc
626673
(f ∘ g) (a • x + b • y) = f (g (a • x + b • y)) : rfl
627674
... = f (a • (g x) + b • (g y)) : by rw [convex.combo_affine_apply hab]
628-
... ≤ a * f (g x) + b * f (g y) : hf.2 xs ys ha hb hab
629-
... = a * (f ∘ g) x + b * (f ∘ g) y : rfl
675+
... ≤ a • f (g x) + b • f (g y) : hf.2 xs ys ha hb hab
676+
... = a • (f ∘ g) x + b • (f ∘ g) y : rfl
630677
end
631678

632679
/-- If g is convex on s, so is (g ∘ f) on f ⁻¹' s for a linear f. -/
633-
lemma convex_on.comp_linear_map {g : F → ℝ} {s : set F} (hg : convex_on s g) (f : E →ₗ[ℝ] F) :
680+
lemma convex_on.comp_linear_map {g : F → β} {s : set F} (hg : convex_on s g) (f : E →ₗ[ℝ] F) :
634681
convex_on (f ⁻¹' s) (g ∘ f) :=
635682
hg.comp_affine_map f.to_affine_map
636683

637684
/-- If a function is convex on s, it remains convex after a translation. -/
638-
lemma convex_on.translate_right {f : E → ℝ} {s : set E} {a : E} (hf : convex_on s f) :
685+
lemma convex_on.translate_right {f : E → β} {s : set E} {a : E} (hf : convex_on s f) :
639686
convex_on ((λ z, a + z) ⁻¹' s) (f ∘ (λ z, a + z)) :=
640687
hf.comp_affine_map $ affine_map.const ℝ E a +ᵥ affine_map.id ℝ E
641688

642689
/-- If a function is convex on s, it remains convex after a translation. -/
643-
lemma convex_on.translate_left {f : E → ℝ} {s : set E} {a : E} (hf : convex_on s f) :
690+
lemma convex_on.translate_left {f : E → β} {s : set E} {a : E} (hf : convex_on s f) :
644691
convex_on ((λ z, a + z) ⁻¹' s) (f ∘ (λ z, z + a)) :=
645692
by simpa only [add_comm] using hf.translate_right
646693

src/analysis/convex/specific_functions.lean

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ convex_on_univ_of_deriv2_nonneg differentiable_exp (by simp)
2929
(assume x, (iter_deriv_exp 2).symm ▸ le_of_lt (exp_pos x))
3030

3131
/-- `x^n`, `n : ℕ` is convex on the whole real line whenever `n` is even -/
32-
lemma convex_on_pow_of_even {n : ℕ} (hn : n.even) : convex_on set.univ (λ x, x^n) :=
32+
lemma convex_on_pow_of_even {n : ℕ} (hn : n.even) : convex_on set.univ (λ x : ℝ, x^n) :=
3333
begin
3434
apply convex_on_univ_of_deriv2_nonneg differentiable_pow,
3535
{ simp only [deriv_pow', differentiable.mul, differentiable_const, differentiable_pow] },
@@ -41,7 +41,7 @@ begin
4141
end
4242

4343
/-- `x^n`, `n : ℕ` is convex on `[0, +∞)` for all `n` -/
44-
lemma convex_on_pow (n : ℕ) : convex_on (Ici 0) (λ x, x^n) :=
44+
lemma convex_on_pow (n : ℕ) : convex_on (Ici 0) (λ x : ℝ, x^n) :=
4545
begin
4646
apply convex_on_of_deriv2_nonneg (convex_Ici _) (continuous_pow n).continuous_on;
4747
simp only [interior_Ici, differentiable_on_pow, deriv_pow',
@@ -82,7 +82,7 @@ begin
8282
end
8383

8484
/-- `x^m`, `m : ℤ` is convex on `(0, +∞)` for all `m` -/
85-
lemma convex_on_fpow (m : ℤ) : convex_on (Ioi 0) (λ x, x^m) :=
85+
lemma convex_on_fpow (m : ℤ) : convex_on (Ioi 0) (λ x : ℝ, x^m) :=
8686
begin
8787
apply convex_on_of_deriv2_nonneg (convex_Ioi 0); try { rw [interior_Ioi] },
8888
{ exact (differentiable_on_fpow $ lt_irrefl _).continuous_on },
@@ -97,7 +97,7 @@ begin
9797
exact int_prod_range_nonneg _ _ (nat.even_bit0 1) }
9898
end
9999

100-
lemma convex_on_rpow {p : ℝ} (hp : 1 ≤ p) : convex_on (Ici 0) (λ x, x^p) :=
100+
lemma convex_on_rpow {p : ℝ} (hp : 1 ≤ p) : convex_on (Ici 0) (λ x : ℝ, x^p) :=
101101
begin
102102
have A : deriv (λ (x : ℝ), x ^ p) = λ x, p * x^(p-1), by { ext x, simp [hp] },
103103
apply convex_on_of_deriv2_nonneg (convex_Ici 0),

0 commit comments

Comments
 (0)