feat(algebra/module): define ordered semimodules and generalize convexity of functions (#3728)

dupuisf · dupuisf · commit d4cb23728655 · 2020-08-17T15:28:52.000Z
Co-authored-by: Frédéric Dupuis <31101893+dupuisf@users.noreply.github.com>
diff --git a/src/algebra/module/ordered.lean b/src/algebra/module/ordered.lean
@@ -0,0 +1,73 @@
+/-
+Copyright (c) 2020 Frédéric Dupuis. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Frédéric Dupuis
+-/
+
+import algebra.module.basic
+import algebra.ordered_group
+
+/-!
+# Ordered semimodules
+
+In this file we define
+
+* `ordered_semimodule R M` : an ordered additive commutative monoid `M` is an `ordered_semimodule`
+  over an `ordered_semiring` `R` if the scalar product respects the order relation on the
+  monoid and on the ring.
+
+## Implementation notes
+
+* We choose to define `ordered_semimodule` so that it extends `semimodule` only, as is done
+  for semimodules themselves.
+* To get ordered modules and ordered vector spaces, it suffices to replace the
+  `ordered_add_comm_monoid` and the `ordered_semiring` as desired.
+
+## TODO
+
+* Connect this with convex cones: show that a convex cone defines an order on the vector space
+  and vice-versa.
+
+## References
+
+* https://en.wikipedia.org/wiki/Ordered_vector_space
+
+## Tags
+
+ordered semimodule, ordered module, ordered vector space
+-/
+
+
+set_option default_priority 100 -- see Note [default priority]
+
+/--
+`ordered_semimodule R β`: a semimodule over an ordered semiring `R` on an ordered additive
+commutative monoid `β` in which multiplying by a positive scalar preserves `<`, and a strict
+inequality between multiples by the same nonnegative scalar implies it between the arguments. -/
+@[protect_proj, ancestor semimodule]
+class ordered_semimodule (R β : Type*)
+  [ordered_semiring R] [ordered_add_comm_monoid β] extends semimodule R β :=
+(smul_lt_smul_of_pos : ∀ {a b : β}, ∀ {c : R}, a < b → 0 < c → c • a < c • b)
+(lt_of_smul_lt_smul_of_nonneg : ∀ {a b : β}, ∀ {c : R}, c • a < c • b → 0 ≤ c → a < b)
+
+variable {R : Type*}
+
+instance linear_ordered_ring.to_ordered_semimodule [linear_ordered_ring R] :
+  ordered_semimodule R R := -- `R` acting on itself; both fields follow from `mul` order lemmas
+{ smul_lt_smul_of_pos      := ordered_semiring.mul_lt_mul_of_pos_left,
+  lt_of_smul_lt_smul_of_nonneg  := λ _ _ _, lt_of_mul_lt_mul_left }
+
+variables {β : Type*} [ordered_semiring R] [ordered_add_comm_monoid β] [ordered_semimodule R β]
+  {a b : β} {c : R}
+
+/-- Multiplying by a positive scalar preserves `<`; restates the `ordered_semimodule` field. -/
+lemma smul_lt_smul_of_pos : a < b → 0 < c → c • a < c • b := ordered_semimodule.smul_lt_smul_of_pos
+
+/-- Multiplying by a nonnegative scalar preserves `≤`. -/
+lemma smul_le_smul_of_nonneg (h₁ : a ≤ b) (h₂ : 0 ≤ c) : c • a ≤ c • b :=
+begin
+  by_cases hc : c = 0, { simp [hc, zero_smul] },
+  by_cases hab : a = b, { rw hab },
+  -- Otherwise `a < b` and `0 < c`, so the strict lemma applies.
+  exact le_of_lt (smul_lt_smul_of_pos (lt_of_le_of_ne h₁ hab) (lt_of_le_of_ne h₂ (ne.symm hc)))
+end
diff --git a/src/analysis/convex/basic.lean b/src/analysis/convex/basic.lean
@@ -7,6 +7,8 @@ import data.set.intervals.ord_connected
 import data.set.intervals.image_preimage
 import data.complex.module
 import linear_algebra.affine_space.basic
+import algebra.module.ordered
+
 
 /-!
 # Convex sets and functions on real vector spaces
@@ -15,10 +17,10 @@ In a real vector space, we define the following objects and properties.
 
 * `segment x y` is the closed segment joining `x` and `y`.
 * A set `s` is `convex` if for any two points `x y ∈ s` it includes `segment x y`;
-* A function `f` is `convex_on` a set `s` if `s` is itself a convex set, and for any two points
-  `x y ∈ s` the segment joining `(x, f x)` to `(y, f y)` is (non-strictly) above the graph of `f`;
-  equivalently, `convex_on f s` means that the epigraph `{p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2}`
-  is a convex set;
+* A function `f : E → β` is `convex_on` a set `s` if `s` is itself a convex set, and for any two
+  points `x y ∈ s` the segment joining `(x, f x)` to `(y, f y)` is (non-strictly) above the graph
+  of `f`; equivalently, `convex_on s f` means that the epigraph
+  `{p : E × β | p.1 ∈ s ∧ f p.1 ≤ p.2}` is a convex set;
 * Center mass of a finite set of points with prescribed weights.
 * Convex hull of a set `s` is the minimal convex set that includes `s`.
 * Standard simplex `std_simplex ι [fintype ι]` is the intersection of the positive quadrant with
@@ -27,6 +29,9 @@ In a real vector space, we define the following objects and properties.
 We also provide various equivalent versions of the definitions above, prove that some specific sets
 are convex, and prove Jensen's inequality.
 
+Note: To define convexity for functions `f : E → β`, we need `β` to be an ordered vector space,
+defined using the instance `ordered_semimodule ℝ β`.
+
 ## Notations
 
 We use the following local notations:
@@ -462,26 +467,28 @@ end sets
 
 section functions
 
+variables {β : Type*} [ordered_add_comm_monoid β] [ordered_semimodule ℝ β]
+
 local notation `[`x `, ` y `]` := segment x y
 
 /-! ### Convex functions -/
 
 /-- Convexity of functions -/
-def convex_on (s : set E) (f : E → ℝ) : Prop :=
+def convex_on (s : set E) (f : E → β) : Prop :=
   convex s ∧
   ∀ ⦃x y : E⦄, x ∈ s → y ∈ s → ∀ ⦃a b : ℝ⦄, 0 ≤ a → 0 ≤ b → a + b = 1 →
-    f (a • x + b • y) ≤ a * f x + b * f y
+    f (a • x + b • y) ≤ a • f x + b • f y
 
 lemma convex_on_id {s : set ℝ} (hs : convex s) : convex_on s id := ⟨hs, by { intros, refl }⟩
 
-lemma convex_on_const (c : ℝ) (hs : convex s) : convex_on s (λ x:E, c) :=
-⟨hs, by { intros, simp only [← add_mul, *, one_mul] }⟩
+lemma convex_on_const (c : β) (hs : convex s) : convex_on s (λ x:E, c) :=
+⟨hs, by { intros, simp only [← add_smul, *, one_smul] }⟩
 
-variables {t : set E} {f g : E → ℝ}
+variables {t : set E}
 
-lemma convex_on_iff_div:
+lemma convex_on_iff_div {f : E → β} :
   convex_on s f ↔ convex s ∧ ∀ ⦃x y : E⦄, x ∈ s → y ∈ s → ∀  ⦃a b : ℝ⦄, 0 ≤ a → 0 ≤ b → 0 < a + b →
-    f ((a/(a+b)) • x + (b/(a+b)) • y) ≤ (a/(a+b)) * f x + (b/(a+b)) * f y :=
+    f ((a/(a+b)) • x + (b/(a+b)) • y) ≤ (a/(a+b)) • f x + (b/(a+b)) • f y :=
 and_congr iff.rfl
 ⟨begin
   intros h x y hx hy a b ha hb hab,
@@ -495,19 +502,19 @@ begin
 end⟩
 
 /-- For a function on a convex set in a linear ordered space, in order to prove that it is convex
-it suffices to verify the inequality `f (a • x + b • y) ≤ a * f x + b * f y` only for `x < y`
+it suffices to verify the inequality `f (a • x + b • y) ≤ a • f x + b • f y` only for `x < y`
 and positive `a`, `b`. The main use case is `E = ℝ` however one can apply it, e.g., to `ℝ^n` with
 lexicographic order. -/
-lemma linear_order.convex_on_of_lt [linear_order E] (hs : convex s)
+lemma linear_order.convex_on_of_lt {f : E → β} [linear_order E] (hs : convex s)
   (hf : ∀ ⦃x y : E⦄, x ∈ s → y ∈ s → x < y → ∀ ⦃a b : ℝ⦄, 0 < a → 0 < b → a + b = 1 →
-    f (a • x + b • y) ≤ a * f x + b * f y) : convex_on s f :=
+    f (a • x + b • y) ≤ a • f x + b • f y) : convex_on s f :=
 begin
   use hs,
   intros x y hx hy a b ha hb hab,
   wlog hxy : x<=y using [x y a b, y x b a],
   { exact le_total _ _ },
   { cases eq_or_lt_of_le hxy with hxy hxy,
-      by { subst y, rw [← add_smul, ← add_mul, hab, one_smul, one_mul] },
+      by { subst y, rw [← add_smul, ← add_smul, hab, one_smul, one_smul] },
     cases eq_or_lt_of_le ha with ha ha,
       by { subst a, rw [zero_add] at hab, subst b, simp },
     cases eq_or_lt_of_le hb with hb hb,
@@ -544,36 +551,42 @@ begin
   convert this; symmetry; simp only [div_eq_iff (ne_of_gt B), y]; ring
 end
 
-lemma convex_on.subset (h_convex_on : convex_on t f) (h_subset : s ⊆ t) (h_convex : convex s) :
-  convex_on s f :=
+lemma convex_on.subset {f : E → β} (h_convex_on : convex_on t f)
+  (h_subset : s ⊆ t) (h_convex : convex s) : convex_on s f :=
 begin
   apply and.intro h_convex,
   intros x y hx hy,
   exact h_convex_on.2 (h_subset hx) (h_subset hy),
 end
 
-lemma convex_on.add (hf : convex_on s f) (hg : convex_on s g) : convex_on s (λx, f x + g x) :=
+lemma convex_on.add {f g : E → β} (hf : convex_on s f) (hg : convex_on s g) :
+  convex_on s (λx, f x + g x) :=
 begin
   apply and.intro hf.1,
   intros x y hx hy a b ha hb hab,
   calc
-    f (a • x + b • y) + g (a • x + b • y) ≤ (a * f x + b * f y) + (a * g x + b * g y)
+    f (a • x + b • y) + g (a • x + b • y) ≤ (a • f x + b • f y) + (a • g x + b • g y)
       : add_le_add (hf.2 hx hy ha hb hab) (hg.2 hx hy ha hb hab)
-    ... = a * f x + a * g x + b * f y + b * g y : by linarith
-    ... = a * (f x + g x) + b * (f y + g y) : by simp [mul_add, add_assoc]
+    ... = a • f x + a • g x + b • f y + b • g y : by abel
+    ... = a • (f x + g x) + b • (f y + g y) : by simp [smul_add, add_assoc]
 end
 
-lemma convex_on.smul {c : ℝ} (hc : 0 ≤ c) (hf : convex_on s f) : convex_on s (λx, c * f x) :=
+lemma convex_on.smul {f : E → β} {c : ℝ} (hc : 0 ≤ c) (hf : convex_on s f) :
+  convex_on s (λx, c • f x) :=
 begin
   apply and.intro hf.1,
   intros x y hx hy a b ha hb hab,
   calc
-    c * f (a • x + b • y) ≤ c * (a * f x + b * f y)
-      : mul_le_mul_of_nonneg_left (hf.2 hx hy ha hb hab) hc
-    ... = a * (c * f x) + b * (c * f y) : by rw mul_add; ac_refl
+    c • f (a • x + b • y) ≤ c • (a • f x + b • f y)
+      : smul_le_smul_of_nonneg (hf.2 hx hy ha hb hab) hc
+    ... = a • (c • f x) + b • (c • f y) : by simp only [smul_add, smul_comm]
 end
 
-lemma convex_on.le_on_segment' {x y : E} {a b : ℝ}
+/--
+A convex function on a segment is bounded above by the maximum of its values at the endpoints.
+Note: This cannot be generalized to `E → β` because taking `max` needs a linear order.
+-/
+lemma convex_on.le_on_segment' {f : E → ℝ} {x y : E} {a b : ℝ}
   (hf : convex_on s f) (hx : x ∈ s) (hy : y ∈ s) (ha : 0 ≤ a) (hb : 0 ≤ b) (hab : a + b = 1) :
   f (a • x + b • y) ≤ max (f x) (f y) :=
 calc
@@ -582,32 +595,66 @@ calc
     add_le_add (mul_le_mul_of_nonneg_left (le_max_left _ _) ha) (mul_le_mul_of_nonneg_left (le_max_right _ _) hb)
   ... ≤ max (f x) (f y) : by rw [←add_mul, hab, one_mul]
 
-lemma convex_on.le_on_segment (hf : convex_on s f) {x y z : E}
+/--
+A convex function on a segment is bounded above by the maximum of its values at the endpoints.
+Note: This cannot be generalized to `E → β` because taking `max` needs a linear order.
+-/
+lemma convex_on.le_on_segment {f : E → ℝ} (hf : convex_on s f) {x y z : E}
   (hx : x ∈ s) (hy : y ∈ s) (hz : z ∈ [x, y]) :
   f z ≤ max (f x) (f y) :=
 let ⟨a, b, ha, hb, hab, hz⟩ := hz in hz ▸ hf.le_on_segment' hx hy ha hb hab
 
-lemma convex_on.convex_le (hf : convex_on s f) (r : ℝ) : convex {x ∈ s | f x ≤ r} :=
+lemma convex_on.convex_le {f : E → β} (hf : convex_on s f) (r : β) : convex {x ∈ s | f x ≤ r} :=
 convex_iff_segment_subset.2 $ λ x y hx hy z hz,
-  ⟨hf.1.segment_subset hx.1 hy.1 hz,
-    le_trans (hf.le_on_segment hx.1 hy.1 hz) $ max_le hx.2 hy.2⟩
+begin
+  refine ⟨hf.1.segment_subset hx.1 hy.1 hz,_⟩,
+  rcases hz with ⟨za,zb,hza,hzb,hzazb,H⟩,
+  rw ←H,
+  calc
+    f (za • x + zb • y) ≤ za • (f x) + zb • (f y)   : hf.2 hx.1 hy.1 hza hzb hzazb
+                    ... ≤ za • r + zb • r
+                      : add_le_add (smul_le_smul_of_nonneg hx.2 hza)
+                                    (smul_le_smul_of_nonneg hy.2 hzb)
+                    ... ≤ r                         : by simp [←add_smul, hzazb]
+end
 
-lemma convex_on.convex_lt (hf : convex_on s f) (r : ℝ) : convex {x ∈ s | f x < r} :=
-convex_iff_segment_subset.2 $ λ x y hx hy z hz,
-  ⟨hf.1.segment_subset hx.1 hy.1 hz,
-    lt_of_le_of_lt (hf.le_on_segment hx.1 hy.1 hz) $ max_lt hx.2 hy.2⟩
+/-- Strict sublevel sets `{x ∈ s | f x < r}` of a function convex on `s` are convex. -/
+lemma convex_on.convex_lt {γ : Type*} [ordered_cancel_add_comm_monoid γ] [ordered_semimodule ℝ γ]
+  {f : E → γ} (hf : convex_on s f) (r : γ) : convex {x ∈ s | f x < r} :=
+begin
+  intros x y hx hy a b ha hb hab,
+  refine ⟨hf.1 hx.1 hy.1 ha hb hab, _⟩,
+  -- Simplify the goal definitionally before the case split.
+  dsimp,
+  by_cases ha0 : a = 0,
+  { -- Zero weight on `x`: then `b = 1` and the combination is `y` itself.
+    have hb1 : b = 1 := by rwa [ha0, zero_add] at hab,
+    rw [ha0, hb1, zero_smul, one_smul, zero_add],
+    exact hy.2 },
+  { -- Otherwise `0 < a`, which makes the `x` summand strictly smaller than `a • r`.
+    calc f (a • x + b • y)
+        ≤ a • f x + b • f y : hf.2 hx.1 hy.1 ha hb hab
+    ... < a • r + b • f y : (add_lt_add_iff_right (b • f y)).mpr
+                              (smul_lt_smul_of_pos hx.2 (lt_of_le_of_ne ha (ne.symm ha0)))
+    ... ≤ a • r + b • r : (add_le_add_iff_left (a • r)).mpr
+                              (smul_le_smul_of_nonneg (le_of_lt hy.2) hb)
+    ... = r : by simp only [←add_smul, hab, one_smul] }
+end
 
-lemma convex_on.convex_epigraph (hf : convex_on s f) :
-  convex {p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
+lemma convex_on.convex_epigraph {γ : Type*} [ordered_add_comm_group γ] [ordered_semimodule ℝ γ]
+  {f : E → γ} (hf : convex_on s f) :
+  convex {p : E × γ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
 begin
   rintros ⟨x, r⟩ ⟨y, t⟩ ⟨hx, hr⟩ ⟨hy, ht⟩ a b ha hb hab,
   refine ⟨hf.1 hx hy ha hb hab, _⟩,
-  calc f (a • x + b • y) ≤ a * f x + b * f y : hf.2 hx hy ha hb hab
-  ... ≤ a * r + b * t : add_le_add (mul_le_mul_of_nonneg_left hr ha)
-    (mul_le_mul_of_nonneg_left ht hb)
+  calc f (a • x + b • y) ≤ a • f x + b • f y : hf.2 hx hy ha hb hab
+  ... ≤ a • r + b • t : add_le_add (smul_le_smul_of_nonneg hr ha)
+                            (smul_le_smul_of_nonneg ht hb)
 end
 
-lemma convex_on_iff_convex_epigraph : convex_on s f ↔ convex {p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
+lemma convex_on_iff_convex_epigraph {γ : Type*} [ordered_add_comm_group γ] [ordered_semimodule ℝ γ]
+  {f : E → γ} :
+  convex_on s f ↔ convex {p : E × γ | p.1 ∈ s ∧ f p.1 ≤ p.2} :=
 begin
   refine ⟨convex_on.convex_epigraph, λ h, ⟨_, _⟩⟩,
   { assume x y hx hy a b ha hb hab,
@@ -617,30 +664,30 @@ begin
 end
 
 /-- If a function is convex on s, it remains convex when precomposed by an affine map -/
-lemma convex_on.comp_affine_map {f : F → ℝ} (g : affine_map ℝ E F) {s : set F}
+lemma convex_on.comp_affine_map {f : F → β} (g : affine_map ℝ E F) {s : set F}
   (hf : convex_on s f) : convex_on (g ⁻¹' s) (f ∘ g) :=
 begin
   refine ⟨hf.1.affine_preimage  _,_⟩,
   intros x y xs ys a b ha hb hab,
   calc
     (f ∘ g) (a • x + b • y) = f (g (a • x + b • y))         : rfl
                        ...  = f (a • (g x) + b • (g y))     : by rw [convex.combo_affine_apply hab]
-                       ...  ≤ a * f (g x) + b * f (g y)     : hf.2 xs ys ha hb hab
-                       ...  = a * (f ∘ g) x + b * (f ∘ g) y  : rfl
+                       ...  ≤ a • f (g x) + b • f (g y)     : hf.2 xs ys ha hb hab
+                       ...  = a • (f ∘ g) x + b • (f ∘ g) y  : rfl
 end
 
 /-- If g is convex on s, so is (g ∘ f) on f ⁻¹' s for a linear f. -/
-lemma convex_on.comp_linear_map {g : F → ℝ} {s : set F} (hg : convex_on s g) (f : E →ₗ[ℝ] F) :
+lemma convex_on.comp_linear_map {g : F → β} {s : set F} (hg : convex_on s g) (f : E →ₗ[ℝ] F) :
   convex_on (f ⁻¹' s) (g ∘ f) :=
 hg.comp_affine_map f.to_affine_map
 
 /-- If a function is convex on s, it remains convex after a translation. -/
-lemma convex_on.translate_right {f : E → ℝ} {s : set E} {a : E} (hf : convex_on s f) :
+lemma convex_on.translate_right {f : E → β} {s : set E} {a : E} (hf : convex_on s f) :
   convex_on ((λ z, a + z) ⁻¹' s) (f ∘ (λ z, a + z)) :=
 hf.comp_affine_map $ affine_map.const ℝ E a +ᵥ affine_map.id ℝ E
 
 /-- If a function is convex on s, it remains convex after a translation. -/
-lemma convex_on.translate_left {f : E → ℝ} {s : set E} {a : E} (hf : convex_on s f) :
+lemma convex_on.translate_left {f : E → β} {s : set E} {a : E} (hf : convex_on s f) :
   convex_on ((λ z, a + z) ⁻¹' s) (f ∘ (λ z, z + a)) :=
 by simpa only [add_comm] using  hf.translate_right
 
diff --git a/src/analysis/convex/specific_functions.lean b/src/analysis/convex/specific_functions.lean
@@ -29,7 +29,7 @@ convex_on_univ_of_deriv2_nonneg differentiable_exp (by simp)
   (assume x, (iter_deriv_exp 2).symm ▸ le_of_lt (exp_pos x))
 
 /-- `x^n`, `n : ℕ` is convex on the whole real line whenever `n` is even -/
-lemma convex_on_pow_of_even {n : ℕ} (hn : n.even) : convex_on set.univ (λ x, x^n) :=
+lemma convex_on_pow_of_even {n : ℕ} (hn : n.even) : convex_on set.univ (λ x : ℝ, x^n) :=
 begin
   apply convex_on_univ_of_deriv2_nonneg differentiable_pow,
   { simp only [deriv_pow', differentiable.mul, differentiable_const, differentiable_pow] },
@@ -41,7 +41,7 @@ begin
 end
 
 /-- `x^n`, `n : ℕ` is convex on `[0, +∞)` for all `n` -/
-lemma convex_on_pow (n : ℕ) : convex_on (Ici 0) (λ x, x^n) :=
+lemma convex_on_pow (n : ℕ) : convex_on (Ici 0) (λ x : ℝ, x^n) :=
 begin
   apply convex_on_of_deriv2_nonneg (convex_Ici _) (continuous_pow n).continuous_on;
     simp only [interior_Ici, differentiable_on_pow, deriv_pow',
@@ -82,7 +82,7 @@ begin
 end
 
 /-- `x^m`, `m : ℤ` is convex on `(0, +∞)` for all `m` -/
-lemma convex_on_fpow (m : ℤ) : convex_on (Ioi 0) (λ x, x^m) :=
+lemma convex_on_fpow (m : ℤ) : convex_on (Ioi 0) (λ x : ℝ, x^m) :=
 begin
   apply convex_on_of_deriv2_nonneg (convex_Ioi 0); try { rw [interior_Ioi] },
   { exact (differentiable_on_fpow $ lt_irrefl _).continuous_on },
@@ -97,7 +97,7 @@ begin
     exact int_prod_range_nonneg _ _ (nat.even_bit0 1) }
 end
 
-lemma convex_on_rpow {p : ℝ} (hp : 1 ≤ p) : convex_on (Ici 0) (λ x, x^p) :=
+lemma convex_on_rpow {p : ℝ} (hp : 1 ≤ p) : convex_on (Ici 0) (λ x : ℝ, x^p) :=
 begin
   have A : deriv (λ (x : ℝ), x ^ p) = λ x, p * x^(p-1), by { ext x, simp [hp] },
   apply convex_on_of_deriv2_nonneg (convex_Ici 0),