Skip to content

Commit c52e083

Browse files
fpvandoornJovanGerb
andcommitted
refactor: simplify dictionary in fixAbbreviation (#29352)
* Improve capitalization algorithm: you now don't have to give entries twice in `fixAbbreviation` (now `abbreviationDict`). * The new algorithm uses the fact that we can easily find the `lowerCamelCase` name from the `UpperCamelCase` name, namely by applying `String.decapitalizeSeq`. This means that both `nameDict` and `abbreviationDict` have as input `lowerCamelCase` and as output `UpperCamelCase`. * This removes the need to especially fix `NSMul`/`ZSMul` and similar. * `abbreviationDict` still needs two entries if both `snake_case` and either `camelCase` need to be translated. Co-authored-by: Jovan Gerbscheid <jovan.gerbscheid@gmail.com>
1 parent 13da915 commit c52e083

File tree

3 files changed

+166
-160
lines changed

3 files changed

+166
-160
lines changed

Mathlib/Tactic/ToAdditive/Frontend.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ See the docstring of `ToAdditive.to_additive` for more information
2828
open Lean Meta Elab Command Std
2929

3030
namespace ToAdditive
31+
open ToAdditive -- currently needed to enable projection notation
3132

3233
/-- An attribute that tells `@[to_additive]` that certain arguments of this definition are not
3334
involved when using `@[to_additive]`.

Mathlib/Tactic/ToAdditive/GuessName.lean

Lines changed: 162 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import Mathlib.Data.String.Defs
1414
open Std
1515

1616
namespace ToAdditive
17+
open ToAdditive -- currently needed to enable projection notation
1718

1819
/-- A set of strings of names that end in a capital letter.
1920
* If the string contains a lowercase letter, the string should be split between the first occurrence
@@ -35,7 +36,7 @@ open String in
3536
3637
E.g. `#eval "InvHMulLEConjugate₂SMul_ne_top".splitCase` yields
3738
`["Inv", "HMul", "LE", "Conjugate₂", "SMul", "_", "ne", "_", "top"]`. -/
38-
partial def _root_.String.splitCase (s : String) (i₀ : Pos.Raw := 0) (r : List String := []) :
39+
partial def String.splitCase (s : String) (i₀ : Pos.Raw := 0) (r : List String := []) :
3940
List String := Id.run do
4041
-- We test if we need to split between `i₀` and `i₁`.
4142
let i₁ := i₀.next s
@@ -61,183 +62,185 @@ partial def _root_.String.splitCase (s : String) (i₀ : Pos.Raw := 0) (r : List
6162
(String.Pos.Raw.extract s 0 i₁)::r
6263
return splitCase s i₁ r
6364

64-
/-- Helper for `capitalizeLike`. -/
65-
partial def capitalizeLikeAux (s : String) (i : String.Pos.Raw := 0) (p : String) : String :=
66-
if i.atEnd p || i.atEnd s then
67-
p
65+
/-- Replaces characters in `s` by lower-casing the first characters until a non-upper-case character
66+
is found. -/
67+
partial def String.decapitalizeSeq (s : String) (i : String.Pos.Raw := 0) : String :=
68+
if i.atEnd s || !(i.get s).isUpper then
69+
s
6870
else
69-
let j := i.next p
70-
if (i.get s).isLower then
71-
capitalizeLikeAux s j <| i.set p (i.get p |>.toLower)
72-
else if (i.get s).isUpper then
73-
capitalizeLikeAux s j <| i.set p (i.get p |>.toUpper)
74-
else
75-
capitalizeLikeAux s j p
71+
decapitalizeSeq (i.set s (i.get s).toLower) <| i.next s
7672

77-
/-- Capitalizes `s` char-by-char like `r`. If `s` is longer, it leaves the tail untouched. -/
78-
def capitalizeLike (r : String) (s : String) :=
79-
capitalizeLikeAux r 0 s
73+
/-- If `r` starts with an upper-case letter, return `s`, otherwise return `s` with the
74+
initial sequence of upper-case letters lower-cased. -/
75+
def decapitalizeLike (r : String) (s : String) :=
76+
if String.Pos.Raw.get r 0 |>.isUpper then s else s.decapitalizeSeq
8077

81-
/-- Capitalize First element of a list like `s`.
82-
Note that we need to capitalize multiple characters in some cases,
83-
in examples like `HMul` or `hAdd`. -/
84-
def capitalizeFirstLike (s : String) : List String → List String
85-
| x :: r => capitalizeLike s x :: r
78+
/-- Decapitalize the first element of a list if `s` starts with a lower-case letter.
79+
Note that we need to decapitalize multiple characters in some cases,
80+
in examples like `HMul` or `HAdd`. -/
81+
def decapitalizeFirstLike (s : String) : List String → List String
82+
| x :: r => decapitalizeLike s x :: r
8683
| [] => []
8784

8885
/--
8986
Dictionary used by `guessName` to autogenerate names.
87+
This only transforms single name components, unlike `abbreviationDict`.
9088
91-
Note: `guessName` capitalizes first element of the output according to
92-
capitalization of the input. Input and first element should therefore be lower-case,
93-
2nd element should be capitalized properly.
89+
Note: `guessName` capitalizes the output according to the capitalization of the input.
90+
In order for this to work, the input should always start with a lower case letter, and the output
91+
should always start with an upper case letter.
9492
-/
95-
def nameDict : String → List String
96-
| "one" => ["zero"]
97-
| "mul" => ["add"]
98-
| "smul" => ["vadd"]
99-
| "inv" => ["neg"]
100-
| "div" => ["sub"]
101-
| "prod" => ["sum"]
102-
| "hmul" => ["hadd"]
103-
| "hsmul" => ["hvadd"]
104-
| "hdiv" => ["hsub"]
105-
| "hpow" => ["hsmul"]
106-
| "finprod" => ["finsum"]
107-
| "tprod" => ["tsum"]
108-
| "pow" => ["nsmul"]
109-
| "npow" => ["nsmul"]
110-
| "zpow" => ["zsmul"]
111-
| "mabs" => ["abs"]
112-
| "monoid" => ["add", "Monoid"]
113-
| "submonoid" => ["add", "Submonoid"]
114-
| "group" => ["add", "Group"]
115-
| "subgroup" => ["add", "Subgroup"]
116-
| "semigroup" => ["add", "Semigroup"]
117-
| "magma" => ["add", "Magma"]
118-
| "haar" => ["add", "Haar"]
119-
| "prehaar" => ["add", "Prehaar"]
120-
| "unit" => ["add", "Unit"]
121-
| "units" => ["add", "Units"]
122-
| "cyclic" => ["add", "Cyclic"]
123-
| "rootable" => ["divisible"]
124-
| "semigrp" => ["add", "Semigrp"]
125-
| "grp" => ["add", "Grp"]
126-
| "commute" => ["add", "Commute"]
127-
| "semiconj" => ["add", "Semiconj"]
128-
| "zpowers" => ["zmultiples"]
129-
| "powers" => ["multiples"]
130-
| "multipliable" => ["summable"]
131-
| "gpfree" => ["apfree"]
132-
| "quantale" => ["add", "Quantale"]
133-
| "square" => ["even"]
134-
| "mconv" => ["conv"]
135-
| "irreducible" => ["add", "Irreducible"]
136-
| "mlconvolution" => ["lconvolution"]
137-
| x => [x]
93+
def nameDict : Std.HashMap String (List String) := .ofList [
94+
("one", ["Zero"]),
95+
("mul", ["Add"]),
96+
("smul", ["VAdd"]),
97+
("inv", ["Neg"]),
98+
("div", ["Sub"]),
99+
("prod", ["Sum"]),
100+
("hmul", ["HAdd"]),
101+
("hsmul", ["HVAdd"]),
102+
("hdiv", ["HSub"]),
103+
("hpow", ["HSMul"]),
104+
("finprod", ["Finsum"]),
105+
("tprod", ["TSum"]),
106+
("pow", ["NSMul"]),
107+
("npow", ["NSMul"]),
108+
("zpow", ["ZSMul"]),
109+
("mabs", ["Abs"]),
110+
("monoid", ["Add", "Monoid"]),
111+
("submonoid", ["Add", "Submonoid"]),
112+
("group", ["Add", "Group"]),
113+
("subgroup", ["Add", "Subgroup"]),
114+
("semigroup", ["Add", "Semigroup"]),
115+
("magma", ["Add", "Magma"]),
116+
("haar", ["Add", "Haar"]),
117+
("prehaar", ["Add", "Prehaar"]),
118+
("unit", ["Add", "Unit"]),
119+
("units", ["Add", "Units"]),
120+
("cyclic", ["Add", "Cyclic"]),
121+
("semigrp", ["Add", "Semigrp"]),
122+
("grp", ["Add", "Grp"]),
123+
("commute", ["Add", "Commute"]),
124+
("semiconj", ["Add", "Semiconj"]),
125+
("rootable", ["Divisible"]),
126+
("zpowers", ["ZMultiples"]),
127+
("powers", ["Multiples"]),
128+
("multipliable", ["Summable"]),
129+
("gpfree", ["APFree"]),
130+
("quantale", ["Add", "Quantale"]),
131+
("square", ["Even"]),
132+
("mconv", ["Conv"]),
133+
("irreducible", ["Add", "Irreducible"]),
134+
("mlconvolution", ["LConvolution"])]
138135

139136
/--
140-
Turn each element to lower-case, apply the `nameDict` and
141-
capitalize the output like the input.
137+
Apply the `nameDict` and decapitalize the output like the input.
138+
139+
E.g.
140+
```
141+
#eval applyNameDict ["Inv", "HMul", "LE", "Conjugate₂", "SMul", "_", "ne", "_", "top"]
142+
```
143+
yields `["Neg", "HAdd", "LE", "Conjugate₂", "VAdd", "_", "ne", "_", "top"]`.
142144
-/
143145
def applyNameDict : List String → List String
144-
| x :: s => (capitalizeFirstLike x (nameDict x.toLower)) ++ applyNameDict s
146+
| x :: s =>
147+
let z := match nameDict.get? x.toLower with
148+
| some y => decapitalizeFirstLike x y
149+
| none => [x]
150+
z ++ applyNameDict s
145151
| [] => []
146152

147153
/--
148-
There are a few abbreviations we use. For example "Nonneg" instead of "ZeroLE"
149-
or "addComm" instead of "commAdd".
150-
Note: The input to this function is case sensitive!
151-
Todo: A lot of abbreviations here are manual fixes and there might be room to
152-
improve the naming logic to reduce the size of `fixAbbreviation`.
154+
We need to fix a few abbreviations after applying `nameDict`, i.e. replacing `ZeroLE` by `Nonneg`.
155+
This dictionary contains these fixes.
156+
The input should contain entries that is in `lowerCamelCase` (e.g. `ltzero`; the initial sequence
157+
of capital letters should be lower-cased) and the output should be in `UpperCamelCase`
158+
(e.g. `LTZero`).
159+
When applying the dictionary, we lower-case the output if the input was also given in lower-case.
153160
-/
154-
def fixAbbreviation : List String → List String
155-
| "is" :: "Cancel" :: "Add" :: s => "isCancelAdd" :: fixAbbreviation s
156-
| "Is" :: "Cancel" :: "Add" :: s => "IsCancelAdd" :: fixAbbreviation s
157-
| "is" :: "Left" :: "Cancel" :: "Add" :: s => "isLeftCancelAdd" :: fixAbbreviation s
158-
| "Is" :: "Left" :: "Cancel" :: "Add" :: s => "IsLeftCancelAdd" :: fixAbbreviation s
159-
| "is" :: "Right" :: "Cancel" :: "Add" :: s => "isRightCancelAdd" :: fixAbbreviation s
160-
| "Is" :: "Right" :: "Cancel" :: "Add" :: s => "IsRightCancelAdd" :: fixAbbreviation s
161-
| "cancel" :: "Add" :: s => "addCancel" :: fixAbbreviation s
162-
| "Cancel" :: "Add" :: s => "AddCancel" :: fixAbbreviation s
163-
| "left" :: "Cancel" :: "Add" :: s => "addLeftCancel" :: fixAbbreviation s
164-
| "Left" :: "Cancel" :: "Add" :: s => "AddLeftCancel" :: fixAbbreviation s
165-
| "right" :: "Cancel" :: "Add" :: s => "addRightCancel" :: fixAbbreviation s
166-
| "Right" :: "Cancel" :: "Add" :: s => "AddRightCancel" :: fixAbbreviation s
167-
| "cancel" :: "Comm" :: "Add" :: s => "addCancelComm" :: fixAbbreviation s
168-
| "Cancel" :: "Comm" :: "Add" :: s => "AddCancelComm" :: fixAbbreviation s
169-
| "comm" :: "Add" :: s => "addComm" :: fixAbbreviation s
170-
| "Comm" :: "Add" :: s => "AddComm" :: fixAbbreviation s
171-
| "Zero" :: "LE" :: s => "Nonneg" :: fixAbbreviation s
172-
| "zero" :: "_" :: "le" :: s => "nonneg" :: fixAbbreviation s
173-
| "zero" :: "LE" :: s => "nonneg" :: fixAbbreviation s
174-
| "Zero" :: "LT" :: s => "Pos" :: fixAbbreviation s
175-
| "zero" :: "_" :: "lt" :: s => "pos" :: fixAbbreviation s
176-
| "zero" :: "LT" :: s => "pos" :: fixAbbreviation s
177-
| "LE" :: "Zero" :: s => "Nonpos" :: fixAbbreviation s
178-
| "le" :: "_" :: "zero" :: s => "nonpos" :: fixAbbreviation s
179-
| "LT" :: "Zero" :: s => "Neg" :: fixAbbreviation s
180-
| "lt" :: "_" :: "zero" :: s => "neg" :: fixAbbreviation s
181-
| "Add" :: "Single" :: s => "Single" :: fixAbbreviation s
182-
| "add" :: "Single" :: s => "single" :: fixAbbreviation s
183-
| "add" :: "_" :: "single" :: s => "single" :: fixAbbreviation s
184-
| "Add" :: "Support" :: s => "Support" :: fixAbbreviation s
185-
| "add" :: "Support" :: s => "support" :: fixAbbreviation s
186-
| "add" :: "_" :: "support" :: s => "support" :: fixAbbreviation s
187-
| "Add" :: "TSupport" :: s => "TSupport" :: fixAbbreviation s
188-
| "add" :: "TSupport" :: s => "tsupport" :: fixAbbreviation s
189-
| "add" :: "_" :: "tsupport" :: s => "tsupport" :: fixAbbreviation s
190-
| "Add" :: "Indicator" :: s => "Indicator" :: fixAbbreviation s
191-
| "add" :: "Indicator" :: s => "indicator" :: fixAbbreviation s
192-
| "add" :: "_" :: "indicator" :: s => "indicator" :: fixAbbreviation s
193-
| "is" :: "Even" :: s => "even" :: fixAbbreviation s
194-
| "Is" :: "Even" :: s => "Even" :: fixAbbreviation s
161+
def abbreviationDict : Std.HashMap String String := .ofList [
162+
("isCancelAdd", "IsCancelAdd"),
163+
("isLeftCancelAdd", "IsLeftCancelAdd"),
164+
("isRightCancelAdd", "IsRightCancelAdd"),
165+
("cancelAdd", "AddCancel"),
166+
("leftCancelAdd", "AddLeftCancel"),
167+
("rightCancelAdd", "AddRightCancel"),
168+
("cancelCommAdd", "AddCancelComm"),
169+
("commAdd", "AddComm"),
170+
("zero_le", "Nonneg"),
171+
("zeroLE", "Nonneg"),
172+
("zero_lt", "Pos"),
173+
("zeroLT", "Pos"),
174+
("lezero", "Nonpos"),
175+
("le_zero", "Nonpos"),
176+
("ltzero", "Neg"),
177+
("lt_zero", "Neg"),
178+
("addSingle", "Single"),
179+
("add_single", "Single"),
180+
("addSupport", "Support"),
181+
("add_support", "Support"),
182+
("addTSupport", "TSupport"),
183+
("add_tsupport", "TSupport"),
184+
("addIndicator", "Indicator"),
185+
("add_indicator", "Indicator"),
186+
("isEven", "Even"),
195187
-- "Regular" is well-used in mathlib with various meanings (e.g. in
196188
-- measure theory) and a direct translation
197-
-- "regular" --> ["add", "Regular"] in `nameDict` above seems error-prone.
198-
| "is" :: "Regular" :: s => "isAddRegular" :: fixAbbreviation s
199-
| "Is" :: "Regular" :: s => "IsAddRegular" :: fixAbbreviation s
200-
| "is" :: "Left" :: "Regular" :: s => "isAddLeftRegular" :: fixAbbreviation s
201-
| "Is" :: "Left" :: "Regular" :: s => "IsAddLeftRegular" :: fixAbbreviation s
202-
| "is" :: "Right" :: "Regular" :: s => "isAddRightRegular" :: fixAbbreviation s
203-
| "Is" :: "Right" :: "Regular" :: s => "IsAddRightRegular" :: fixAbbreviation s
204-
| "Has" :: "Fundamental" :: "Domain" :: s => "HasAddFundamentalDomain" :: fixAbbreviation s
205-
| "has" :: "Fundamental" :: "Domain" :: s => "hasAddFundamentalDomain" :: fixAbbreviation s
206-
| "Quotient" :: "Measure" :: s => "AddQuotientMeasure" :: fixAbbreviation s
207-
| "quotient" :: "Measure" :: s => "addQuotientMeasure" :: fixAbbreviation s
208-
-- the capitalization heuristic of `applyNameDict` doesn't work in the following cases
209-
| "HSmul" :: s => "HSMul" :: fixAbbreviation s -- from `HPow`
210-
| "NSmul" :: s => "NSMul" :: fixAbbreviation s -- from `NPow`
211-
| "Nsmul" :: s => "NSMul" :: fixAbbreviation s -- from `Pow`
212-
| "ZSmul" :: s => "ZSMul" :: fixAbbreviation s -- from `ZPow`
213-
| "neg" :: "Fun" :: s => "invFun" :: fixAbbreviation s
214-
| "Neg" :: "Fun" :: s => "InvFun" :: fixAbbreviation s
215-
| "unique" :: "Prods" :: s => "uniqueSums" :: fixAbbreviation s
216-
| "Unique" :: "Prods" :: s => "UniqueSums" :: fixAbbreviation s
217-
| "order" :: "Of" :: s => "addOrderOf" :: fixAbbreviation s
218-
| "Order" :: "Of" :: s => "AddOrderOf" :: fixAbbreviation s
219-
| "is"::"Of"::"Fin"::"Order"::s => "isOfFinAddOrder" :: fixAbbreviation s
220-
| "Is"::"Of"::"Fin"::"Order"::s => "IsOfFinAddOrder" :: fixAbbreviation s
221-
| "is" :: "Central" :: "Scalar" :: s => "isCentralVAdd" :: fixAbbreviation s
222-
| "Is" :: "Central" :: "Scalar" :: s => "IsCentralVAdd" :: fixAbbreviation s
223-
| "is" :: "Scalar" :: "Tower" :: s => "vaddAssocClass" :: fixAbbreviation s
224-
| "Is" :: "Scalar" :: "Tower" :: s => "VAddAssocClass" :: fixAbbreviation s
225-
| "function" :: "_" :: "add" :: "Semiconj" :: s
226-
=> "function" :: "_" :: "semiconj" :: fixAbbreviation s
227-
| "function" :: "_" :: "add" :: "Commute" :: s
228-
=> "function" :: "_" :: "commute" :: fixAbbreviation s
229-
| "Zero" :: "Le" :: "Part" :: s => "PosPart" :: fixAbbreviation s
230-
| "Le" :: "Zero" :: "Part" :: s => "NegPart" :: fixAbbreviation s
231-
| "zero" :: "Le" :: "Part" :: s => "posPart" :: fixAbbreviation s
232-
| "le" :: "Zero" :: "Part" :: s => "negPart" :: fixAbbreviation s
233-
| "Division" :: "Add" :: "Monoid" :: s => "SubtractionMonoid" :: fixAbbreviation s
234-
| "division" :: "Add" :: "Monoid" :: s => "subtractionMonoid" :: fixAbbreviation s
235-
| "Sub" :: "Neg" :: "Zero" :: "Add" :: "Monoid" :: s => "SubNegZeroMonoid" :: fixAbbreviation s
236-
| "sub" :: "Neg" :: "Zero" :: "Add" :: "Monoid" :: s => "subNegZeroMonoid" :: fixAbbreviation s
237-
| "modular" :: "Character" :: s => "addModularCharacter" :: fixAbbreviation s
238-
| "Modular" :: "Character" :: s => "AddModularCharacter" :: fixAbbreviation s
239-
| x :: s => x :: fixAbbreviation s
240-
| [] => []
189+
-- "regular" --> "addRegular" in `nameDict` above seems error-prone.
190+
("isRegular", "IsAddRegular"),
191+
("isLeftRegular", "IsAddLeftRegular"),
192+
("isRightRegular", "IsAddRightRegular"),
193+
("hasFundamentalDomain", "HasAddFundamentalDomain"),
194+
("quotientMeasure", "AddQuotientMeasure"),
195+
("negFun", "InvFun"),
196+
("uniqueProds", "UniqueSums"),
197+
("orderOf", "AddOrderOf"),
198+
("zeroLePart", "PosPart"),
199+
("leZeroPart", "NegPart"),
200+
("isScalarTower", "VAddAssocClass"),
201+
("isOfFinOrder", "IsOfFinAddOrder"),
202+
("isCentralScalar", "IsCentralVAdd"),
203+
("function_addSemiconj", "Function_semiconj"),
204+
("function_addCommute", "Function_commute"),
205+
("divisionAddMonoid", "SubtractionMonoid"),
206+
("subNegZeroAddMonoid", "SubNegZeroMonoid"),
207+
("modularCharacter", "AddModularCharacter")]
208+
209+
/-- Helper for `fixAbbreviation`.
210+
Note: this function has a quadratic number of recursive calls, but is not a performance
211+
bottleneck. -/
212+
def fixAbbreviationAux : List String → List String → String
213+
| [], [] => ""
214+
| [], x::s => x ++ fixAbbreviationAux s []
215+
| pre::l, s' =>
216+
let s := s' ++ [pre]
217+
let t := String.join s
218+
/- If a name starts with upper-case, and contains an underscore, it cannot match anything in
219+
the abbreviation dictionary. This is necessary to correctly translate something like
220+
`fixAbbreviation ["eventually", "LE", "_", "one"]` to `"eventuallyLE_one"`, since otherwise the
221+
substring `LE_zero` gets replaced by `Nonpos`. -/
222+
if pre == "_" && (String.Pos.Raw.get t 0).isUpper then
223+
s[0]! ++ fixAbbreviationAux (s.drop 1 ++ l) []
224+
else match abbreviationDict.get? t.decapitalizeSeq with
225+
| some post => decapitalizeLike t post ++ fixAbbreviationAux l []
226+
| none => fixAbbreviationAux l s
227+
termination_by l s => (l.length + s.length, l.length)
228+
decreasing_by all_goals grind
229+
230+
/-- Replace substrings according to `abbreviationDict`, matching the case of the first letter.
231+
232+
Example:
233+
```
234+
#eval applyNameDict ["Mul", "Support"]
235+
```
236+
gives the preliminary translation `["Add", "Support"]`. Subsequently
237+
```
238+
#eval fixAbbreviation ["Add", "Support"]
239+
```
240+
"fixes" this translation and returns `Support`.
241+
-/
242+
def fixAbbreviation (l : List String) : String :=
243+
fixAbbreviationAux l []
241244

242245
/--
243246
Autogenerate additive name.
@@ -249,7 +252,6 @@ This runs in several steps:
249252
def guessName : String → String :=
250253
String.mapTokens '\'' <|
251254
fun s =>
252-
String.join <|
253255
fixAbbreviation <|
254256
applyNameDict <|
255257
s.splitCase

MathlibTest/toAdditive.lean

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,9 @@ run_cmd
426426
checkGuessName "instCoeOneHom" "instCoeZeroHom"
427427
checkGuessName "invFun_eq_symm" "invFun_eq_symm"
428428
checkGuessName "MulEquiv.symmInvFun" "AddEquiv.symmInvFun"
429+
checkGuessName "IsScalarTower" "VAddAssocClass"
430+
checkGuessName "isScalarTower" "vaddAssocClass"
431+
checkGuessName "eventuallyLE_one_mul_atBot" "eventuallyLE_zero_add_atBot"
429432

430433
end guessName
431434

0 commit comments

Comments
 (0)