@@ -34,6 +34,7 @@ type SpecialCasingEntry = {
3434 codePoint : number ;
3535 lower : number [ ] ;
3636 upper : number [ ] ;
37+ conditionalLower : number [ ] ;
3738 condition : string ;
3839} ;
3940
@@ -47,6 +48,11 @@ async function loadMapping(property: string): Promise<Map<number, number[]>> {
4748 return module . default as Map < number , number [ ] > ;
4849}
4950
/**
 * Loads a one-to-one code point mapping table for the given property.
 *
 * Dynamically imports `${PACKAGE}/${property}/code-points.js` and returns the
 * module's default export. The export is asserted, not validated at runtime,
 * to be a `Map<number, number>`; callers in this file look entries up by code
 * point and treat the value as the single mapped code point.
 *
 * @param property Property path inside the package, e.g.
 *   `"Simple_Case_Mapping/Lowercase"`.
 * @returns The default export of the imported module.
 */
async function loadSimpleMapping(property: string): Promise<Map<number, number>> {
    const specifier = `${PACKAGE}/${property}/code-points.js`;
    const { default: mapping } = await import(specifier);
    return mapping as Map<number, number>;
}
55+
5056// Group a sorted, de-duplicated run of code points into ranges sharing a
5157// constant stride. The stride of each range is taken from the gap to the next
5258// code point, so it never includes a code point that is not in the set; this
@@ -120,7 +126,7 @@ ${r32}
120126` ;
121127}
122128
123- async function buildSpecialCasing ( ) : Promise < SpecialCasingEntry [ ] > {
129+ async function buildSpecialCasing ( simpleLowercase : Map < number , number > , simpleUppercase : Map < number , number > ) : Promise < SpecialCasingEntry [ ] > {
124130 // The unconditional, locale-insensitive multi-rune mappings. Each map keys a
125131 // code point to its full lower/upper expansion (identity when unchanged).
126132 const lowerMappings = await loadMapping ( "Special_Casing/Lowercase" ) ;
@@ -131,39 +137,52 @@ async function buildSpecialCasing(): Promise<SpecialCasingEntry[]> {
131137
132138 const entries : SpecialCasingEntry [ ] = [ ] ;
133139
134- const codePoints = new Set ( [ ...lowerMappings . keys ( ) , ...upperMappings . keys ( ) ] ) ;
140+ const codePoints = new Set ( [ ...simpleLowercase . keys ( ) , ... simpleUppercase . keys ( ) , ... lowerMappings . keys ( ) , ...upperMappings . keys ( ) ] ) ;
135141 for ( const codePoint of codePoints ) {
136142 entries . push ( {
137143 codePoint,
138- lower : lowerMappings . get ( codePoint ) ?? [ codePoint ] ,
139- upper : upperMappings . get ( codePoint ) ?? [ codePoint ] ,
144+ lower : lowerMappings . get ( codePoint ) ?? [ simpleLowercase . get ( codePoint ) ?? codePoint ] ,
145+ upper : upperMappings . get ( codePoint ) ?? [ simpleUppercase . get ( codePoint ) ?? codePoint ] ,
146+ conditionalLower : [ codePoint ] ,
140147 condition : "specialCasingConditionNone" ,
141148 } ) ;
142149 }
143150
144151 for ( const [ codePoint , lower ] of finalSigmaMappings ) {
145- entries . push ( {
146- codePoint,
147- lower,
148- upper : upperMappings . get ( codePoint ) ?? [ codePoint ] ,
149- condition : "specialCasingConditionFinalSigma" ,
150- } ) ;
152+ const entry = entries . find ( entry => entry . codePoint === codePoint ) ;
153+ if ( entry === undefined ) {
154+ entries . push ( {
155+ codePoint,
156+ lower : [ simpleLowercase . get ( codePoint ) ?? codePoint ] ,
157+ upper : upperMappings . get ( codePoint ) ?? [ simpleUppercase . get ( codePoint ) ?? codePoint ] ,
158+ conditionalLower : lower ,
159+ condition : "specialCasingConditionFinalSigma" ,
160+ } ) ;
161+ }
162+ else {
163+ entry . conditionalLower = lower ;
164+ entry . condition = "specialCasingConditionFinalSigma" ;
165+ }
151166 }
152167
153168 entries . sort ( ( a , b ) => a . codePoint - b . codePoint ) ;
154169 return entries ;
155170}
156171
157172function renderCaseFile ( entries : SpecialCasingEntry [ ] , casedTable : RangeTable , caseIgnorableTable : RangeTable ) : string {
158- const mappings = entries . map ( entry => `\t${ goRuneLiteral ( entry . codePoint ) } : {lower: ${ goStringLiteral ( entry . lower ) } , upper: ${ goStringLiteral ( entry . upper ) } , condition: ${ entry . condition } },` ) . join ( "\n" ) ;
173+ const mappings = entries . map ( entry => {
174+ const conditionalLower = entry . condition === "specialCasingConditionFinalSigma" ? `, conditionalLower: ${ goStringLiteral ( entry . conditionalLower ) } ` : "" ;
175+ return `\t${ goRuneLiteral ( entry . codePoint ) } : {lower: ${ goStringLiteral ( entry . lower ) } , upper: ${ goStringLiteral ( entry . upper ) } ${ conditionalLower } , condition: ${ entry . condition } },` ;
176+ } ) . join ( "\n" ) ;
159177
160178 return `// Code generated by generate-unicode-data.mts. DO NOT EDIT.
161179// Derived from the ${ PACKAGE } package (Unicode ${ UNICODE_VERSION } ).
162180// Includes only the locale-insensitive multi-rune mappings needed for ECMAScript
163181// default casing, plus the Final_Sigma context mapping. String.prototype.toLowerCase
164182// applies Final_Sigma, but Go's unicode package does not, so the caser applies it
165- // from this data when in context. Go's unicode package handles the simple one-rune
166- // mappings, so those are omitted here.
183+ // from this data when in context. Simple one-rune mappings are included here too
184+ // so casing stays pinned to this Unicode version, rather than the Go toolchain's
185+ // unicode tables.
167186
168187package stringutil
169188
@@ -177,9 +196,10 @@ const (
177196)
178197
179198type specialCasingMapping struct {
180- \tlower string
181- \tupper string
182- \tcondition specialCasingCondition
199+ \tlower string
200+ \tupper string
201+ \tconditionalLower string
202+ \tcondition specialCasingCondition
183203}
184204
185205var specialCasingMappings = map[rune]specialCasingMapping{
@@ -209,7 +229,9 @@ ${renderRangeTable("unicodeESNextIdentifierPart", partTable)}
209229}
210230
211231async function main ( ) {
212- const entries = await buildSpecialCasing ( ) ;
232+ const simpleLowercase = await loadSimpleMapping ( "Simple_Case_Mapping/Lowercase" ) ;
233+ const simpleUppercase = await loadSimpleMapping ( "Simple_Case_Mapping/Uppercase" ) ;
234+ const entries = await buildSpecialCasing ( simpleLowercase , simpleUppercase ) ;
213235 const casedTable = toRangeTable ( await loadCodePoints ( "Binary_Property/Cased" ) ) ;
214236 const caseIgnorableTable = toRangeTable ( await loadCodePoints ( "Binary_Property/Case_Ignorable" ) ) ;
215237 fs . writeFileSync ( CASE_OUTPUT_PATH , renderCaseFile ( entries , casedTable , caseIgnorableTable ) ) ;
0 commit comments