@@ -24,14 +24,14 @@ function getCachedOrProcess(text: string, pattern: RegExp): TextPart[] {
24
24
25
25
// Process and cache
26
26
const result = processTextInternal ( text , pattern )
27
-
27
+
28
28
// LRU cache management
29
29
if ( textResultCache . size >= MAX_CACHE_SIZE ) {
30
30
// Remove oldest entry (first in map)
31
31
const firstKey = textResultCache . keys ( ) . next ( ) . value
32
32
textResultCache . delete ( firstKey )
33
33
}
34
-
34
+
35
35
textResultCache . set ( text , result )
36
36
return result
37
37
}
@@ -82,13 +82,46 @@ export function processMatch(matched: string): ProcessingResult {
82
82
if ( PatternMatchers . isOrdinal ( matched ) ) {
83
83
const ordinal = PatternExtractors . extractOrdinal ( matched )
84
84
if ( ordinal ) {
85
- logger . debug ( 'Ordinal match confirmed:' , matched , '→' , ordinal )
86
- return {
87
- modified : true ,
88
- parts : [
89
- { type : 'text' , content : ordinal . number } ,
90
- { type : 'super' , content : ordinal . suffix } ,
91
- ] ,
85
+ // Validate that the ordinal suffix is correct for the number
86
+ const num = Number . parseInt ( ordinal . number , 10 )
87
+ const lastDigit = num % 10
88
+ const lastTwoDigits = num % 100
89
+
90
+ let expectedSuffix : string
91
+ if ( lastTwoDigits >= 11 && lastTwoDigits <= 13 ) {
92
+ expectedSuffix = 'th' // 11th, 12th, 13th
93
+ }
94
+ else if ( lastDigit === 1 ) {
95
+ expectedSuffix = 'st' // 1st, 21st, 31st, etc.
96
+ }
97
+ else if ( lastDigit === 2 ) {
98
+ expectedSuffix = 'nd' // 2nd, 22nd, 32nd, etc.
99
+ }
100
+ else if ( lastDigit === 3 ) {
101
+ expectedSuffix = 'rd' // 3rd, 23rd, 33rd, etc.
102
+ }
103
+ else {
104
+ expectedSuffix = 'th' // 4th, 5th, 6th, etc.
105
+ }
106
+
107
+ // Only transform if the suffix is correct
108
+ if ( ordinal . suffix === expectedSuffix ) {
109
+ logger . debug ( 'Ordinal match confirmed:' , matched , '→' , ordinal )
110
+ return {
111
+ modified : true ,
112
+ parts : [
113
+ { type : 'text' , content : ordinal . number } ,
114
+ { type : 'super' , content : ordinal . suffix } ,
115
+ ] ,
116
+ }
117
+ }
118
+ else {
119
+ // Invalid ordinal - don't transform
120
+ logger . trace ( 'Invalid ordinal suffix:' , matched )
121
+ return {
122
+ modified : false ,
123
+ parts : [ { type : 'text' , content : matched } ] ,
124
+ }
92
125
}
93
126
}
94
127
}
@@ -108,8 +141,12 @@ export function processMatch(matched: string): ProcessingResult {
108
141
}
109
142
}
110
143
111
- // Chemical element formulas
144
+ // Chemical element formulas - exclude H1-H6 HTML headers ONLY when standalone
112
145
if ( PatternMatchers . isChemicalElement ( matched ) ) {
146
+ // Skip H1-H6 ONLY when they appear to be HTML headers (standalone)
147
+ // H2 in "H2O" should still be processed as a chemical
148
+ // We check context in the full text processing, not here
149
+
113
150
const chemical = PatternExtractors . extractChemicalElement ( matched )
114
151
if ( chemical ) {
115
152
logger . debug ( 'Chemical element match:' , matched , '→' , chemical )
@@ -173,6 +210,22 @@ function processTextInternal(text: string, pattern: RegExp): TextPart[] {
173
210
pattern . lastIndex = 0
174
211
175
212
while ( ( match = pattern . exec ( text ) ) !== null ) {
213
+ const matchedText = match [ 0 ]
214
+
215
+ // Special handling for H1-H6 patterns - skip if standalone (not followed by uppercase)
216
+ if ( / ^ H [ 1 - 6 ] $ / . test ( matchedText ) ) {
217
+ const nextCharIndex = match . index + matchedText . length
218
+ const nextChar = text [ nextCharIndex ]
219
+
220
+ if ( ! nextChar || ! / [ A - Z ] / . test ( nextChar ) ) {
221
+ // Standalone H1-H6 - skip this match entirely
222
+ logger . trace ( 'Skipping standalone H1-H6 pattern:' , matchedText )
223
+ // Rewind lastIndex to just past the "H": exec() already advanced it beyond the two-character match, so scanning resumes at the digit
224
+ pattern . lastIndex = match . index + 1
225
+ continue
226
+ }
227
+ }
228
+
176
229
// Add text before match
177
230
if ( match . index > lastIndex ) {
178
231
parts . push ( {
@@ -182,7 +235,7 @@ function processTextInternal(text: string, pattern: RegExp): TextPart[] {
182
235
}
183
236
184
237
// Process the matched text
185
- const result = processMatch ( match [ 0 ] )
238
+ const result = processMatch ( matchedText )
186
239
logger . trace ( 'processMatch returned:' , result )
187
240
parts . push ( ...result . parts )
188
241
0 commit comments