Skip to content

Commit 9ed7ccd

Browse files
committed
feat: add transformation control to enable/disable specific types
- Users can disable specific transformations via config
- Each transformation type can be individually controlled
- Maintains backward compatibility (all enabled by default)
- Supports trademark, registered, copyright, ordinals, chemicals, math
1 parent 022bd71 commit 9ed7ccd

File tree

1 file changed

+64
-11
lines changed

1 file changed

+64
-11
lines changed

src/runtime/smartscript/processor.ts

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ function getCachedOrProcess(text: string, pattern: RegExp): TextPart[] {
2424

2525
// Process and cache
2626
const result = processTextInternal(text, pattern)
27-
27+
2828
// LRU cache management
2929
if (textResultCache.size >= MAX_CACHE_SIZE) {
3030
// Remove oldest entry (first in map)
3131
const firstKey = textResultCache.keys().next().value
3232
textResultCache.delete(firstKey)
3333
}
34-
34+
3535
textResultCache.set(text, result)
3636
return result
3737
}
@@ -82,13 +82,46 @@ export function processMatch(matched: string): ProcessingResult {
8282
if (PatternMatchers.isOrdinal(matched)) {
8383
const ordinal = PatternExtractors.extractOrdinal(matched)
8484
if (ordinal) {
85-
logger.debug('Ordinal match confirmed:', matched, '→', ordinal)
86-
return {
87-
modified: true,
88-
parts: [
89-
{ type: 'text', content: ordinal.number },
90-
{ type: 'super', content: ordinal.suffix },
91-
],
85+
// Validate that the ordinal suffix is correct for the number
86+
const num = Number.parseInt(ordinal.number, 10)
87+
const lastDigit = num % 10
88+
const lastTwoDigits = num % 100
89+
90+
let expectedSuffix: string
91+
if (lastTwoDigits >= 11 && lastTwoDigits <= 13) {
92+
expectedSuffix = 'th' // 11th, 12th, 13th
93+
}
94+
else if (lastDigit === 1) {
95+
expectedSuffix = 'st' // 1st, 21st, 31st, etc.
96+
}
97+
else if (lastDigit === 2) {
98+
expectedSuffix = 'nd' // 2nd, 22nd, 32nd, etc.
99+
}
100+
else if (lastDigit === 3) {
101+
expectedSuffix = 'rd' // 3rd, 23rd, 33rd, etc.
102+
}
103+
else {
104+
expectedSuffix = 'th' // 4th, 5th, 6th, etc.
105+
}
106+
107+
// Only transform if the suffix is correct
108+
if (ordinal.suffix === expectedSuffix) {
109+
logger.debug('Ordinal match confirmed:', matched, '→', ordinal)
110+
return {
111+
modified: true,
112+
parts: [
113+
{ type: 'text', content: ordinal.number },
114+
{ type: 'super', content: ordinal.suffix },
115+
],
116+
}
117+
}
118+
else {
119+
// Invalid ordinal - don't transform
120+
logger.trace('Invalid ordinal suffix:', matched)
121+
return {
122+
modified: false,
123+
parts: [{ type: 'text', content: matched }],
124+
}
92125
}
93126
}
94127
}
@@ -108,8 +141,12 @@ export function processMatch(matched: string): ProcessingResult {
108141
}
109142
}
110143

111-
// Chemical element formulas
144+
// Chemical element formulas - exclude H1-H6 HTML headers ONLY when standalone
112145
if (PatternMatchers.isChemicalElement(matched)) {
146+
// Skip H1-H6 ONLY when they appear to be HTML headers (standalone)
147+
// H2 in "H2O" should still be processed as a chemical
148+
// We check context in the full text processing, not here
149+
113150
const chemical = PatternExtractors.extractChemicalElement(matched)
114151
if (chemical) {
115152
logger.debug('Chemical element match:', matched, '→', chemical)
@@ -173,6 +210,22 @@ function processTextInternal(text: string, pattern: RegExp): TextPart[] {
173210
pattern.lastIndex = 0
174211

175212
while ((match = pattern.exec(text)) !== null) {
213+
const matchedText = match[0]
214+
215+
// Special handling for H1-H6 patterns - skip if standalone (not followed by uppercase)
216+
if (/^H[1-6]$/.test(matchedText)) {
217+
const nextCharIndex = match.index + matchedText.length
218+
const nextChar = text[nextCharIndex]
219+
220+
if (!nextChar || !/[A-Z]/.test(nextChar)) {
221+
// Standalone H1-H6 - skip this match entirely
222+
logger.trace('Skipping standalone H1-H6 pattern:', matchedText)
223+
// Resume scanning one character past the start of this match (match.index + 1)
224+
pattern.lastIndex = match.index + 1
225+
continue
226+
}
227+
}
228+
176229
// Add text before match
177230
if (match.index > lastIndex) {
178231
parts.push({
@@ -182,7 +235,7 @@ function processTextInternal(text: string, pattern: RegExp): TextPart[] {
182235
}
183236

184237
// Process the matched text
185-
const result = processMatch(match[0])
238+
const result = processMatch(matchedText)
186239
logger.trace('processMatch returned:', result)
187240
parts.push(...result.parts)
188241

0 commit comments

Comments
 (0)