Skip to content

Commit

Permalink
🐛 fix valid Unicode Properties (fixes #6)
Browse files Browse the repository at this point in the history
  • Loading branch information
mysticatea committed Jul 23, 2019
1 parent 00abeab commit 2bd358f
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 478 deletions.
6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
"devDependencies": {
"@mysticatea/eslint-plugin": "^11.0.0",
"@types/eslint": "^4.16.2",
"@types/jsdom": "^12.2.4",
"@types/mocha": "^5.2.2",
"@types/node": "^12.6.8",
"coveralls": "^3.0.1",
"dts-bundle": "^0.7.3",
"eslint": "^6.1.0",
"jsdom": "^15.1.1",
"mocha": "^6.2.0",
"npm-run-all": "^4.1.5",
"nyc": "^14.1.1",
Expand All @@ -40,7 +42,9 @@
"pretest": "run-s build lint",
"test": "nyc _mocha \"test/*.ts\" --reporter dot --timeout 10000",
"update:test": "ts-node scripts/update-fixtures.ts",
"update:ids": "ts-node scripts/update-unicode-ids.ts",
"update:unicode": "run-s update:unicode:*",
"update:unicode:ids": "ts-node scripts/update-unicode-ids.ts",
"update:unicode:props": "ts-node scripts/update-unicode-properties.ts",
"preversion": "npm test",
"version": "npm run -s build",
"postversion": "git push && git push --tags",
Expand Down
213 changes: 213 additions & 0 deletions scripts/update-unicode-properties.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
import fs from "fs"
import { JSDOM, DOMWindow } from "jsdom"
import { CLIEngine } from "eslint"

/**
 * CSS selectors of the three specification tables that are scraped.
 * Every edition listed below uses the same table ids.
 */
const tableSelectors = {
    binProperties: "#table-binary-unicode-properties",
    gcValues: "#table-unicode-general-category-values",
    scValues: "#table-unicode-script-values",
}

/**
 * The specification pages to scrape, one entry per ECMAScript edition.
 * An entry may override a selector after the spread if its page differs.
 */
const DataSources = [
    {
        url: "https://www.ecma-international.org/ecma-262/9.0/",
        version: 2018,
        ...tableSelectors,
    },
    {
        url: "https://www.ecma-international.org/ecma-262/10.0/",
        version: 2019,
        ...tableSelectors,
    },
    {
        url: "https://tc39.es/ecma262/",
        version: 2020,
        ...tableSelectors,
    },
]

/** Path of the generated module this script writes. */
const FILE_PATH = "src/unicode/properties.ts"

/** Destination of the progress and error messages. */
const logger = console

/**
 * The values scraped from the specification page of one ECMAScript edition.
 * Each array is the result of `collectValues` for the corresponding table.
 */
type Datum = {
// Values taken from the table selected by the source's `binProperties` selector.
binProperties: string[]
// Values taken from the table selected by the source's `gcValues` selector.
gcValues: string[]
// Values taken from the table selected by the source's `scValues` selector.
scValues: string[]
}

// Main: scrape the Unicode property tables of every edition in `DataSources`,
// generate the source text of `FILE_PATH` from them, format it with ESLint and
// write it to disk.
;(async () => {
// Scraped values per edition, keyed by the edition's `version` number.
// Created without a prototype so only the version keys exist on it.
const data: Record<number, Datum> = Object.create(null)
// Values already collected from earlier editions. `collectValues` skips the
// values found in these sets and records the new ones, so each edition's
// datum only holds the values that were not seen in a previous edition.
const existing = {
binProperties: new Set<string>(),
gcValues: new Set<string>(),
scValues: new Set<string>(),
}

// Editions are processed in the order they are listed in `DataSources`.
for (const {
binProperties,
gcValues,
scValues,
url,
version,
} of DataSources) {
logger.log("---- ECMAScript %d ----", version)
const datum: Datum = {
binProperties: [],
gcValues: [],
scValues: [],
}
data[version] = datum

// Load the specification page. A failure whose message is exactly
// "Error: socket hang up" is retried after a 2 second pause; any other
// error is rethrown and aborts the script.
// NOTE(review): the retry loop has no upper bound on the number of attempts.
let window: DOMWindow | null = null
do {
try {
logger.log("Fetching data from %o", url)
;({ window } = await JSDOM.fromURL(url))
} catch (error) {
if (!error || error.message !== "Error: socket hang up") {
throw error
}
logger.log("Failed: %s", error)
await new Promise(resolve => setTimeout(resolve, 2000))
}
} while (window == null)

logger.log("Parsing tables")
datum.binProperties = collectValues(
window,
binProperties,
existing.binProperties,
)
datum.gcValues = collectValues(window, gcValues, existing.gcValues)
datum.scValues = collectValues(window, scValues, existing.scValues)

logger.log("Done")
}

// Build the source text of the generated module. Everything inside the
// template literal is emitted verbatim, so no comments are placed within it.
// The numbers 52 and 56 are the `maxLen` arguments that `makeVerificationCode`
// forwards to `makeRegExpPatternCode` to wrap the generated pattern strings.
logger.log("Generating code...")
let code = `/* This file was generated with ECMAScript specifications. */
const gcNamePattern = /^(?:General_Category|gc)$/u
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
const gcValuePatterns = {
${Array.from(
Object.keys(data),
version => `es${version}: null as RegExp | null,`,
).join("\n")}
}
const scValuePatterns = {
${Array.from(
Object.keys(data),
version => `es${version}: null as RegExp | null,`,
).join("\n")}
}
const binPropertyPatterns = {
${Array.from(
Object.keys(data),
version => `es${version}: null as RegExp | null,`,
).join("\n")}
}
export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
if (gcNamePattern.test(name)) {
${Array.from(Object.entries(data), ([version, { gcValues }]) =>
makeVerificationCode(version, "gcValuePatterns", gcValues, 52),
).join("\n")}
}
if (scNamePattern.test(name)) {
${Array.from(Object.entries(data), ([version, { scValues }]) =>
makeVerificationCode(version, "scValuePatterns", scValues, 52),
).join("\n")}
}
return false
}
export function isValidLoneUnicodeProperty(version: number, value: string): boolean {
${Array.from(Object.entries(data), ([version, { binProperties }]) =>
makeVerificationCode(version, "binPropertyPatterns", binProperties, 56),
).join("\n")}
return false
}
`

// Run the generated text through ESLint with autofix enabled, under the file
// name "properties.ts". When ESLint returns no output, the unformatted text
// is kept.
logger.log("Formatting code...")
const engine = new CLIEngine({ fix: true })
const result = engine.executeOnText(code, "properties.ts").results[0]
code = result.output || code

logger.log("Writing '%s'...", FILE_PATH)
await save(code)

logger.log("Completed!")
})().catch(error => {
// Report the failure and make the process exit with a non-zero status.
logger.error(error.stack)
process.exitCode = 1
})

/**
 * Collect the values listed in the first column of one specification table.
 *
 * Only values that are not yet in `existingSet` are returned, and every
 * returned value is added to `existingSet`. Calling this once per edition
 * with the same set therefore yields only the values each edition adds.
 *
 * @param window The window of the loaded specification page.
 * @param id The CSS selector of the table to read (e.g. `#table-...`).
 * @param existingSet The values collected so far; updated in place.
 * @returns The newly found values in default (code unit) sort order.
 */
function collectValues(
    window: Window,
    id: string,
    existingSet: Set<string>,
): string[] {
    const nodes = window.document.querySelectorAll(
        `${id} td:nth-child(1) code`,
    )
    const found: string[] = []

    for (const node of Array.from(nodes)) {
        const value = (node.textContent || "").trim()

        // A `<code>` element without text is not a value. Recording `""`
        // would put an empty alternative into the generated RegExp, which
        // would then accept an empty property name.
        if (value === "" || existingSet.has(value)) {
            continue
        }
        existingSet.add(value)
        found.push(value)
    }

    return found.sort()
}

/**
 * Build the source text of one version-gated check for the generated module.
 *
 * The emitted code creates the RegExp `<patternVar>.es<version>` on first use
 * and returns `true` when `value` matches it.
 *
 * @param version The ECMAScript edition that introduced `values`.
 * @param patternVar The name of the object holding the cached RegExp objects.
 * @param values The values this edition added.
 * @param maxLen The line-length limit forwarded to `makeRegExpPatternCode`.
 * @returns The generated statements, or `""` when `values` is empty.
 */
function makeVerificationCode(
    version: string,
    patternVar: string,
    values: string[],
    maxLen: number,
): string {
    // An edition that added nothing needs no check at all.
    if (values.length === 0) {
        return ""
    }

    const cacheSlot = `${patternVar}.es${version}`
    const patternCode = makeRegExpPatternCode(values, maxLen)

    // The leading and trailing empty entries reproduce the newlines that
    // surround the emitted statements.
    return [
        "",
        `if (version >= ${version}) {`,
        `if (!${cacheSlot}) {`,
        `${cacheSlot} = new RegExp(`,
        `${patternCode},`,
        `"u"`,
        `)`,
        `}`,
        `if (${cacheSlot}.test(value)) {`,
        `return true`,
        `}`,
        `}`,
        "",
    ].join("\n")
}

/**
 * Build the source text of a string expression that evaluates to the RegExp
 * pattern `^(?:name1|name2|...)$`.
 *
 * The pattern is split into several string literals joined by `+`. A new
 * literal is started whenever appending the next name would make the current
 * one longer than `maxLen` characters.
 *
 * The names come from a scraped web page and end up in generated source code,
 * so they are escaped twice: RegExp syntax characters are backslash-escaped so
 * a name can only match itself, and each literal is emitted with
 * `JSON.stringify` so quotes and backslashes cannot break the generated code.
 * Names made of letters, digits and `_` produce the same output as before.
 *
 * @param names The alternatives of the pattern.
 * @param maxLen The maximum content length of each generated string literal.
 * @returns The source text of the string expression.
 */
function makeRegExpPatternCode(names: string[], maxLen: number): string {
    const lines = ["^(?:"]

    for (const name of names) {
        // Only syntax characters are escaped; the "u" flag used by the
        // generated RegExp rejects escapes of any other character.
        const part = `${name.replace(/[\\^$.*+?()[\]{}|]/gu, "\\$&")}|`
        const last = lines.length - 1

        if (lines[last].length + part.length > maxLen) {
            lines.push(part)
        } else {
            lines[last] += part
        }
    }

    // Drop the separator that follows the final name and close the group.
    const last = lines.length - 1
    lines[last] = `${lines[last].replace(/\|$/u, "")})$`

    return lines.map(line => JSON.stringify(line)).join("+")
}

/**
 * Write the generated source text to `FILE_PATH`.
 *
 * @param content The text to write.
 * @returns A promise that resolves once the file is written and rejects with
 * the error reported by `fs.writeFile`.
 */
function save(content: string): Promise<void> {
    return new Promise<void>((onWritten, onFailed) => {
        fs.writeFile(FILE_PATH, content, writeError => {
            if (writeError) {
                onFailed(writeError)
                return
            }
            onWritten()
        })
    })
}
5 changes: 4 additions & 1 deletion src/unicode/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
export { isIdContinue, isIdStart } from "./ids"
export { PropertyData } from "./property-data"
export {
isValidLoneUnicodeProperty,
isValidUnicodeProperty,
} from "./properties"

export const Null = 0x00
export const Backspace = 0x08
Expand Down

0 comments on commit 2bd358f

Please sign in to comment.