Skip to content

Commit

Permalink
chore: add script to auto update unicode-alias (#730)
Browse files Browse the repository at this point in the history
  • Loading branch information
ota-meshi committed Apr 12, 2024
1 parent 8c23a79 commit 035f6bf
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Scheduled check that the committed Unicode alias table is still up to date:
# the table is regenerated and the job fails when the result differs from HEAD.
name: cron
on:
schedule:
# Cron fields: minute 0, hour 0, any day of month, any month, weekday 0 (Sunday).
- cron: 0 0 * * 0

jobs:
check-unicode-alias-update:
name: check-unicode-alias-update
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install Node.js
uses: actions/setup-node@v4
- name: Install Packages
run: npm install
# Regenerate the alias data through the update:unicode-alias npm script.
- name: Update
run: npm run update:unicode-alias
# Presumably normalizes the formatting of the generated code so that only
# real data changes show up in the comparison below (TODO confirm).
- name: Format
run: npm run eslint-fix
# Stage every change and compare the index against HEAD; --exit-code makes
# git (and therefore this step) exit non-zero when any difference exists.
- name: Check changes
run: |
git add --all && \
git diff-index --cached HEAD --stat --exit-code
4 changes: 4 additions & 0 deletions lib/utils/unicode-alias.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export const UNICODE_CATEGORY_ALIAS = new AliasMap({
otherToLong: {},
})

/* PROPERTY_ALIASES_START */
// https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt
export const UNICODE_BINARY_PROPERTY_ALIAS = new AliasMap({
shortToLong: {
Expand Down Expand Up @@ -120,7 +121,9 @@ export const UNICODE_BINARY_PROPERTY_ALIAS = new AliasMap({
space: "White_Space",
},
})
/* PROPERTY_ALIASES_END */

/* PROPERTY_VALUE_ALIASES_START */
// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
export const UNICODE_GENERAL_CATEGORY_ALIAS = new AliasMap({
shortToLong: {
Expand Down Expand Up @@ -337,3 +340,4 @@ export const UNICODE_SCRIPT_ALIAS = new AliasMap({
Qaai: "Inherited",
},
})
/* PROPERTY_VALUE_ALIASES_END */
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"test:update": "npm run test:base -- --update",
"update": "ts-node --transpile-only ./tools/update.ts && npm run eslint-fix && npm run update:eslint-docs",
"update:eslint-docs": "npm run build && eslint-doc-generator",
"update:unicode-alias": "ts-node ./tools/update-unicode-alias.ts",
"new": "ts-node ./tools/new-rule.ts",
"docs:watch": "vitepress dev docs",
"docs:build": "vitepress build docs",
Expand Down
155 changes: 155 additions & 0 deletions tools/update-unicode-alias.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import fs from "fs"
import path from "path"

/** One alias entry: its short name, its long name and any further spellings. */
interface Alias {
    short: string
    long: string
    other: string[]
}

/** An alias entry together with the property alias (first field) it was listed under. */
interface UnicodePropertyValueAlias extends Alias {
    propertyAlias: string
}

/** An alias entry together with the section heading it appeared below. */
interface UnicodePropertyAlias extends Alias {
    category: string
}

// Absolute path of the alias table module that this script rewrites in place.
const filePath = path.resolve(__dirname, "../lib/utils/unicode-alias.ts")
const logger = console

// Run the update. A failure is logged and turned into a non-zero exit status
// explicitly, instead of relying on how the runtime treats unhandled rejections.
main().catch((error: unknown) => {
    logger.error(error)
    process.exitCode = 1
})

/**
 * Downloads the Unicode alias data and rewrites the generated sections of
 * the alias table file (`filePath`).
 *
 * Two regions of the file are regenerated, each delimited by a pair of block
 * comments named `<NAME>_START` / `<NAME>_END`:
 * - `PROPERTY_ALIASES`: the binary property aliases
 * - `PROPERTY_VALUE_ALIASES`: the General_Category (`gc`) and Script (`sc`) aliases
 *
 * @throws Error when one of the alias groups is empty or when a marker pair
 * cannot be found, so that the file is never silently left stale or emptied.
 */
async function main(): Promise<void> {
    const propertyAliases: UnicodePropertyAlias[] = []
    for await (const item of getUnicodePropertyAliases()) {
        propertyAliases.push(item)
    }
    const propertyValueAliases: UnicodePropertyValueAlias[] = []
    for await (const item of getUnicodePropertyValueAliases()) {
        propertyValueAliases.push(item)
    }

    const binaryProperties = propertyAliases.filter(
        (u) => u.category === "Binary Properties",
    )
    const generalCategories = propertyValueAliases.filter(
        (u) => u.propertyAlias === "gc",
    )
    const scripts = propertyValueAliases.filter((u) => u.propertyAlias === "sc")

    // An empty group means the download or the parsing went wrong; writing it
    // out would replace the existing table with empty maps.
    const groups: [string, Alias[]][] = [
        ["binary property", binaryProperties],
        ["general category", generalCategories],
        ["script", scripts],
    ]
    for (const [label, entries] of groups) {
        if (entries.length === 0) {
            throw new Error(
                `No ${label} aliases were found in the downloaded data; refusing to overwrite ${filePath}`,
            )
        }
    }

    const propertyAliasesSection = [
        "/* PROPERTY_ALIASES_START */",
        "// https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt",
        generateAliasMap("UNICODE_BINARY_PROPERTY_ALIAS", binaryProperties),
        "/* PROPERTY_ALIASES_END */",
    ].join("\n")
    const propertyValueAliasesSection = [
        "/* PROPERTY_VALUE_ALIASES_START */",
        "// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt",
        generateAliasMap("UNICODE_GENERAL_CATEGORY_ALIAS", generalCategories),
        "// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt",
        generateAliasMap("UNICODE_SCRIPT_ALIAS", scripts),
        "/* PROPERTY_VALUE_ALIASES_END */",
    ].join("\n")

    const original = fs.readFileSync(filePath, "utf-8")
    const withPropertyAliases = replaceMarkedSection(
        original,
        /\/\*\s*PROPERTY_ALIASES_START\s*\*\/[\s\S]*\/\*\s*PROPERTY_ALIASES_END\s*\*\//u,
        "PROPERTY_ALIASES",
        propertyAliasesSection,
    )
    const content = replaceMarkedSection(
        withPropertyAliases,
        /\/\*\s*PROPERTY_VALUE_ALIASES_START\s*\*\/[\s\S]*\/\*\s*PROPERTY_VALUE_ALIASES_END\s*\*\//u,
        "PROPERTY_VALUE_ALIASES",
        propertyValueAliasesSection,
    )

    // Update file.
    fs.writeFileSync(filePath, content)
}

/**
 * Replaces the region matched by `pattern` with `replacement`.
 *
 * @param source Text to search.
 * @param pattern Non-global pattern covering the whole marked region.
 * @param label Marker name used in the error message.
 * @param replacement Text inserted verbatim in place of the match.
 * @returns The updated text.
 * @throws Error when `pattern` does not match `source`.
 */
function replaceMarkedSection(
    source: string,
    pattern: RegExp,
    label: string,
    replacement: string,
): string {
    if (!pattern.test(source)) {
        throw new Error(
            `Could not find the ${label}_START / ${label}_END markers in ${filePath}`,
        )
    }
    // A replacer function inserts the text as-is; a replacement string would
    // have sequences such as "$&" or "$1" expanded.
    return source.replace(pattern, () => replacement)
}

/**
 * Renders the source text of an exported `AliasMap` constant.
 *
 * Entries are emitted in the order in which their key was first seen; a key
 * that is identical to its long name is left out.
 *
 * @param name Identifier of the generated constant.
 * @param aliases Alias entries to render.
 * @returns The declaration text, without a trailing newline.
 */
function generateAliasMap(name: string, aliases: Alias[]): string {
    const byShortName = new Map<string, string>()
    const byOtherName = new Map<string, string>()
    for (const { short, long, other } of aliases) {
        byShortName.set(short, long)
        other.forEach((alternative) => byOtherName.set(alternative, long))
    }

    return [
        `export const ${name} = new AliasMap({`,
        "shortToLong: {",
        renderEntries(byShortName),
        "},",
        "otherToLong: {",
        renderEntries(byOtherName),
        "},",
        "})",
    ].join("\n")

    /** Renders one `key: "value",` line per entry, in insertion order. */
    function renderEntries(entries: Map<string, string>): string {
        const lines: string[] = []
        for (const [alias, longName] of entries) {
            if (alias === longName) {
                continue
            }
            lines.push(` ${alias}: "${longName}",`)
        }
        return lines.join("\n")
    }
}

/**
 * Downloads `PropertyAliases.txt` and yields one entry per data line.
 *
 * Each data line is a `;`-separated list: short name, long name, then any
 * further aliases; text after `#` is a comment. A comment line that sits
 * between two `# ====` rule lines is treated as a section heading, and its
 * text becomes the `category` of the entries that follow.
 *
 * @throws Error when the download does not complete with a successful status.
 */
async function* getUnicodePropertyAliases(): AsyncIterable<UnicodePropertyAlias> {
    const DB_URL =
        "https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt"
    logger.log("Fetching data... (%s)", DB_URL)

    const response = await fetch(DB_URL)
    if (!response.ok) {
        // Never parse an error page as if it were alias data.
        throw new Error(
            `Failed to fetch ${DB_URL}: ${response.status} ${response.statusText}`,
        )
    }
    // Accept both LF and CRLF line endings.
    const dbLines = (await response.text()).split(/\r?\n/u)
    const rulePattern = /^#\s*=+$/u

    let category = ""
    for (const [index, line] of dbLines.entries()) {
        if (line.startsWith("#")) {
            // The first and last line have no neighbour on one side.
            if (
                rulePattern.test(dbLines[index - 1] ?? "") &&
                rulePattern.test(dbLines[index + 1] ?? "")
            ) {
                category = line.slice(1).trim()
            }
            continue
        }
        const [short, long, ...other] = line
            .split("#")[0] // strip comments
            .split(";") // split by semicolon
            .map((x) => x.trim()) // trim

        // Blank, whitespace-only and incomplete lines carry no alias.
        if (!short || !long) {
            continue
        }

        yield {
            category,
            short,
            long,
            other,
        }
    }
}

/**
 * Downloads `PropertyValueAliases.txt` and yields one entry per data line.
 *
 * Each data line is a `;`-separated list: property alias, short value name,
 * long value name, then any further aliases; text after `#` is a comment.
 *
 * @throws Error when the download does not complete with a successful status.
 */
async function* getUnicodePropertyValueAliases(): AsyncIterable<UnicodePropertyValueAlias> {
    const DB_URL =
        "https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt"
    logger.log("Fetching data... (%s)", DB_URL)

    const response = await fetch(DB_URL)
    if (!response.ok) {
        // Never parse an error page as if it were alias data.
        throw new Error(
            `Failed to fetch ${DB_URL}: ${response.status} ${response.statusText}`,
        )
    }
    const dbContent = await response.text()

    // Accept both LF and CRLF line endings.
    for (const line of dbContent.split(/\r?\n/u)) {
        if (line.startsWith("#")) {
            continue
        }
        const [propertyAlias, short, long, ...other] = line
            .split("#")[0] // strip comments
            .split(";") // split by semicolon
            .map((x) => x.trim()) // trim

        // Blank, whitespace-only and incomplete lines carry no alias.
        if (!propertyAlias || !short || !long) {
            continue
        }

        yield {
            propertyAlias,
            short,
            long,
            other,
        }
    }
}

0 comments on commit 035f6bf

Please sign in to comment.