Skip to content

Commit

Permalink
chore: initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
seanghay committed Nov 5, 2023
0 parents commit 8687412
Show file tree
Hide file tree
Showing 8 changed files with 366 additions and 0 deletions.
157 changes: 157 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional stylelint cache
.stylelintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Uncomment the `public` line below if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# vuepress v2.x temp and cache directory
.temp
.cache

# Docusaurus cache and generated files
.docusaurus

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
Expand Down
1 change: 1 addition & 0 deletions .npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package-lock=false
17 changes: 17 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "slugify-khmer",
"license": "MIT",
"version": "0.1.0",
"repository": "seanghay/slugify-khmer",
"description": "Slugify Khmer text into a latin form.",
"main": "./slugify.js",
"author": {
"name": "Seanghay Yath",
"email": "seanghay.dev@gmail.com",
"url": "https://github.com/seanghay"
},
"files": ["slugify.js", "slugify.d.ts"],
"dependencies": {
"split-khmer": "^1.0.1"
}
}
24 changes: 24 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
### Slugify Khmer

A simple Khmer text slugifier, built for speed rather than correctness, on top of [`split-khmer`](https://github.com/seanghay/split-khmer).

### Install
```
npm install slugify-khmer
```

### Usage

```javascript
import { slugify } from 'slugify-khmer';

slugify('មិនដឹងទេ that\'s nice')
// => mindoeng-te that's nice

slugify('មិនដឹងទេ that\'s nice', "_")
// => mindoeng_te that's nice
```

### License

MIT
2 changes: 2 additions & 0 deletions slugify.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/**
 * Romanize a single Khmer word, yielding the output piece by piece.
 *
 * @param input Text to romanize.
 * @returns A generator of romanized fragments, in input order.
 */
export function transform(input: string): Generator<string>;
/**
 * @deprecated `delimiter` is kept only so existing two-argument calls still
 * type-check; the implementation in `slugify.js` declares a single `input`
 * parameter and never reads a second argument.
 */
export function transform(text: string, delimiter?: string): Generator<string>;
/**
 * Convert Khmer text into a romanized slug.
 *
 * @param text Text to slugify.
 * @param delimiter String placed between the romanized pieces. Defaults to `'-'`.
 * @returns The romanized pieces joined by `delimiter`.
 */
export function slugify(text: string, delimiter?: string): string;
159 changes: 159 additions & 0 deletions slugify.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
const { split } = require('split-khmer');

// Consonants treated as "first series": after one of these, `transform` picks
// index 0 of a vowel's romanization pair; after anything else it picks index 1.
const firstSeries = new Set('កខចឆដឋណតថបផឝសហឡអ');

// Inherent vowel emitted by `transform` between consonants, indexed the same
// way as the vowel pairs (0 = first series, 1 = otherwise).
const vowelsDefault = ['a', 'o'];

// Romanization of every character that `transform` emits directly:
// consonants first, then independent vowels.
const consonants = new Map([
  ["ក", "k"],
  ["ខ", "kh"],
  ["គ", "k"],
  ["ឃ", "kh"],
  ["ង", "ng"],
  ["ច", "ch"],
  ["ឆ", "chh"],
  ["ជ", "ch"],
  ["ឈ", "chh"],
  ["ញ", "nh"],
  ["ដ", "d"],
  ["ឋ", "th"],
  ["ឌ", "d"],
  ["ឍ", "th"],
  ["ណ", "n"],
  ["ត", "t"],
  ["ថ", "th"],
  ["ទ", "t"],
  ["ធ", "th"],
  ["ន", "n"],
  ["ប", "b"],
  ["ផ", "ph"],
  ["ព", "p"],
  ["ភ", "ph"],
  ["ម", "m"],
  ["យ", "y"],
  ["រ", "r"],
  ["ល", "l"],
  ["វ", "v"],
  ["ឝ", "sh"],
  ["ឞ", "ss"],
  ["ស", "s"],
  ["ហ", "h"],
  ["ឡ", "l"],
  ["អ", "a"],
  // Independent vowels.
  ['ឥ', 'e'],
  ['ឦ', 'ei'],
  ['ឧ', 'u'],
  ['ឩ', 'u'],
  // Was a second 'ឩ' row, which silently replaced the 'u' above with 'au'.
  // NOTE(review): assumed the intended key is ឪ (U+17AA), the next letter — confirm.
  ['ឪ', 'au'],
  ['ឫ', 'rue'],
  // Was a duplicated 'ឭ' row while ឬ (U+17AC) had no entry and was dropped.
  // NOTE(review): 'rueu' follows the rue / lue / lueu pattern of its neighbours — confirm.
  ['ឬ', 'rueu'],
  ['ឭ', 'lue'],
  ['ឮ', 'lueu'],
  ['ឯ', 'ae'],
  ['ឰ', 'ai'],
  ['ឲ', 'ao'],
  ['ឱ', 'ao'],
  ['ឳ', 'au'],
]);

const vowelEntries = [
['◌់', ['a', 'o']],
['ា', ['a', 'ea']],
['ា់', ['a', 'ea']],
[' ័◌', ['a', 'oa']],
['ៈ', ['ak', 'eak']],
['័យ', ['ai', 'ey']],
['ិ', ['e', 'i']],
['ី', ['ei', 'i']],
['ឹ', ['oe', 'ue']],
['ឺ', ['eu', 'ueu']],
['ុ', ['o', 'u']],
['ូ', ['ou', 'u']],
['ួ', ['uo', 'uo']],
['ើ', ['aeu', 'eu']],
['ឿ', ['oea', 'oea']],
['ៀ', ['ie', 'ie']],
['េ', ['e', 'e']],
['ែ', ['ae', 'eae']],
['ៃ', ['ai', 'ey']],
['ោ', ['ao', 'ou']],
['ៅ', ['au', 'ov']],
['ុំ', ['om', 'um']],
['ំ', ['am', 'um']],
['ាំ', ['am', 'oam']],
['ាំង', ['ang', 'eang']],
['ះ', ['ah', 'eah']],
['ិះ', ['eh', 'is']],
['ឹះ', ['oeh', 'ueh']],
['ុះ', ['oh', 'uh']],
['េះ', ['eh', 'eh']],
['ើះ', ['aeuh', 'euh']],
['ែះ', ['aeh', 'eaeh']],
['ោះ', ['aoh', 'uoh']],
].sort((a, b) => {
return b[0].length - a[0].length
}).map(([k, v]) => [new RegExp(`^${k}`), v])

/**
* Convert Khmer word into a romanization form
* @param {string} input
* @returns {Generator<string>}
*/
function* transform(input) {
let pc = null;

const sindex = () => +!(pc != null && firstSeries.has(pc));

for (let i = 0; i < input.length; i++) {
let c = input[i];

if (/[\s\u200b\u200a]|[^\u1780-\u17dd]/.test(c)) {
vowelsMatch = null;
pc = null;
yield c;
continue;
}



if (consonants.has(c)) {

if (pc != null) {
if (i - 2 >= 0) {
if (consonants.has(input[i - 2])) {
yield vowelsDefault[sindex()];
}
}
}

pc = c;
yield consonants.get(c);
continue
}

for (const [pattern, values] of vowelEntries) {
const m = pattern.exec(input.slice(i));
if (!m) continue;
i += m[0].length - 1;
const r = values[sindex()];
yield r
break
}

pc = null;
// yield c;

}
}

exports.transform = transform;

/**
* Convert Khmer text into a romaization representation
* @param {string} text
* @returns {string}
*/
exports.slugify = function (text, delimiter = '-') {
return split(text)
.map(word => Array.from(transform(word)).join(""))
.join(delimiter)
}
3 changes: 3 additions & 0 deletions slugify.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Smoke check through the package entry point: prints the slug of a sample
// that mixes Khmer and English. Nothing is asserted; inspect the output by eye.
const { slugify: toSlug } = require('.')

const sample = 'មិនដឹងទេ that\'s nice'
console.log(toSlug(sample))
3 changes: 3 additions & 0 deletions slugify.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Smoke check that the CommonJS module can be loaded with ESM import syntax:
// prints the slug of a sample that mixes Khmer and English. Nothing is asserted.
import { slugify as toSlug } from './slugify.js'

const sample = 'មិនដឹងទេ that\'s nice'
console.log(toSlug(sample))

0 comments on commit 8687412

Please sign in to comment.