-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8687412
Showing
8 changed files
with
366 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
package-lock=false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"name": "slugify-khmer", | ||
"license": "MIT", | ||
"version": "0.1.0", | ||
"repository": "seanghay/slugify-khmer", | ||
"description": "Slugify Khmer text into a latin form.", | ||
"main": "./slugify.js", | ||
"author": { | ||
"name": "Seanghay Yath", | ||
"email": "seanghay.dev@gmail.com", | ||
"url": "https://github.com/seanghay" | ||
}, | ||
"files": ["slugify.js", "slugify.d.ts"], | ||
"dependencies": { | ||
"split-khmer": "^1.0.1" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
### Slugify Khmer | ||
|
||
A simple Khmer text slugifier, built for speed rather than correctness, on top of [`split-khmer`](https://github.com/seanghay/split-khmer). | ||
|
||
### Install | ||
``` | ||
npm install slugify-khmer | ||
``` | ||
|
||
### Usage | ||
|
||
```javascript | ||
import { slugify } from 'slugify-khmer'; | ||
|
||
slugify('មិនដឹងទេ that\'s nice') | ||
// => mindoeng-te that's nice | ||
|
||
slugify('មិនដឹងទេ that\'s nice', "_") | ||
// => mindoeng_te that's nice | ||
``` | ||
|
||
### License | ||
|
||
MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/**
 * Romanizes `text` lazily, yielding the output one fragment at a time.
 *
 * NOTE(review): the implementation in slugify.js is declared as
 * `transform(input)` and never reads a second argument, so `delimiter`
 * appears to have no effect here — confirm before relying on it.
 */
export function transform(text: string, delimiter?: string): Generator<string>; | ||
/**
 * Splits `text` into words, romanizes each word and joins the results with
 * `delimiter` (the implementation defaults it to `'-'`).
 */
export function slugify(text: string, delimiter?: string): string; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
const { split } = require('split-khmer'); | ||
|
||
// Consonants treated as "first series": a vowel that follows one of these is
// romanized with the first (index 0) spelling of its pair.
const firstSeries = new Set('កខចឆដឋណតថបផឝសហឡអ');

// Inherent vowel inserted between bare consonants, indexed by series
// (0 = preceding consonant is in `firstSeries`, 1 = it is not).
const vowelsDefault = ['a', 'o'];

// Latin spelling of every base character that `transform` emits directly:
// the consonants first, then the independent vowels.
const consonants = new Map([
  ['ក', 'k'],
  ['ខ', 'kh'],
  ['គ', 'k'],
  ['ឃ', 'kh'],
  ['ង', 'ng'],
  ['ច', 'ch'],
  ['ឆ', 'chh'],
  ['ជ', 'ch'],
  ['ឈ', 'chh'],
  ['ញ', 'nh'],
  ['ដ', 'd'],
  ['ឋ', 'th'],
  ['ឌ', 'd'],
  ['ឍ', 'th'],
  ['ណ', 'n'],
  ['ត', 't'],
  ['ថ', 'th'],
  ['ទ', 't'],
  ['ធ', 'th'],
  ['ន', 'n'],
  ['ប', 'b'],
  ['ផ', 'ph'],
  ['ព', 'p'],
  ['ភ', 'ph'],
  ['ម', 'm'],
  ['យ', 'y'],
  ['រ', 'r'],
  ['ល', 'l'],
  ['វ', 'v'],
  ['ឝ', 'sh'],
  ['ឞ', 'ss'],
  ['ស', 's'],
  ['ហ', 'h'],
  ['ឡ', 'l'],
  ['អ', 'a'],
  // Independent vowels.
  ['ឥ', 'e'],
  ['ឦ', 'ei'],
  ['ឧ', 'u'],
  ['ឩ', 'u'],
  // Was a second 'ឩ' entry, which silently replaced the 'u' mapping above.
  ['\u17AA', 'au'], // ឪ
  ['ឫ', 'rue'],
  // Was missing (a duplicate 'ឭ' sat in its place), so ឬ was dropped from output.
  ['\u17AC', 'rueu'], // ឬ
  ['ឭ', 'lue'],
  ['ឮ', 'lueu'],
  ['ឯ', 'ae'],
  ['ឰ', 'ai'],
  ['ឲ', 'ao'],
  ['ឱ', 'ao'],
  ['ឳ', 'au'],
]);
|
||
const vowelEntries = [ | ||
['◌់', ['a', 'o']], | ||
['ា', ['a', 'ea']], | ||
['ា់', ['a', 'ea']], | ||
[' ័◌', ['a', 'oa']], | ||
['ៈ', ['ak', 'eak']], | ||
['័យ', ['ai', 'ey']], | ||
['ិ', ['e', 'i']], | ||
['ី', ['ei', 'i']], | ||
['ឹ', ['oe', 'ue']], | ||
['ឺ', ['eu', 'ueu']], | ||
['ុ', ['o', 'u']], | ||
['ូ', ['ou', 'u']], | ||
['ួ', ['uo', 'uo']], | ||
['ើ', ['aeu', 'eu']], | ||
['ឿ', ['oea', 'oea']], | ||
['ៀ', ['ie', 'ie']], | ||
['េ', ['e', 'e']], | ||
['ែ', ['ae', 'eae']], | ||
['ៃ', ['ai', 'ey']], | ||
['ោ', ['ao', 'ou']], | ||
['ៅ', ['au', 'ov']], | ||
['ុំ', ['om', 'um']], | ||
['ំ', ['am', 'um']], | ||
['ាំ', ['am', 'oam']], | ||
['ាំង', ['ang', 'eang']], | ||
['ះ', ['ah', 'eah']], | ||
['ិះ', ['eh', 'is']], | ||
['ឹះ', ['oeh', 'ueh']], | ||
['ុះ', ['oh', 'uh']], | ||
['េះ', ['eh', 'eh']], | ||
['ើះ', ['aeuh', 'euh']], | ||
['ែះ', ['aeh', 'eaeh']], | ||
['ោះ', ['aoh', 'uoh']], | ||
].sort((a, b) => { | ||
return b[0].length - a[0].length | ||
}).map(([k, v]) => [new RegExp(`^${k}`), v]) | ||
|
||
/** | ||
* Convert Khmer word into a romanization form | ||
* @param {string} input | ||
* @returns {Generator<string>} | ||
*/ | ||
function* transform(input) { | ||
let pc = null; | ||
|
||
const sindex = () => +!(pc != null && firstSeries.has(pc)); | ||
|
||
for (let i = 0; i < input.length; i++) { | ||
let c = input[i]; | ||
|
||
if (/[\s\u200b\u200a]|[^\u1780-\u17dd]/.test(c)) { | ||
vowelsMatch = null; | ||
pc = null; | ||
yield c; | ||
continue; | ||
} | ||
|
||
|
||
|
||
if (consonants.has(c)) { | ||
|
||
if (pc != null) { | ||
if (i - 2 >= 0) { | ||
if (consonants.has(input[i - 2])) { | ||
yield vowelsDefault[sindex()]; | ||
} | ||
} | ||
} | ||
|
||
pc = c; | ||
yield consonants.get(c); | ||
continue | ||
} | ||
|
||
for (const [pattern, values] of vowelEntries) { | ||
const m = pattern.exec(input.slice(i)); | ||
if (!m) continue; | ||
i += m[0].length - 1; | ||
const r = values[sindex()]; | ||
yield r | ||
break | ||
} | ||
|
||
pc = null; | ||
// yield c; | ||
|
||
} | ||
} | ||
|
||
exports.transform = transform; | ||
|
||
/** | ||
* Convert Khmer text into a romaization representation | ||
* @param {string} text | ||
* @returns {string} | ||
*/ | ||
exports.slugify = function (text, delimiter = '-') { | ||
return split(text) | ||
.map(word => Array.from(transform(word)).join("")) | ||
.join(delimiter) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
const { slugify } = require('.') | ||
|
||
// Manual smoke check: print the slug produced for a mixed Khmer/English sample.
console.log(slugify('មិនដឹងទេ that\'s nice')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import { slugify } from './slugify.js' | ||
|
||
// Manual smoke check via ESM import: print the slug for a mixed Khmer/English sample.
console.log(slugify('មិនដឹងទេ that\'s nice')) |