Skip to content
This repository has been archived by the owner on Mar 13, 2023. It is now read-only.

Commit

Permalink
feat(parser): add support for scripts other than latin
Browse files Browse the repository at this point in the history
ISSUES CLOSED: #105
  • Loading branch information
moranje committed Mar 16, 2019
1 parent 7d79fd3 commit b12ce3d
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions src/todoist/lexer.ts
@@ -1,4 +1,12 @@
import moo from 'moo';
import moo from 'moo'
import escape from 'escape-string-regexp'

// Includes numbers, lowercase letters and titlecase letters
// Generated with: https://apps.timwhitlock.info/js/regex#
const ALL_SCRIPTS =
'[0-9A-Za-zªµºÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶ-ͷͻ-ͽΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԣԱ-Ֆա-և٠-٩۰-۹߀-߉०-९০-৯੦-੯૦-૯୦-୯௦-௯౦-౯೦-೯൦-൯๐-๙໐-໙༠-༩၀-၉႐-႙Ⴀ-Ⴥ០-៩᠐-᠙᥆-᥏᧐-᧙᭐-᭙᮰-᮹᱀-᱉᱐-᱙ᴀ-ᴫᵢ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱⁿℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-Ɐⱱ-ⱼⲀ-ⳤⴀ-ⴥ꘠-꘩Ꙁ-ꙟꙢ-ꙭꚀ-ꚗꜢ-ꝯꝱ-ꞇꞋ-ꞌ꣐-꣙꤀-꤉꩐-꩙ff-stﬓ-ﬗ0-9A-Za-z]|\ud801[\udc00-\udc4f\udca0-\udca9]|\ud835[\udc00-\udc54\udc56-\udc9c\udc9e-\udc9f\udca2\udca5-\udca6\udca9-\udcac\udcae-\udcb9\udcbb\udcbd-\udcc3\udcc5-\udd05\udd07-\udd0a\udd0d-\udd14\udd16-\udd1c\udd1e-\udd39\udd3b-\udd3e\udd40-\udd44\udd46\udd4a-\udd50\udd52-\udea5\udea8-\udec0\udec2-\udeda\udedc-\udefa\udefc-\udf14\udf16-\udf34\udf36-\udf4e\udf50-\udf6e\udf70-\udf88\udf8a-\udfa8\udfaa-\udfc2\udfc4-\udfcb\udfce-\udfff]'
// Name pattern without spaces: one or more characters from ALL_SCRIPTS,
// underscores or hyphens.
const SEPARATORS = new RegExp(['(?:', ALL_SCRIPTS, '|_|-)+'].join(''))
// Same pattern as SEPARATORS, except that a literal space is accepted as well.
const WHITESPACE = new RegExp(['(?:', ALL_SCRIPTS, '|_| |-)+'].join(''))

/** @hidden */
const lexer = moo.states({
Expand All @@ -16,24 +24,24 @@ const lexer = moo.states({

project: {
open: { match: /\[/, next: 'projectWithSpaces' },
name: { match: /[a-zA-Z0-9_-]+/, pop: 1 }
name: { match: SEPARATORS, pop: 1 }
},

projectWithSpaces: {
close: { match: /\]/, pop: 1 },
name: /[a-zA-Z0-9_ -]+/
name: WHITESPACE
},

label: {
name: { match: /[a-zA-Z0-9_-]+/, pop: 1 }
name: { match: SEPARATORS, pop: 1 }
},

priority: {
number: { match: /[1-4]/, pop: 1 }
},

person: {
name: { match: /[a-zA-Z0-9_-]+/, pop: 1 }
name: { match: SEPARATORS, pop: 1 }
},

date: {
Expand Down

0 comments on commit b12ce3d

Please sign in to comment.