-
-
Notifications
You must be signed in to change notification settings - Fork 190
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* wip: diff between hunspell_words and en_US.txt * ci: make it easier to add words to en_US Adjust the publishing process to catch state dictionaries. * Starting with en_US and will add others later.
- Loading branch information
Showing
10 changed files
with
55 additions
and
133,111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
992510ad9a274f96e60f92412400acb574b2b81e ?en_US.trie.gz | ||
3f23e3eac82926ce3c46244aa34486193e6d7daa ?./src/additional_words.txt | ||
e7354aa22bb5593d71deb2852467dbb1fff15556 ?./src/en_US.txt | ||
93502f493fd250c11dc87625ecfa723f08dc77cb ?./src/hunspell/en_US.dic |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env node | ||
|
||
'use strict'; | ||
|
||
/** | ||
* This script was used to pare down the number of duplicate words in | ||
* en_US.txt and hunspell/en_US.dic. | ||
* | ||
* I might use it to generate a base English dictionary from both en_US and en_GB. | ||
* But it will need a bit of cleaning up first. | ||
*/ | ||
|
||
const fs = require('fs'); | ||
const path = require('path'); | ||
|
||
/**
 * Read a word-list file and return its unique, non-empty entries.
 *
 * The file is read synchronously as UTF-8 and split on `\n`. Each line is
 * trimmed (which also drops a trailing `\r` from CRLF files) and blank lines
 * are discarded.
 *
 * @param {string} filename - Path of the word list to read.
 * @returns {Set<string>} The distinct trimmed lines, in first-seen order.
 * @throws {Error} Whatever `fs.readFileSync` throws (e.g. the file is missing).
 */
function readWords(filename) {
    const contents = fs.readFileSync(filename, 'utf8');
    const words = contents
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    return new Set(words);
}
|
||
// Input word lists. These are relative paths, so they resolve against the
// directory the script is run from.
const addedWordsFile = path.join('src', 'en_US.txt');
const hunspellWordsFile = path.join('src', 'hunspell_words.txt');

const added = readWords(addedWordsFile);
const hunspell = readWords(hunspellWordsFile);

// Keep only the words from en_US.txt that are not already in hunspell_words.txt.
const diff = [...added].filter((w) => !hunspell.has(w));

// One word per line, with a trailing newline.
fs.writeFileSync(path.join('src', 'diff.txt'), diff.join('\n') + '\n', 'utf8');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
./src/additional_words.txt | ||
./src/en_US.txt | ||
./src/hunspell/en_US.dic |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Add words below |
Oops, something went wrong.