Skip to content

Commit

Permalink
Merge c29913c into 331057e
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Dec 15, 2019
2 parents 331057e + c29913c commit 3666420
Show file tree
Hide file tree
Showing 19 changed files with 1,195 additions and 262 deletions.
3 changes: 3 additions & 0 deletions cspell.json
Expand Up @@ -29,6 +29,8 @@
"words": [
"DAWG",
"deserialize",
"deserializer",
"deserializers",
"disp",
"gitignore",
"gzipped",
Expand All @@ -37,6 +39,7 @@
"micromatch",
"monorepo",
"restructuredtext",
"serializers",
"streetsidesoftware",
"xxhash"
],
Expand Down
6 changes: 4 additions & 2 deletions packages/cspell-tools/src/app.ts
Expand Up @@ -34,6 +34,7 @@ interface CompileOptions extends CompileCommonOptions {
}

interface CompileTrieOptions extends CompileCommonOptions {
trie3: boolean;
}

export function run(
Expand Down Expand Up @@ -69,9 +70,10 @@ export function run(
.option('-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.')
.option('-M, --merge <target>', 'Merge all files into a single target file (extensions are applied)')
.option('-n, --no-compress', 'By default the files are Gzipped, this will turn that off.')
.option('--trie3', '[Beta] Use file format trie3')
.action((src: string[], options: CompileTrieOptions) => {
const result = processAction(src, '.trie', options, async (words: Sequence<string>, dst) => {
return compileTrie(words, dst);
return compileTrie(words, dst, options);
});
resolve(result);
});
Expand Down Expand Up @@ -130,7 +132,7 @@ async function processAction(
}

function toFilename(name: string, ext: string) {
return path.basename(name).replace(/((\.txt|\.dic|\.aff)(\.gz)?)?$/, '') + ext;
return path.basename(name).replace(/((\.txt|\.dic|\.aff|\.trie)(\.gz)?)?$/, '') + ext;
}

function toTargetFile(filename: string, destination: string | undefined, ext: string) {
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-tools/src/compiler/wordListCompiler.test.ts
Expand Up @@ -79,7 +79,7 @@ describe('Validate the wordListCompiler', () => {
test('test reading and normalizing to a trie file', async () => {
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'cities.txt'), {});
const destName = path.join(__dirname, '..', '..', 'temp', 'cities.trie');
await compileTrie(sourceName, destName);
await compileTrie(sourceName, destName, {});
const srcWords = (await fsp.readFile(destName, 'utf8')).split('\n');
const node = Trie.importTrie(srcWords);
const expected = citiesResult.split('\n').filter(a => !!a).sort();
Expand All @@ -90,7 +90,7 @@ describe('Validate the wordListCompiler', () => {
test('test reading and normalizing to a trie gz file', async () => {
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'cities.txt'), {});
const destName = path.join(__dirname, '..', '..', 'temp', 'cities.trie.gz');
await compileTrie(sourceName, destName);
await compileTrie(sourceName, destName, {});
const resultFile = await readFile(destName, UTF8);
const srcWords = resultFile.split('\n');
const node = Trie.importTrie(srcWords);
Expand Down
11 changes: 9 additions & 2 deletions packages/cspell-tools/src/compiler/wordListCompiler.ts
Expand Up @@ -81,11 +81,18 @@ export function normalizeWordsToTrie(words: Sequence<string>): Trie.TrieNode {
return result;
}

export async function compileTrie(words: Sequence<string>, destFilename: string): Promise<void> {
export interface CompileTrieOptions {
base?: number;
trie3?: boolean;
}

export async function compileTrie(words: Sequence<string>, destFilename: string, options: CompileTrieOptions): Promise<void> {
const base = options.base ?? 32;
const version = options.trie3 ? 3 : 1;
const destDir = path.dirname(destFilename);
const pDir = mkdirp(destDir);
const pRoot = normalizeWordsToTrie(words);
const [root] = await Promise.all([pRoot, pDir]);

return writeSeqToFile(Trie.serializeTrie(root, { base: 32, comment: 'Built by cspell-tools.' }), destFilename);
return writeSeqToFile(Trie.serializeTrie(root, { base, comment: 'Built by cspell-tools.', version }), destFilename);
}
53 changes: 53 additions & 0 deletions packages/cspell-trie-lib/Samples/sample.txt
@@ -0,0 +1,53 @@
journal
journalism
journalist
journalistic
journals
journey
journeyer
journeyman
journeymen
joust
jouster
jousting
jovial
joviality
jowl
jowly
joy
joyful
joyfuller
joyfullest
joyfulness
joyless
joylessness
joyous
joyousness
joyridden
joyride
joyrider
joyriding
joyrode
joystick
lift
lifted
lifter
lifting
lifts
talk
talked
talker
talking
talks
walk
walked
walker
walking
walks
Big Apple
New York
apple
big apple
fun journey
long walk
fun walk
100 changes: 100 additions & 0 deletions packages/cspell-trie-lib/Samples/sampleV1.trie
@@ -0,0 +1,100 @@
#!/usr/bin/env cspell-trie reader
TrieXv1
base=10
# Data:
*
c
r
n
g
y
*y
t
s
*r
e
k
d,r
*i1
a3,e3
n4
t5
l6
r,s7
s8
e3
d10
c11
l11
l10
r11
e5
m,t13
*e2,m14
*e2,i15
*i16
e18
e19
d20,e9,i15
i22
*e12,i15,s
a23
p24
o25
n26
s27
y28
t29
l30
*l31,n32
*n32
d33
t34
t35
w36
k35
p37
Y38
r39
*i40,s
a43
l44
s45
i46,o21
f48
49
l50
A51
52
a51
u53
l54
i55
u56
s57
u57
g60
a61
62
w63
64
o65
a66,e41
e69
n71
g73
e74
g75
j76,w36
n77
*f68,l78,o70,r58,s47
i59,o79
i80
i82
83
r84,s42
n89
u90,v67,w17,y85
u91
o92
B87,N81,a51,b88,f93,j94,l86,t72,w72
121 changes: 121 additions & 0 deletions packages/cspell-trie-lib/Samples/sampleV2.trie
@@ -0,0 +1,121 @@
#!/usr/bin/env cspell-trie reader
TrieXv2
base=10
# Sample Words
# Data:
__DATA__
m*
c*
s*
r*
n*
g*
y*
t*
e*
k*
d*
i1
e3
a4
e4
n5
t6
l*6
s7
s2
e*3
d8
c9
e3,10
l9
l8
r9
e6
t*11
m13,14
i15
i16
w17
e3,18
e19
d14
o21
i22
a24
p25
o26
n27
s0,28
y*12,29
t*12,30
l*31
l33
n34
d20,30,35
t37
t*2,23,30
w38
k*2,23,30
p39
Y40
r41
i42
e43
s44
a45
l*46,47
s*47
i48
s49
f50
51
l52
A53
54
a53
u55
l*2,56
i59
u60
s61
u61
r36,62
i64
g65
a66
67
w68
69
o70
a71
v72
f73
e74
o75
n78
t79
w79
g80
e81
g82
j83
n57,84
l87
o89
i92
N93
i94
51,95
r96
y*63,76,86,88,97
l77,98
B99
b101
n102
u58,103
u108
o32,85,104,109
f110
j111
^69,90,91,100,105,106,107,112,113
15 changes: 15 additions & 0 deletions packages/cspell-trie-lib/Samples/sampleV3.trie
@@ -0,0 +1,15 @@
#!/usr/bin/env cspell-trie reader
TrieXv3
base=10
# Sample Words
# Data:
__DATA__
Big Apple$<<<<<<<<<
New York$<<<<<<<<
apple$<<<<<
big apple$<<<<<<<<<
fun journey$<<<<<<<walk$<<<<<<<<
journal$ism$<t$ic$<<<<<s$<<<ey$er$<<man$<<en$<<<<<<<st$er$<<ing$<<<<<<vial$ity$<<<<<<<wl$y$<<<y$ful$ler$<st$<<<<ness$<<<<<<<less$ness$<<<<<<<<ous$ness$<<<<<<<ridden$<<<e$r$<<ing$<<<<<ode$<<<<stick$<<<<<<<<
lift$ed$<r$<<ing$<<<s$<<<<ong walk$<<<<<<<<<
talk$ed$<r$<<ing$<<<s$<<<<<
walk$ed$<r$<<ing$<<<s$<<<<<

0 comments on commit 3666420

Please sign in to comment.