Skip to content


[cspell-tools] add ability to merge files into a single target. (#140)
Browse files Browse the repository at this point in the history
* [cspell-tools] removed unused functions.

* [cspell-tools] Refactor the app so we can merge files.

* Refactor for merge

* Add ability to merge files
  • Loading branch information
Jason3S committed Dec 8, 2019
1 parent 26479aa commit f6d8ada
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 92 deletions.
30 changes: 30 additions & 0 deletions packages/cspell-tools/src/__snapshots__/app.test.ts.snap
@@ -0,0 +1,30 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`Validate the application test app compile merge 1`] = `
los angeles
mexico city
new amsterdam
new delhi
new york
san francisco
44 changes: 44 additions & 0 deletions packages/cspell-tools/src/app.test.ts
@@ -1,6 +1,7 @@
import * as app from './app';
import * as Commander from 'commander';
import * as path from 'path';
import * as fs from 'fs-extra';

const projectRoot = path.join(__dirname, '..');
const pathSamples = path.join(projectRoot, 'Samples');
Expand All @@ -16,6 +17,24 @@ function getCommander() {

describe('Validate the application', () => {
test('test app compile-trie', async () => {
const commander = getCommander();
const log = jest.spyOn(console, 'log').mockImplementation();
const args = argv('compile-trie', '-n', path.join(pathSamples, 'cities.txt'));
await expect(, args)).resolves.toBeUndefined();

test('test app compile-trie compress', async () => {
const commander = getCommander();
const log = jest.spyOn(console, 'log').mockImplementation();
const args = argv('compile-trie', path.join(pathSamples, 'cities.txt'));
await expect(, args)).resolves.toBeUndefined();

test('test app compile-trie -o', async () => {
const commander = getCommander();
const log = jest.spyOn(console, 'log').mockImplementation();
const args = argv('compile-trie', '-n', path.join(pathSamples, 'cities.txt'), '-o', pathTemp);
Expand All @@ -33,6 +52,31 @@ describe('Validate the application', () => {

test('test app compile with compression', async () => {
const commander = getCommander();
const log = jest.spyOn(console, 'log').mockImplementation();
const args = argv('compile', path.join(pathSamples, 'cities.txt'), '-o', pathTemp);
await expect(, args)).resolves.toBeUndefined();

test('test app compile merge', async () => {
const commander = getCommander();
const targetDir = pathTemp;
const target = 'merge.txt';
const cities = path.join(pathSamples, 'cities.txt');
const exampleHunspell = path.join(pathSamples, 'hunspell', 'example.dic');
const log = jest.spyOn(console, 'log').mockImplementation();
const args = argv('compile', '-n', '-M', target, cities, exampleHunspell, '-o', targetDir);
await expect(, args)).resolves.toBeUndefined();
const words = await fs.readFile(path.join(targetDir, target), 'UTF-8');


test('test app no args', () => {
const commander = getCommander();
const mock = jest.fn();
Expand Down
122 changes: 98 additions & 24 deletions packages/cspell-tools/src/app.ts
Expand Up @@ -8,7 +8,8 @@ import { compileWordList, compileTrie } from './compiler';
import * as path from 'path';
import * as program from 'commander';
import * as glob from 'glob';
import { genSequence } from 'gensequence';
import { genSequence, Sequence } from 'gensequence';
import { streamWordsFromFile } from './compiler/iterateWordsFromFile';
const npmPackage = require(path.join(__dirname, '..', 'package.json'));

function globP(pattern: string): Promise<string[]> {
Expand All @@ -19,6 +20,22 @@ function globP(pattern: string): Promise<string[]> {

/**
 * Command-line options shared by the `compile` and `compile-trie` commands.
 */
interface CompileCommonOptions {
    /** `-o, --output <path>`: output directory; when absent, files are written next to their source. */
    output?: string;
    /** `false` when `-n, --no-compress` is given; otherwise the output is gzipped. */
    compress: boolean;
    // NOTE(review): no `--case` option is declared in the visible option lists — confirm where this is set.
    case: boolean;
    /** `-m, --max_depth <limit>`: maximum depth to apply suffix rules (raw string from the CLI). */
    max_depth?: string;
    /**
     * `-M, --merge <target>`: merge all files into this single target file.
     * Optional because it is only present when the flag is passed.
     */
    merge?: string;
}

/**
 * Options for the word-list `compile` command.
 */
interface CompileOptions extends CompileCommonOptions {
    /** `false` when `-s, --no-split` is given: treat each line as a dictionary entry, do not split. */
    split: boolean;
    /** `false` when `--no-sort` is given: do not sort the result. */
    sort: boolean;
}

/**
 * Options for the `compile-trie` command; it adds nothing beyond the common options.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface CompileTrieOptions extends CompileCommonOptions {}

export function run(
program: program.Command,
argv: string[]
Expand All @@ -35,16 +52,12 @@ export function run(
.option('-o, --output <path>', 'Specify the output directory, otherwise files are written back to the same location.')
.option('-n, --no-compress', 'By default the files are Gzipped, this will turn that off.')
.option('-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.')
.option('-M, --merge <target>', 'Merge all files into a single target file (extensions are applied)')
.option('-s, --no-split', 'Treat each line as a dictionary entry, do not split')
.option('--no-sort', 'Do not sort the result')
.action((src: string[], options: { output?: string, compress: boolean, split: boolean, sort: boolean, case: boolean, max_depth?: string }) => {
const { max_depth } = options;
const maxDepth = max_depth !== undefined ? Number.parseInt(max_depth) : undefined;
.action((src: string[], options: CompileOptions) => {
const result = processAction(src, '.txt', options, async (src, dst) => {
console.log('Process "%s" to "%s"', src, dst);
await compileWordList(src, dst, { splitWords: options.split, sort: options.sort, maxDepth }).then(() => src);
console.log('Done "%s" to "%s"', src, dst);
return src;
return compileWordList(src, dst, { splitWords: options.split, sort: options.sort }).then(() => src);
Expand All @@ -54,13 +67,11 @@ export function run(
.description('Compile words lists or Hunspell dictionary into trie files used by cspell.')
.option('-o, --output <path>', 'Specify the output directory, otherwise files are written back to the same location.')
.option('-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.')
.option('-M, --merge <target>', 'Merge all files into a single target file (extensions are applied)')
.option('-n, --no-compress', 'By default the files are Gzipped, this will turn that off.')
.action((src: string[], options: { output?: string, compress: boolean, max_depth?: string }) => {
const { max_depth } = options;
const maxDepth = max_depth !== undefined ? Number.parseInt(max_depth) : undefined;
const result = processAction(src, '.trie', options, async (src, dst) => {
console.log('Process "%s" to "%s"', src, dst);
return compileTrie(src, dst, { maxDepth }).then(() => src);
.action((src: string[], options: CompileTrieOptions) => {
const result = processAction(src, '.trie', options, async (words: Sequence<string>, dst) => {
return compileTrie(words, dst);
Expand All @@ -78,34 +89,97 @@ export function run(

interface FileToProcess {
src: string;
words: Sequence<string>;

async function processAction(
src: string[],
fileExt: '.txt' | '.trie',
options: { output?: string, compress: boolean },
action: (src: string, dst: string) => Promise<any>)
options: CompileCommonOptions,
action: (words: Sequence<string>, dst: string) => Promise<any>)
: Promise<void> {
console.log('Compile:\n output: %s\n compress: %s\n files:\n %s \n\n',
options.output || 'default',
options.compress ? 'true' : 'false',
src.join('\n '));

const ext = fileExt + (options.compress ? '.gz' : '');
const { max_depth } = options;
const maxDepth = max_depth !== undefined ? Number.parseInt(max_depth) : undefined;
const readerOptions = { maxDepth };

const globResults = await Promise.all( => globP(s)));
const toProcess = genSequence(globResults)
const filesToProcess = genSequence(globResults)
.concatMap(files => files)
.map(s => {
const outFilename = path.basename(s).replace(/(\.txt|\.dic|\.aff)?$/, ext);
const dir = options.output ? options.output : path.dirname(s);
return [s, path.join(dir, outFilename)] as [string, string];
.map(([src, dst]) => action(src, dst));
.map(async s => {
const words = await streamWordsFromFile(s, readerOptions);
const f: FileToProcess = {
src: s,
return f;

const r = options.merge
? processFiles(action, filesToProcess, toMergeTargetFile(options.merge, options.output, ext))
: processFilesIndividually(action, filesToProcess, s => toTargetFile(s, options.output, ext));
await r;

/**
 * Build an output file name from a source path.
 *
 * Takes the base name of `name`, strips a trailing `.txt`, `.dic` or `.aff`
 * extension (optionally followed by `.gz`), and appends `ext`.
 *
 * @param name - source file path or bare file name.
 * @param ext - extension to append, including the leading dot (e.g. `.trie.gz`).
 * @returns the file name only, without any directory part.
 */
function toFilename(name: string, ext: string): string {
    // The whole extension group is optional, so names without a known
    // extension are left untouched and simply get `ext` appended.
    const stem = path.basename(name).replace(/((\.txt|\.dic|\.aff)(\.gz)?)?$/, '');
    return stem + ext;
}

/**
 * Compute the output path for a single source file.
 *
 * @param filename - path of the source file.
 * @param destination - output directory; when `undefined` (or `null`) the
 *   source file's own directory is used.
 * @param ext - extension for the output file, including the leading dot.
 * @returns the destination directory joined with the renamed file.
 */
function toTargetFile(filename: string, destination: string | undefined, ext: string): string {
    const outFileName = toFilename(filename, ext);
    const dir = destination ?? path.dirname(filename);
    return path.join(dir, outFileName);
}

/**
 * Compute the absolute output path for a merged target file.
 *
 * @param filename - name given to the merge target (its known extension is replaced by `ext`).
 * @param destination - output directory; when `undefined` (or `null`), `./` is used,
 *   which `path.resolve` resolves against the current working directory.
 * @param ext - extension for the output file, including the leading dot.
 * @returns an absolute path to the merge target.
 */
function toMergeTargetFile(filename: string, destination: string | undefined, ext: string): string {
    const outFileName = toFilename(filename, ext);
    return path.resolve(destination ?? './', outFileName);
}

/**
 * Run `action` once per source file, writing each file to its own target.
 *
 * Files are handled strictly one after another: the next entry of
 * `filesToProcess` is not pulled until the previous action has settled.
 *
 * @param action - writes the given words to the destination path.
 * @param filesToProcess - sequence of pending source files with their word streams.
 * @param srcToTarget - maps a source path to its destination path.
 */
async function processFilesIndividually(
    action: (words: Sequence<string>, dst: string) => Promise<any>,
    filesToProcess: Sequence<Promise<FileToProcess>>,
    srcToTarget: (src: string) => string,
): Promise<void> {
    for (const pending of filesToProcess) {
        const { src, words } = await pending;
        const dst = srcToTarget(src);
        console.log('Process "%s" to "%s"', src, dst);
        await action(words, dst);
        console.log('Done "%s" to "%s"', src, dst);
    }
}

/**
 * Merge the words of every source file into one stream and run `action`
 * once to write them to `mergeTarget`.
 *
 * @param action - writes the given words to the destination path.
 * @param filesToProcess - sequence of pending source files with their word streams.
 * @param mergeTarget - destination path of the merged output.
 */
async function processFiles(
    action: (words: Sequence<string>, dst: string) => Promise<any>,
    filesToProcess: Sequence<Promise<FileToProcess>>,
    mergeTarget: string,
): Promise<void> {
    // Resolve every pending file before any words are consumed.
    const toProcess = await Promise.all([...filesToProcess]);
    const dst = mergeTarget;

    const words = genSequence(toProcess)
        .map(ftp => {
            // Logged as the sequence is consumed, i.e. when `action` reaches this file.
            const { src } = ftp;
            console.log('Process "%s" to "%s"', src, dst);
            return ftp;
        })
        .concatMap(ftp => ftp.words);
    await action(words, dst);
    console.log('Done "%s"', dst);
}

if (require.main === module) {
Expand Down
66 changes: 28 additions & 38 deletions packages/cspell-tools/src/compiler/wordListCompiler.test.ts
Expand Up @@ -8,6 +8,7 @@ import * as Trie from 'cspell-trie-lib';
import * as path from 'path';
import { genSequence } from 'gensequence';
import { readFile } from 'cspell-io';
import { streamWordsFromFile } from './iterateWordsFromFile';

const UTF8: BufferEncoding = 'utf8';

Expand Down Expand Up @@ -51,24 +52,20 @@ describe('Validate the wordListCompiler', () => {

test('test reading and normalizing a file', () => {
const sourceName = path.join(__dirname, '..', '..', 'Samples', 'cities.txt');
test('test reading and normalizing a file', async () => {
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'cities.txt'), {});
const destName = path.join(__dirname, '..', '..', 'temp', 'cities.txt');
return compileWordList(sourceName, destName, { splitWords: true, sort: true })
.then(() => fsp.readFile(destName, 'utf8'))
.then(output => {
await compileWordList(sourceName, destName, { splitWords: true, sort: true });
const output = await fsp.readFile(destName, 'utf8');

test('test compiling to a file without split', () => {
const sourceName = path.join(__dirname, '..', '..', 'Samples', 'cities.txt');
test('test compiling to a file without split', async () => {
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'cities.txt'), {});
const destName = path.join(__dirname, '..', '..', 'temp', 'cities2.txt');
return compileWordList(sourceName, destName, { splitWords: false, sort: true })
.then(() => fsp.readFile(destName, 'utf8'))
.then(output => {
await compileWordList(sourceName, destName, { splitWords: false, sort: true })
const output = await fsp.readFile(destName, 'utf8');

test('tests normalized to a trie', () => {
Expand All @@ -79,22 +76,19 @@ describe('Validate the wordListCompiler', () => {
expect(tWords.sort())[...(new Set(nWords.sort()))]);

test('test reading and normalizing to a trie file', () => {
const sourceName = path.join(__dirname, '..', '..', 'Samples', 'cities.txt');
test('test reading and normalizing to a trie file', async () => {
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'cities.txt'), {});
const destName = path.join(__dirname, '..', '..', 'temp', 'cities.trie');
return compileTrie(sourceName, destName)
.then(() => fsp.readFile(destName, UTF8))
.then(output => output.split('\n'))
.then(srcWords => {
const node = Trie.importTrie(srcWords);
const expected = citiesResult.split('\n').filter(a => !!a).sort();
const words = [...Trie.iteratorTrieWords(node)].sort();
await compileTrie(sourceName, destName);
const srcWords = (await fsp.readFile(destName, 'utf8')).split('\n');
const node = Trie.importTrie(srcWords);
const expected = citiesResult.split('\n').filter(a => !!a).sort();
const words = [...Trie.iteratorTrieWords(node)].sort();

test('test reading and normalizing to a trie gz file', async () => {
const sourceName = path.join(__dirname, '..', '..', 'Samples', 'cities.txt');
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'cities.txt'), {});
const destName = path.join(__dirname, '..', '..', 'temp', 'cities.trie.gz');
await compileTrie(sourceName, destName);
const resultFile = await readFile(destName, UTF8);
Expand All @@ -106,23 +100,19 @@ describe('Validate the wordListCompiler', () => {

test('test a simple hunspell dictionary depth 0', async () => {
const sourceName = path.join(__dirname, '..', '..', 'Samples', 'hunspell', 'example.dic');
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'hunspell', 'example.dic'), { maxDepth: 0});
const destName = path.join(__dirname, '..', '..', 'temp', 'example0.txt');
return compileWordList(sourceName, destName, { splitWords: false, sort: true, maxDepth: 0 })
.then(() => fsp.readFile(destName, 'utf8'))
.then(output => {
await compileWordList(sourceName, destName, { splitWords: false, sort: true });
const output = await fsp.readFile(destName, 'utf8');

test('test a simple hunspell dictionary depth 1', async () => {
const sourceName = path.join(__dirname, '..', '..', 'Samples', 'hunspell', 'example.dic');
const sourceName = await streamWordsFromFile(path.join(__dirname, '..', '..', 'Samples', 'hunspell', 'example.dic'), { maxDepth: 1});
const destName = path.join(__dirname, '..', '..', 'temp', 'example0.txt');
return compileWordList(sourceName, destName, { splitWords: false, sort: true, maxDepth: 1 })
.then(() => fsp.readFile(destName, 'utf8'))
.then(output => {
expect(output.split('\n'))['hello', 'rework', 'tried', 'try', 'work', 'worked', '']);
await compileWordList(sourceName, destName, { splitWords: false, sort: true });
const output = await fsp.readFile(destName, 'utf8');
expect(output.split('\n'))['hello', 'rework', 'tried', 'try', 'work', 'worked', '']);

Expand Down

0 comments on commit f6d8ada

Please sign in to comment.