/
basic.test.ts
57 lines (41 loc) · 1.69 KB
/
basic.test.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import {BasicTextNormalizer} from './basic';
/**
 * Table-driven spec for `BasicTextNormalizer.normalize`.
 *
 * Every scenario lists the test title, a factory producing the normalizer
 * configuration for that case, the raw input, and the exact string that
 * `normalize` is expected to return for it.
 */
describe('basicTextNormalizer normalize', () => {
  // Factory for a normalizer built with no constructor arguments.
  const withDefaults = () => new BasicTextNormalizer();

  const scenarios = [
    {
      title: 'returns empty string for empty input',
      build: withDefaults,
      input: '',
      expected: '',
    },
    {
      title: 'converts all letters to lowercase',
      build: withDefaults,
      input: 'AbCdEf',
      expected: 'abcdef',
    },
    {
      title: 'removes words between brackets',
      build: withDefaults,
      input: 'This [word] should be removed.',
      expected: 'this should be removed',
    },
    {
      title: 'removes words between parenthesis',
      build: withDefaults,
      input: 'This (word) should be removed.',
      expected: 'this should be removed',
    },
    {
      title: 'removes symbols and diacritics if option is provided',
      // First constructor argument set to `true`; going by this case's title
      // it is the switch for diacritic removal.
      build: () => new BasicTextNormalizer(true),
      input: 'Résûmé',
      expected: 'resume',
    },
    {
      title: 'removes symbols but keeps diacritics if option is not provided',
      build: withDefaults,
      input: 'Café!',
      expected: 'café',
    },
    {
      title: 'splits letters into individual words if option is provided',
      // Second constructor argument set to `true`; going by this case's title
      // it is the switch for letter splitting.
      build: () => new BasicTextNormalizer(false, true),
      input: 'abc',
      expected: 'a b c',
    },
    {
      title: 'replaces successive whitespace characters with a single space',
      build: withDefaults,
      input: 'This text\nhas, lots of spaces.',
      expected: 'this text has lots of spaces',
    },
  ];

  // Register one test per scenario, in table order. The normalizer is created
  // inside the test callback so each case runs against its own fresh instance.
  for (const {title, build, input, expected} of scenarios) {
    it(title, () => {
      const actual = build().normalize(input);
      expect(actual).toEqual(expected);
    });
  }
});