Skip to content

Commit 5045852

Browse files
committed
feat(packages/stdlib): add trigram distance util
1 parent 7d8f5be commit 5045852

File tree

1 file changed

+49
-0
lines changed
  • packages/stdlib/src/text/trigram-distance

1 file changed

+49
-0
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/** A trigram profile: maps each trigram (a 3-character substring) to its count. */
export type Trigrams = Map<string, number>;
2+
3+
/**
 * Builds the trigram profile of a text: every window of three consecutive
 * characters is counted, after the text has been padded with two newline
 * characters on each side so that its first and last characters also take
 * part in boundary trigrams.
 *
 * A text of length `n` therefore contributes `n + 2` trigrams in total.
 *
 * @param {string} text The text to extract trigrams from
 * @returns {Trigrams} A map of trigram to number of occurrences
 */
export function trigramProfile(text: string): Trigrams {
  const padded = `\n\n${text}\n\n`;
  const profile = new Map<string, number>();

  // Index of the last position where a full 3-character window still fits.
  const lastStart = padded.length - 3;

  let start = 0;
  while (start <= lastStart) {
    const gram = padded.substring(start, start + 3);
    profile.set(gram, (profile.get(gram) ?? 0) + 1);
    start += 1;
  }

  return profile;
}
22+
23+
/**
 * Calculates the normalised trigram distance between two trigram profiles.
 *
 * The distance is the sum of the absolute count differences over every
 * trigram that appears in either profile, divided by the total number of
 * trigrams in both profiles. Both sums are offset by -4: a profile built by
 * `trigramProfile` holds two more trigrams than its text has characters, so
 * the offset makes the denominator equal to the combined text length and
 * makes two texts with no trigram in common score exactly 1.
 *
 * Identical (or near-identical) profiles, whose offset distance is not
 * positive, score 0. For profiles with non-negative counts the result lies
 * in the range [0, 1].
 *
 * @param {Trigrams} left First text trigram profile
 * @param {Trigrams} right Second text trigram profile
 * @returns {number} The trigram distance between the two profiles
 */
export function trigramDistance(left: Trigrams, right: Trigrams): number {
  let distance = -4;
  let total = -4;

  // First pass: every trigram known to `left`, compared against `right`.
  for (const [trigram, leftCount] of left) {
    total += leftCount;
    distance += Math.abs(leftCount - (right.get(trigram) ?? 0));
  }

  // Second pass: only trigrams that are exclusive to `right`. Trigrams shared
  // by both profiles were already compared above; adding their difference
  // again would double-count it and could push the result above 1.
  for (const [trigram, rightCount] of right) {
    total += rightCount;
    if (!left.has(trigram)) {
      distance += Math.abs(rightCount);
    }
  }

  // Clamp the offset distance at zero and avoid dividing by a non-positive
  // total (which would otherwise yield NaN or a negative result).
  if (distance <= 0 || total <= 0) return 0;

  return distance / total;
}

0 commit comments

Comments
 (0)