Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
5ce9398
Bracket Balance
thsubaku9 Oct 7, 2020
050f089
README update
thsubaku9 Oct 7, 2020
1801f4b
README update
thsubaku9 Oct 7, 2020
96784bd
README update
thsubaku9 Oct 7, 2020
0b0d182
Bloom Filter Update!
thsubaku9 Oct 8, 2020
c1f956d
readme changes
thsubaku9 Oct 8, 2020
cd3a235
Merge branch 'main' into work-branch
thsubaku9 Oct 8, 2020
a6b376d
Merge branch 'main' of https://github.com/thsubaku9/string-dsa into w…
thsubaku9 Oct 8, 2020
8549287
spacing
thsubaku9 Oct 8, 2020
2ada1ad
Custom string sort supported
thsubaku9 Oct 10, 2020
5453837
Custom string sort supported
thsubaku9 Oct 10, 2020
b082d22
Custom string sort supported
thsubaku9 Oct 10, 2020
5eccfad
Custom string sort supported
thsubaku9 Oct 10, 2020
247e354
Merge branch 'main' of https://github.com/thsubaku9/string-dsa into w…
thsubaku9 Oct 10, 2020
10f4b66
sorted implemented
thsubaku9 Oct 16, 2020
a6c301b
webpack config
thsubaku9 Oct 17, 2020
11084bd
Edit Distance and 0.3.0 changes
thsubaku9 Oct 19, 2020
2bd2264
Merge branch 'main' into work-branch
thsubaku9 Oct 19, 2020
db92912
Merge branch 'main' into work-branch
thsubaku9 Oct 28, 2020
c8be77f
temp
thsubaku9 Oct 29, 2020
ac3a413
Trie almost done
thsubaku9 Oct 29, 2020
4c66325
README enhancement
thsubaku9 Oct 29, 2020
55fd012
Trie done
thsubaku9 Oct 30, 2020
0790164
tried and tested
thsubaku9 Oct 30, 2020
6cac38f
Trie enhancements
thsubaku9 Nov 4, 2020
2df3a0a
Merge branch 'main' into work-branch
thsubaku9 Nov 4, 2020
ca54462
LCS supported
thsubaku9 Nov 8, 2020
0bd5276
Merge branch 'main' into work-branch
thsubaku9 Nov 8, 2020
d6f55e7
few minor fixes, started linting work
thsubaku9 Nov 10, 2020
7fb45da
Merge branch 'main' into work-branch
thsubaku9 Nov 10, 2020
48eff45
linting ongoing
thsubaku9 Nov 10, 2020
493c473
Minor updates
thsubaku9 Nov 13, 2020
5fd4fde
Merge branch 'main' into work-branch
thsubaku9 Nov 13, 2020
0590e93
merge modification
thsubaku9 Nov 13, 2020
9cb5508
LF update
thsubaku9 Nov 13, 2020
65d609e
LF update
thsubaku9 Nov 13, 2020
6d416a3
Aho Corasick main files
thsubaku9 Nov 18, 2020
3c41b78
Aho Corasick Polishing
thsubaku9 Nov 18, 2020
24e064c
Merge branch 'main' into work-branch
thsubaku9 Nov 18, 2020
e28e34f
Update index.js
thsubaku9 Nov 18, 2020
f220a70
Update index.js
thsubaku9 Nov 18, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"no-restricted-syntax": ["warn"],
"no-shadow": ["off"],
"no-underscore-dangle": ["off"],
"prefer-const": ["off"],
"quotes" :["off"],
"strict": ["off"]
}
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ $ npm install string-dsa

The supported data structures and algorithms are:

- [Aho Corasick](https://github.com/thsubaku9/string-dsa/blob/main/src/AhoCorasick.js)
- [Bloom Filter](https://github.com/thsubaku9/string-dsa/blob/main/src/BloomFilter.js)
- [Bracket Balance](https://github.com/thsubaku9/string-dsa/blob/main/src/bracketBalance.js)
- [Custom Sort](https://github.com/thsubaku9/string-dsa/blob/main/src/stringSort.js)
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "string-dsa",
"version": "1.2.2",
"version": "1.3.0",
"description": "String Data Structures and Algorithms Library in JavaScript",
"main": "src/index.js",
"files": [
Expand All @@ -21,6 +21,7 @@
"keywords": [
"String Algorithms",
"String Data Structures",
"Aho Corasick",
"Bloom Filter",
"Custom Sort",
"Levenshtein Distance",
Expand Down
4 changes: 2 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ const BloomFilter = require('./BloomFilter');
const customSort = require('./stringSort');
const diceCoeff = require('./dice_coeff');
const editDist = require('./edit_distance');
const { kmp } = require('./search');
const { AhoCorasick, kmp, rabinKarp } = require('./search');
const lcs = require('./lcs');
const { rabinKarp } = require('./search');
const Trie = require('./Trie');

module.exports = {
AhoCorasick,
BloomFilter,
bracketBalance,
customSort,
Expand Down
114 changes: 112 additions & 2 deletions src/search/AhoCorasick.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,127 @@
"use strict";

// AC is implemented with three tables: transition: Map<state: int, Map<edge: char, state: int>> (a map of maps, so the alphabet is not restricted), failure: array[state], and output: array[state]
class AC {
  /**
   * Builds an Aho-Corasick automaton over the given keywords: a trie of the
   * keywords, a failure link per state and the keyword indices recognised at
   * each state.
   *
   * @param {String[]} keywordList the list of keywords that you wish to use for searching
   * @param {Number} maxStates the number of states that are allowed (maxStates >= total number of letters in keywordlist). Set as -1 to allow the algorithm to figure out by itself
   */
  constructor(keywordList, maxStates = -1) {
    // Worst case is one state per keyword letter, plus one for the root state 0.
    this.totalStates = maxStates > 0
      ? maxStates
      : (keywordList.reduce((letterCount, str) => letterCount + str.length, 0) + 1);
    // Private copy, so later changes to the caller's array do not affect lookups.
    this.keyWords = [...keywordList];
    // Highest state number handed out so far (state 0 is the root).
    this.states = 0;
    // this.stateTransitions = new Map<Number,Map<String,Number>>(); -- for typescript
    this.stateTransitions = new Map();
    this.stateTransitions.set(0, new Map());

    this.failureTransition = new Array(this.totalStates);

    // stateOutput[state] is an ascending list of keyword indices that end at
    // that state. A list (rather than a 32-bit mask) keeps keyword 31 and
    // beyond from being lost or aliased.
    this.stateOutput = new Array(this.totalStates);
    for (let i = 0; i < this.totalStates; i++) {
      this.stateOutput[i] = [];
    }

    // Initial insert: build the trie.
    for (let i = 0; i < this.keyWords.length; i++) {
      this._initialInsert(this.keyWords[i], i);
    }

    // Preprocessing for failure links and inherited outputs.
    this._exploreBFS();
  }

  /**
   * Returns the (lazily created) output list of a state. Lazy creation covers
   * states numbered beyond a caller-supplied maxStates.
   *
   * @param {Number} stateNumber the state whose output list is wanted
   * @returns {Number[]} the live list of keyword indices for that state
   */
  _outputsOf(stateNumber) {
    if (this.stateOutput[stateNumber] === undefined) {
      this.stateOutput[stateNumber] = [];
    }
    return this.stateOutput[stateNumber];
  }

  /**
   * Computes the failure link of every state and merges into each state the
   * outputs of the state its failure link points to.
   */
  _exploreBFS() {
    // Strict FIFO order: a state's failure link and merged outputs are only
    // final once every shallower state has been processed, so the traversal
    // must be breadth-first.
    const queue = [];

    // The root and all of its direct children fail back to the root.
    this.failureTransition[0] = 0;
    for (const child of this.stateTransitions.get(0).values()) {
      this.failureTransition[child] = 0;
      queue.push(child);
    }

    // Walk the queue with an index instead of shift() to keep dequeuing O(1).
    for (let head = 0; head < queue.length; head++) {
      const currentState = queue[head];

      for (const [edge, child] of this.stateTransitions.get(currentState)) {
        // Follow the parent's failure chain until a state with this edge is
        // found, or the root is reached.
        let failureState = this.failureTransition[currentState];
        while (failureState !== 0 && !this.stateTransitions.get(failureState).has(edge)) {
          failureState = this.failureTransition[failureState];
        }
        if (this.stateTransitions.get(failureState).has(edge)) {
          failureState = this.stateTransitions.get(failureState).get(edge);
        }

        this.failureTransition[child] = failureState;

        // Every keyword recognised at the failure state also ends here.
        const inherited = this._outputsOf(failureState);
        if (inherited.length > 0) {
          const own = this._outputsOf(child);
          for (const keywordIndex of inherited) {
            if (!own.includes(keywordIndex)) own.push(keywordIndex);
          }
          // Keep ascending keyword order so find() reports hits in that order.
          own.sort((a, b) => a - b);
        }

        queue.push(child);
      }
    }
  }

  /**
   * Records that the keyword with the given index ends at the given state.
   *
   * @param {Number} currentStateNumber the state at which the keyword ends
   * @param {Number} keywordIndex index of the keyword inside this.keyWords
   */
  _setOutput(currentStateNumber, keywordIndex) {
    const outputs = this._outputsOf(currentStateNumber);
    if (!outputs.includes(keywordIndex)) {
      outputs.push(keywordIndex);
    }
  }

  /**
   * Inserts one keyword into the trie, creating states as needed.
   *
   * @param {String} keyword the keyword to insert
   * @param {Number} keywordIndex index of the keyword inside this.keyWords
   */
  _initialInsert(keyword, keywordIndex) {
    // An empty keyword has no end state, so it is not inserted.
    if (keyword.length === 0) return;

    let currentState = this.stateTransitions.get(0);
    let transitionToState;

    for (let i = 0; i < keyword.length; i++) {
      const c = keyword.charAt(i);
      if (!currentState.has(c)) {
        this.states++;
        currentState.set(c, this.states);
      }

      transitionToState = currentState.get(c);
      if (!this.stateTransitions.has(transitionToState)) {
        this.stateTransitions.set(transitionToState, new Map());
      }
      currentState = this.stateTransitions.get(transitionToState);
    }
    this._setOutput(transitionToState, keywordIndex);
  }

  /**
   * Scans the text once and reports every occurrence of every keyword.
   *
   * @param {String} searchSpace the text to be searched over
   *
   * @returns {Array[]} one [start, end] pair (inclusive indices into searchSpace) per hit,
   * ordered by end position and then by keyword index
   */
  find(searchSpace) {
    let currentState = 0;
    const resultLocation = [];

    for (let i = 0; i < searchSpace.length; i++) {
      const c = searchSpace[i];

      // On a mismatch fall back along failure links; the root absorbs any
      // character it has no edge for.
      while (currentState !== 0 && !this.stateTransitions.get(currentState).has(c)) {
        currentState = this.failureTransition[currentState];
      }

      const nextState = this.stateTransitions.get(currentState).get(c);
      if (nextState !== undefined) {
        currentState = nextState;
      }

      const outputs = this.stateOutput[currentState];
      if (outputs === undefined) continue;

      for (const keywordIndex of outputs) {
        resultLocation.push([i - (this.keyWords[keywordIndex].length - 1), i]);
      }
    }

    return resultLocation;
  }
}

Expand Down
2 changes: 2 additions & 0 deletions src/search/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
const kmp = require('./kmp');
const rabinKarp = require('./rabin_karp');
const AhoCorasick = require('./AhoCorasick');

module.exports = {
AhoCorasick,
kmp,
rabinKarp,
};
26 changes: 26 additions & 0 deletions test/search/test_AhoCorasick.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
const assert = require("assert");
const { AhoCorasick } = require("../../src");

// Mocha suite for the exported AhoCorasick searcher. The cases run in order
// and share the single instance that the first case constructs.
describe("Aho Corasick tests", () => {
  // Fixture: dictionary, text to scan and the inclusive [start, end] hits expected in it.
  const dictionary = ["lemon", "lemonade", "monday", "zebras"];
  const haystack = "lemon and lemonday is nice for zebzebras";
  const expectedHits = [
    [0, 4],
    [10, 14],
    [12, 17],
    [34, 39],
  ];
  let automaton;

  it("Insert the necessary keywords", () => {
    automaton = new AhoCorasick(dictionary);

    assert.deepStrictEqual(automaton.keyWords, dictionary);
  });

  it("Should state the maximum number of required states", () => {
    // One state per keyword letter, plus one for the root.
    let expectedBudget = 1;
    for (const word of dictionary) {
      expectedBudget += word.length;
    }

    assert.strictEqual(automaton.totalStates, expectedBudget);
  });

  it("Should state the actual number of used states", () => {
    const withinBudget = automaton.totalStates >= automaton.states;

    assert.strictEqual(withinBudget, true);
  });

  it("Find the matching keywords in given text", () => {
    const actualHits = automaton.find(haystack);

    assert.deepStrictEqual(actualHits, expectedHits);
  });
});