/*
 * Compute the Levenshtein distance between two strings.
 * Algorithm based on Speech and Language Processing - Daniel Jurafsky and James H. Martin.
 */

/**
 * Return a caller-supplied edit cost, or the fallback when it is not a usable number.
 *
 * An explicit 0 is a legitimate cost and is kept; only missing, non-numeric
 * or NaN values are replaced by the fallback.
 *
 * @param {*} cost - Value read from the options object.
 * @param {number} fallback - Cost to use when `cost` is unusable.
 * @returns {number} The cost to apply.
 */
function resolveEditCost (cost, fallback) {
    return (typeof cost === 'number' && !isNaN(cost)) ? cost : fallback;
}

/**
 * Compute the minimum cost of editing `source` into `target`.
 *
 * Rows of the matrix index prefixes of `source`, columns index prefixes of
 * `target`; cell [row][column] holds the cheapest way to turn the first `row`
 * elements of `source` into the first `column` elements of `target`.
 *
 * @param {string} source - Sequence to transform (anything with `length` and index access).
 * @param {string} target - Sequence to transform `source` into.
 * @param {Object} [options] - Optional edit costs; the object is not modified.
 * @param {number} [options.insertion_cost=1] - Cost of adding one element of `target`.
 * @param {number} [options.deletion_cost=1] - Cost of dropping one element of `source`.
 * @param {number} [options.substitution_cost=2] - Cost of replacing one element by a
 *     different one. The default equals one deletion plus one insertion.
 * @returns {number} The total cost of the cheapest edit sequence.
 */
function LevenshteinDistance (source, target, options) {
    options = options || {};
    // Costs are read into locals so the caller's options object stays untouched.
    var insertionCost = resolveEditCost(options.insertion_cost, 1);
    var deletionCost = resolveEditCost(options.deletion_cost, 1);
    var substitutionCost = resolveEditCost(options.substitution_cost, 2);

    var sourceLength = source.length;
    var targetLength = target.length;
    var distanceMatrix = [[0]];
    var row;
    var column;

    // First column: reaching an empty target means deleting every source element.
    for (row = 1; row <= sourceLength; row++) {
        distanceMatrix[row] = [];
        distanceMatrix[row][0] = distanceMatrix[row - 1][0] + deletionCost;
    }
    // First row: starting from an empty source means inserting every target element.
    // Each cell builds on its left neighbour in row 0, so this is safe for any target length.
    for (column = 1; column <= targetLength; column++) {
        distanceMatrix[0][column] = distanceMatrix[0][column - 1] + insertionCost;
    }

    for (row = 1; row <= sourceLength; row++) {
        for (column = 1; column <= targetLength; column++) {
            // Moving down a row consumes a source element (deletion);
            // moving right a column produces a target element (insertion).
            var costToDelete = distanceMatrix[row - 1][column] + deletionCost;
            var costToInsert = distanceMatrix[row][column - 1] + insertionCost;

            // The diagonal step is free when the two elements already match.
            var costToSubstitute = distanceMatrix[row - 1][column - 1];
            if (source[row - 1] !== target[column - 1]) {
                costToSubstitute = costToSubstitute + substitutionCost;
            }

            distanceMatrix[row][column] = Math.min(costToInsert, costToDelete, costToSubstitute);
        }
    }
    return distanceMatrix[sourceLength][targetLength];
}