Skip to content
Browse files

Work in progress (still broken): moved back to the faster approach of memoizing per-letter pronunciation offsets

  • Loading branch information...
1 parent 2e656d0 commit e78ae74cc62e76ff684b2786527e201ad8ccb5ef @substack committed Apr 29, 2011
Showing with 56 additions and 23 deletions.
  1. +1 −1 data/fetch.sh
  2. +55 −22 index.js
View
2 data/fetch.sh
@@ -1,2 +1,2 @@
#!/bin/bash
-wget https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a
+wget 'https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a'
View
77 index.js
@@ -1,44 +1,77 @@
var fs = require('fs');
var Lazy = require('lazy');
+var dictFile = __dirname + '/data/cmudict.0.7a';
var offsets = {};
-exports.pronounce = function (word, cb) {
+function eachLine (stream, cb) {
+ var letter = null;
+ var offset = 0;
+
+ Lazy(stream).lines.map(String).forEach(function (line) {
+ var x = line[0];
+ if (x.match(/^[A-Z]/i)) {
+ if (letter !== x) { // letter transition
+ if (!offsets[x]) offsets[x] = offset;
+ }
+ var w = line.match(/^([A-Z][A-Z'-]*)/i);
+ cb(line, w[0]);
+ }
+
+ offset += line.length + 1;
+ });
+}
+
+var exports = module.exports = function (word, cb) {
+ var s = fs.createReadStream(dictFile);
+ var rhymes = [];
+
+ s.once('end', function () {
+ cb(rhymes);
+ });
+
+ var x = active(pronounce(word));
+ eachLine(s, function (line, w) {
+ var y = active(line.split(/\s+/).slice(1));
+ if (x === y) rhymes.push(w);
+ });
+};
+
+exports.rhyme = exports;
+
+var pronounce = exports.pronounce = function (word, cb) {
word = word.toUpperCase();
var start = offsets[word[0]];
var end = offsets[String.fromCharCode(word[0].charCodeAt(0) + 1)];
- var s = fs.createReadStream(__dirname + '/data/cmudict.0.7a', {
+ var s = fs.createReadStream(dictFile, {
start : start,
end : end,
});
s.once('end', function () {
- cb(null, found);
+ cb(found);
});
- var letter = null;
- var offset = 0;
var found = [];
- Lazy(s).lines.forEach(function (line) {
- var x = String.fromCharCode(line[0]);
- if (x.match(/^[A-Z]/i)) {
- if (letter !== x) { // letter transition
- if (!offsets[x]) offsets[x] = offset;
- }
-
- var str = line.toString();
- var w = str.match(/^(\w[\w-']*)/);
- if (w && w[1] == word) {
- found.push(str.split(/\s+/).slice(1));
- }
- else if (found.length > 0) {
- s.emit('end');
- }
+ eachLine(s, function (line, w) {
+ if (w == word) {
+ found.push(line.split(/\s+/).slice(1));
+ }
+ else if (found.length > 0) {
+ s.emit('end');
}
-
- offset += line.length + 1;
});
};
+
+function active (ws) {
+ // active rhyming region: drop the leading consonant phonemes so the region starts at the first vowel phoneme
+ for (
+ var i = 0;
+ i < ws.length && ws[i].match(/^[^AEIOU]/i);
+ i++
+ );
+ return ws.slice(i).join(' ');
+}

0 comments on commit e78ae74

Please sign in to comment.
Something went wrong with that request. Please try again.