diff --git a/actions/what.js b/actions/what.js index 4661f8b..1658df5 100644 --- a/actions/what.js +++ b/actions/what.js @@ -8,7 +8,9 @@ var lang = require("../brain/language") ; module.exports = function what (a) { - + + if (a.owner === a.subject) a.subject = "definition"; + var nodebot = this , owner = a.owner , subject = a.subject || "definition" @@ -55,8 +57,9 @@ module.exports = function what (a) { return nodebot.request(); } - nodebot.lexico[nowner] = nodebot.lexicon[owner] || {}; + nodebot.lexicon[owner] = nodebot.lexicon[owner] || {}; nodebot.lexicon[owner][subject] = text; + nodebot.say("Great, now I know!"); return nodebot.request(); diff --git a/brain/language/tagger.js b/brain/language/tagger.js index 270666d..1e28203 100644 --- a/brain/language/tagger.js +++ b/brain/language/tagger.js @@ -1,4 +1,14 @@ // Tagger.js +// +// Breaks up speech into components and assists with +// classifying things such as the subject, ownership, +// and action for a statement +// +// Note: I am not a linguist, this is the result of +// blood, sweat, and tears! 
+// +// Please help me make this better: +// https://github.com/nhunzaker/nodebot // -------------------------------------------------- // var lev = require("levenshtein") @@ -21,7 +31,7 @@ var closest = module.exports.closest = function(string, words) { } words.forEach(function(word) { - + var distance = lev(string, word); if (distance < shortest) { @@ -36,39 +46,48 @@ var closest = module.exports.closest = function(string, words) { // Checks if a string is fileish var isFile = module.exports.isFile = function(string) { + string = string || ""; return (string.replace(/\s/g, "").match(fileEx) !== null); }; // Returns the part of speech for a particular word var getType = module.exports.getType = function (string) { - return tagger.tag(lexer.lex(string))[0][1]; + + if (string) { + return tagger.tag(lexer.lex(string))[0][1]; + } else { + return undefined; + } + }; // Finds all words between the last of the first and last // of two types -var getBetween = module.exports.getBetween = function(lex, type1, type2) { +var getBetween = module.exports.getBetween = function(lex, type1, type2, form) { var tagged = tagger.tag(lex) - , filter1 = filter2 = []; + , filter1 = filter2 = start = end = []; + + form = form || "outside" type1 = (typeof type1 === 'string') ? [type1] : type1; type2 = (typeof type2 === 'string') ? [type2] : type2; filter1 = tagged.filter(function(i) { return type1.indexOf(i[1]) !== -1 }) || []; filter2 = tagged.filter(function(i) { return type2.indexOf(i[1]) !== -1 }) || []; - - var start = (filter1[0]) ? filter1[0][0] : undefined - , end = (filter2.slice(-1)[0]) ? filter2.slice(-1)[0][0] : undefined; - - if (start || end) { - return lex.slice(lex.indexOf(start) + 1, lex.indexOf(end) + 1); + if (form === "outside") { + start = (filter1[0]) ? filter1[0][0] : undefined } else { - return []; + start = (filter1.slice(-1)[0]) ? filter1.slice(-1)[0][0] : undefined } + end = (filter2.slice(-1)[0]) ? 
filter2.slice(-1)[0][0] : undefined; + + + return (start || end) ? lex.slice(lex.indexOf(start) + 1, lex.indexOf(end) + 1) : []; }; @@ -90,19 +109,24 @@ var getTypes = module.exports.getTypes = function (array, string, strict) { }; -var classify = module.exports.classify = function(speech) { +var classify = module.exports.classify = function(speech, debug) { var text = speech || process.argv.slice(2).join(" ") , words = lexer.lex(text) , tagged = tagger.tag(words) , action = subject = owner = false; + if (debug) { + console.log(tagged); + } + + // Classify! // -------------------------------------------------- // var verbs = getTypes(tagged, "VB") , nouns = getTypes(tagged, "NN") - , pronouns = getTypes(tagged, "P") + , pronouns = getTypes(tagged, "PRP") // finds all possessive pronouns , actions = getTypes(tagged, "W") , adverbs = getTypes(tagged, "R") , adjectives = getTypes(tagged, "JJ") @@ -141,116 +165,144 @@ var classify = module.exports.classify = function(speech) { var posession = tagged.filter(function(i) { return i[1] === "PRP$" || i[1] === "PRP"; }); - // If there is posession, then use it - if (posession.length > 0) { + // If there is possession and we have an action, then + // the owner is the possessive word + if (posession.length > 0 && action) { + owner = posession[0][0]; + + // More bulletproofing, if the owner word is earlier + // in the sentence than the action, then we need to ignore + // all of the verbs/possessives before the action + // + // ex: "Do you know what the current directory is?" + if (words.indexOf(owner) < words.indexOf(action)) { + owner = getBetween(words, ["DT"], "NN").join(" "); + } + } // No ? 
Let's try between a preposition and // determiners/nouns else if (determiners.length > 0 && preps.length > 0) { - - owner = getBetween(words, "IN", ["DT", "NN"]); + + owner = getBetween(words, ["IN"], ["DT", "NN", "."]); // Strip accidental determinates if (getType(owner[0]) === "DT") owner = owner.slice(1); - - owner = owner.join(" "); + + // Strip accidental punctuation + if (getType(owner.slice(-1)[0]) === ".") owner = owner.slice(0, -1); + + owner = owner.join(" ").trim(); } + // At this point, we can really only guess that + // the owner is between the verb and the end of the + // statement else if (verbs.length > 0) { - + owner = getBetween(words, ["VBZ", "VBP"], ".").slice(0, -1) // Strip accidental determinates if (getType(owner[0]) === "DT") owner = owner.slice(1); - - owner = owner.join(" "); + + // Strip accidental leading punctuation + if (getType(owner[0]) === ".") owner = owner.slice(1); + + owner = owner.join(" ").trim(); + } // SUBJECT - // Answers : "What should the nodebot's action target?" + // Answers : "What is this statement about?" 
// -------------------------------------------------- // - // If ownership, then the and the next word is a noun then - // the subject is the noun - if (owner) { - subject = getBetween(words, ["DT", "PRP$"], ["IN", "."]).slice(0, -1).join(" "); + // If there is a file within the statement, it's probably + // the subject + if (speech.match(fileEx) !== null) { + subject = speech.match(fileEx)[0].trim(); } - // If there are no nouns and there is an owner, the subject is the owner - else if (nouns.length === 0 && owner) { - subject = owner; - } - // Okay, if that isn't true and we have prepositions - // then the subject will be the words following + // If there is a website within the statement, it's probably + // the subject + else if (websites.length > 0) { + subject = websites[0].trim() + } - // Start with the word after the preposition - // end with the next adjective or prep we see + // If ownership and there are prepositions, scan for words between + // prepositions, determinates, and possessive words and + // prepositions, nouns, and punctuation + else if (owner && preps.length > 0) { + + debug && console.log("fire"); + + // To account for more than one preposition, we need to be able to filter between + // either the inside or outside preposition + if (preps.length === 1) { + subject = getBetween(words, ["IN", "DT", "PRP$"], ["IN", "NN", "."], "outside"); + } else { + subject = getBetween(words, ["IN", "DT", "PRP$"], ["IN", "NN", "."], "inside"); + } - else if (preps.length > 0) { - subject = getBetween("IN", ["IN", "VBZ", "."]); - } + // Autocorrect for trailing punctuation + if (getType(subject.slice(-1)[0]) === ".") { + subject = subject.slice(0, -1); + } - // Cute, at this point we check for determiners - // (the, some...) 
+ // Autocorrect for trailing ownership + if (subject.slice(-1)[0] === owner) { + subject = subject.slice(0, -1); + } + + // Autocorrect for trailing prepositions + if (getType(subject.slice(-1)[0]) === "IN") { + subject = subject.slice(0, -1); + } - else if (determiners.length > 0) { - var det = determiners.slice(-1)[0]; - subject = words[words.indexOf(det) + 1]; - } - - // Now let's check if the first verb is - // present-tense, then it's probably between - // the first verb and the action - - else if (getType(verbs[0]) === "VBZ") { + subject = subject.join(" ").trim(); - var start = words.indexOf(verbs[0]) + 1 - , end = words.indexOf(action); - - subject = words.slice(start, end).join(" "); - } - // Autocorrect for files - // we didn't accidently add whitespace - subject = (isFile(subject)) ? subject.replace(/\s/g, "").match(fileEx)[0] : subject - owner = (isFile(owner)) ? owner.replace(/\s/g, "").match(fileEx)[0] : owner - + // Okay, last chance. If there *is* ownership, and there are no prepositions + // then the subject is inside the owner/determinate/verb and the last noun + // (*phew...*) + else if (owner && preps.length === 0) { + subject = getBetween(words, ["DT", "VBP", "PRP$"], "NN", "inside").join(" "); + } + // Now that everything is properly classified, // let's filter the ownership switch(owner) { - + + // Reverse user possession case "me": case "my": case "i": case "I": owner = "user"; break; - + + // Reverse nodebot possession case "your": case "you": owner = "nodebot"; break; - case undefined: case "it": case "its": + // Tweak other non-specific possession cases to the last + // recorded context + case "": case "it": case "its": case "they": case "their": case "he": case "she": case "his": case "hers": - owner = this.memory.context; + owner = Nodebot.memory.context; break; } - // If the subject is the same as the owner, make a last - // minute correction - - if (subject === owner) subject = "definition"; - // Return what we find // 
-------------------------------------------------- // var ret = { - action : action, + action : (action) ? action.toLowerCase() : undefined, owner : owner, subject : subject, tokens : words diff --git a/nodebot.js b/nodebot.js index 5ed0908..01e7885 100644 --- a/nodebot.js +++ b/nodebot.js @@ -37,6 +37,10 @@ require("./brain/interaction")(Nodebot); var command = process.argv.slice(2).join(" ").trim(); // Take the proper initial action -(command !== "") ? Nodebot.analyze(command) : Nodebot.request(); + +if (!module.parent) { + (command !== "") ? Nodebot.analyze(command) : Nodebot.request(); +} + diff --git a/test/pos-test.js b/test/pos-test.js new file mode 100644 index 0000000..5e22e8c --- /dev/null +++ b/test/pos-test.js @@ -0,0 +1,272 @@ +require("../nodebot"); +var tagger = require("../brain/language/tagger"); +var vows = require('vows'), +assert = require('assert'); + + +// -------------------------------------------------- // + + +vows.describe("Tagger Helper Functions").addBatch({ + + 'It should calculate the best fit for a word within a group' : { + + topic: tagger.closest("valid", ["who", "what", "when", "validate"]), + + "the best fit should be 'validate'": function(closest) { + assert.equal(closest, "validate"); + } + + }, + + 'It should determine if a word is a file' : { + + topic: tagger.isFile("/public/application.js"), + + "it should be a file": function(isFile) { + assert.equal(isFile, true); + } + + }, + + 'It should be able to tag a word' : { + + topic: tagger.getType("kittens"), + + "it should be within the noun tag namespace": function(word) { + assert.equal(word.slice(0,2), "NN"); + } + + }, + + 'It should be able to sift between words of particular types' : { + + topic: tagger.getBetween(["I", "like", "to", "eat", "cheese"], "VB", "NN").join(" "), + + "it should find 'cheese'": function(between) { + assert.equal(between, "cheese"); + } + + } + +}).export(module); + + + + +vows.describe('Decipher speech').addBatch({ + + 'When asked, "What is 
your name?"': { + + topic: tagger.classify("What is your name?"), + + "it should correctly determine the owner": function(topic) { + assert.equal(topic.owner, "nodebot"); + }, + + 'it should correctly determine the subject': function (topic) { + assert.equal(topic.subject, "name"); + } + + }, + + 'When asked, "What is the regular expression for email"': { + + topic: tagger.classify("What is the regular expression for email?"), + + 'it should correctly determine the owner': function (topic) { + assert.equal(topic.owner, "email"); + }, + 'it should correctly determine the subject': function (topic) { + assert.equal(topic.subject, "regular expression"); + }, + 'it should correctly determine the action': function (topic) { + assert.equal(topic.action, "what"); + } + + }, + + 'When asked, "What is the current directory?"': { + + topic: tagger.classify("What is the current directory?"), + + 'it should correctly determine the subject': function (topic) { + assert.equal(topic.subject, "current directory"); + } + + }, + + + 'When asked, "Who is the king of France?"': { + + topic: tagger.classify("Who is the king of France?"), + + 'it should correctly identify the action': function (topic) { + assert.equal(topic.action, "who"); + }, + + 'it should correctly determine ownership': function (topic) { + assert.equal(topic.owner, "France"); + }, + + 'it should correctly determine the subject': function (topic) { + assert.equal(topic.subject, "king"); + } + + }, + + + 'When asked, "Validate application.js"': { + + topic: tagger.classify("validate application.js"), + + 'the subject should be "application.js"': function (topic) { + assert.equal(topic.subject, "application.js"); + }, + + 'the action should be "validate"': function (topic) { + assert.equal(topic.action, "validate"); + } + + }, + + 'When asked, "Is application.js valid?"': { + + topic: tagger.classify("Is application.js valid?"), + + 'the subject should be "application.js"': function (topic) { + 
assert.equal(topic.subject, "application.js"); + }, + + 'the action should be "valid"': function (topic) { + assert.equal(topic.action, "valid"); + } + + }, + + 'When asked, "Is there anything wrong with application.js?"': { + + topic: tagger.classify("Is there anything wrong with application.js?"), + + 'the action should be "wrong"': function (topic) { + assert.equal(topic.action, "wrong"); + }, + + 'the subject should be "application.js"': function (topic) { + assert.equal(topic.subject, "application.js"); + } + + }, + + 'When asked, "Is there anything wrong with http://www.google.com?"': { + + topic: tagger.classify("Is there anything wrong with http://www.google.com?"), + + 'the action should be "wrong"': function (topic) { + assert.equal(topic.action, "wrong"); + }, + + 'the subject should be " http://www.google.com"': function (topic) { + assert.equal(topic.subject, "http://www.google.com"); + } + + }, + + + 'When asked, "Validate http://www.google.com"': { + + topic: tagger.classify("Validate http://www.google.com"), + + 'the action should be "validate"': function (topic) { + assert.equal(topic.action, "validate"); + }, + + 'the subject should be " http://www.google.com"': function (topic) { + assert.equal(topic.subject, "http://www.google.com"); + } + + }, + + 'When asked, "Is there anything wrong with http://localhost:4000?"': { + + topic: tagger.classify("Is there anything wrong with http://localhost.dev:4000?"), + + 'the action should be "wrong"': function (topic) { + assert.equal(topic.action, "wrong"); + }, + + 'the subject should be "http://localhost.dev:4000?': function (topic) { + assert.equal(topic.subject, "http://localhost.dev:4000"); + } + + }, + + + 'When asked, "Validate http://localhost.dev:4000?"': { + + topic: tagger.classify("Validate http://localhost.dev:4000?"), + + 'the action should be "validate"': function (topic) { + assert.equal(topic.action, "validate"); + }, + + 'the subject should be "http://localhost.dev:4000?"': function 
(topic) { + assert.equal(topic.subject, "http://localhost.dev:4000"); + } + + }, + + + 'When asked, "Who am I?"': { + + topic: tagger.classify("Who am I?"), + + 'the action should be "who"': function (topic) { + assert.equal(topic.action, "who"); + }, + + 'the ownership should belong to "user"': function (topic) { + assert.equal(topic.owner, "user"); + }, + + 'the subject should be "I"': function (topic) { + assert.equal(topic.subject, "I"); + } + + }, + + + 'When asked, "Do you know what the current directory is?"': { + + topic: tagger.classify("Do you know what the current directory is?", true), + + 'the action should be "what"': function (topic) { + assert.equal(topic.action, "what"); + }, + + 'the owner should be "current directory"': function (topic) { + assert.equal(topic.owner, "current directory"); + }, + + 'the subject should be "current directory"': function (topic) { + assert.equal(topic.subject, "current directory"); + } + + }, + + 'When asked, "What is it?"': { + + topic: tagger.classify("What is it?", true), + + 'the action should be "what"': function (topic) { + assert.equal(topic.action, "what"); + }, + + 'the owner should be the last context': function (topic) { + assert.equal(topic.owner, Nodebot.memory.context); + } + + } + + +}).export(module); // Run it