Large diffs are not rendered by default.

@@ -0,0 +1,12 @@
.
ADJ
ADP
ADV
CONJ
DET
NOUN
NUM
PRON
PRT
VERB
X
@@ -15797,7 +15797,7 @@ tijuana NOUN
chopped VERB
truthfulness NOUN
organize VERB
sentence NOUN
sentenceV1 NOUN
racketeer NOUN
i. NOUN
crooning VERB
@@ -45181,7 +45181,7 @@ leninism-marxism NOUN
authorize VERB
poussins NOUN
peering VERB
sentence-structure NOUN
sentenceV1-structure NOUN
buses NOUN
clapping VERB NOUN
letitia NOUN
@@ -0,0 +1,8 @@
[why]
because
due to

[where]
from
to

@@ -1,16 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<form xmlns="http://www.intellij.com/uidesigner/form/" version="1" bind-to-class="chatbot.ChatbotGUI">
<grid id="27dc6" binding="mainPanel" layout-manager="GridLayoutManager" row-count="3" column-count="3" same-size-horizontally="false" same-size-vertically="false" hgap="5" vgap="5">
<grid id="27dc6" binding="mainPanel" layout-manager="GridLayoutManager" row-count="5" column-count="3" same-size-horizontally="false" same-size-vertically="false" hgap="5" vgap="5">
<margin top="10" left="10" bottom="10" right="10"/>
<constraints>
<xy x="20" y="20" width="263" height="400"/>
<xy x="20" y="20" width="338" height="400"/>
</constraints>
<properties/>
<border type="none"/>
<children>
<component id="5921b" class="javax.swing.JButton" binding="sendButton">
<constraints>
<grid row="2" column="2" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false"/>
<grid row="4" column="2" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false"/>
</constraints>
<properties>
<hideActionText value="false"/>
@@ -22,7 +22,7 @@
</component>
<component id="d6093" class="javax.swing.JTextField" binding="messageField">
<constraints>
<grid row="2" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="6" anchor="8" fill="1" indent="0" use-parent-layout="false">
<grid row="4" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="6" anchor="8" fill="1" indent="0" use-parent-layout="false">
<preferred-size width="150" height="-1"/>
</grid>
</constraints>
@@ -33,12 +33,13 @@
<grid row="0" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
</constraints>
<properties>
<alignmentY value="0.5"/>
<text value="Chatbot"/>
</properties>
</component>
<scrollpane id="43aa0" binding="messageAreaScrollPane">
<constraints>
<grid row="1" column="1" row-span="1" col-span="1" vsize-policy="7" hsize-policy="7" anchor="0" fill="3" indent="0" use-parent-layout="false"/>
<grid row="1" column="1" row-span="3" col-span="1" vsize-policy="7" hsize-policy="7" anchor="0" fill="3" indent="0" use-parent-layout="false"/>
</constraints>
<properties/>
<border type="none"/>
@@ -52,6 +53,32 @@
</component>
</children>
</scrollpane>
<grid id="3c703" layout-manager="GridLayoutManager" row-count="2" column-count="1" same-size-horizontally="false" same-size-vertically="false" hgap="-1" vgap="-1">
<margin top="0" left="0" bottom="0" right="0"/>
<constraints>
<grid row="1" column="2" row-span="3" col-span="1" vsize-policy="3" hsize-policy="3" anchor="0" fill="3" indent="0" use-parent-layout="false"/>
</constraints>
<properties/>
<border type="none"/>
<children>
<component id="5cb02" class="javax.swing.JButton" binding="forgetButton">
<constraints>
<grid row="0" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false"/>
</constraints>
<properties>
<text value="Forget memory"/>
</properties>
</component>
<component id="18533" class="javax.swing.JButton" binding="clearButton">
<constraints>
<grid row="1" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false"/>
</constraints>
<properties>
<text value="Clear Text Area"/>
</properties>
</component>
</children>
</grid>
</children>
</grid>
</form>
@@ -19,6 +19,8 @@ public class ChatbotGUI implements WindowListener {
private JPanel mainPanel;
private JScrollPane messageAreaScrollPane;
private JTextArea messageArea;
private JButton forgetButton;
private JButton clearButton;

public ChatbotGUI() {
chatbot = new Chatbot();
@@ -34,13 +36,22 @@ public ChatbotGUI() {
} else {
messageArea.append(String.format("[%s] You: ", Utils.getCurrentTimeInString()) + message + "\n");
System.out.println("---------------------------------------------------------------------------");
message = chatbot.getReply(message);
message = chatbot.getReplyV2(message);
System.out.println("---------------------------------------------------------------------------");
messageArea.append(String.format("[%s] Bot: ", Utils.getCurrentTimeInString()) + message + "\n");
String[] splitByNL = message.split("\n");

for (String m : splitByNL)
messageArea.append(String.format("[%s] Bot: ", Utils.getCurrentTimeInString()) + m + "\n");
}
};

ActionListener forgetActionListener = e -> {
chatbot.resetMemory();
JOptionPane.showMessageDialog(null, "Memory has been reset!");
};

sendButton.addActionListener(replyActionListener);
forgetButton.addActionListener(forgetActionListener);
messageField.addActionListener(replyActionListener);
}

@@ -4,7 +4,10 @@
import chatbot.engine.nn.model.POSNeuralNetwork;
import chatbot.engine.nn.model.TextClassificationNeuralNetwork;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;

public class Chatbot {
private Memory memory;
@@ -14,7 +17,11 @@ public class Chatbot {

public Chatbot() {
memory = new Memory();
posNeuralNetwork = POSNeuralNetwork.loadPosWeight32(1);
posNeuralNetwork = POSNeuralNetwork.loadPosWeight16(3);
}

/**
 * Discards what the bot currently remembers by delegating to {@code memory.reset()}.
 */
public void resetMemory() {
memory.reset();
}

public void save() {
@@ -37,112 +44,59 @@ public String getReply(String message) {
Phrase[] subjects = InformationAnalyzer.findSubject(posTags, isQuestion);

for (Phrase where : adjectiveNoun) {
System.out.println("adjectiveNoun: " + where);
System.out.println("adjectiveNoun: " + where.getPhrase());
}

for (Phrase where : whenWhere) {
System.out.println("whenWhere: " + where);
System.out.println("whenWhere: " + where.getPhrase());
}

for (Phrase where : subjects) {
System.out.println("subjects: " + where);
System.out.println("subjects: " + where.getPhrase());
}


Sentence msgSentence = new Sentence(message);
msgSentence.setSubjects(subjects);
msgSentence.setLocations(whenWhere);
msgSentence.setAdjectiveNouns(adjectiveNoun);
SentenceV1 msgSentenceV1 = new SentenceV1(message);
msgSentenceV1.setSubjects(subjects);
msgSentenceV1.setLocations(whenWhere);
msgSentenceV1.setAdjectiveNouns(adjectiveNoun);

memory.add(msgSentence, isQuestion);
memory.add(msgSentenceV1, isQuestion);
if (isYesNoQuestion) {
System.out.println("");
// todo:
return "Yes";
} else if (isQuestion) {
return getAnswerV2(msgSentence, importantWords);
String ans = getAnswerV2(msgSentenceV1, importantWords);
if (ans == null || ans.length() == 0)
return "It seems like my database doesn't know about your question, can you please tell me more?";
else
return ans;
} else {
// todo: randomize affirmative answer
return "I see!";
}
}

public String getAnswerV2(Sentence sentence, String[] importantWords) {
// 1. search tfidf (sorted)
// 2. for each tfidf result sentence, search tf in the sentence, return highest tf
String message = sentence.getSentence().toLowerCase();
Sentence[] tfidfResults = memory.findTFIDF(importantWords);

if (tfidfResults.length == 0) {
// todo: randomize more answer
return "Sorry I don't understand what you mean, please tell me about that!";
} else {
StringBuilder sb = new StringBuilder();

for (int i = 0; i < tfidfResults.length; i++) {
Sentence t = tfidfResults[i];

System.out.println("found -> " + t);

boolean[] takeResult = TFIDF.findTF(t, importantWords);

System.out.println("take result -> " + Arrays.toString(takeResult));

if (i != 0 && i < tfidfResults.length - 1)
sb.append(", ");
else if (i != 0)
sb.append(" and ");

// when where
if (message.contains("when") || message.contains("where")) {
if (takeResult[2]) {
Phrase[] locations = t.getLocations();
for (int j = 0; j < locations.length; j++) {
if (j != 0 && j < locations.length - 1)
sb.append(", ");
else if (j != 0)
sb.append(" and ");

sb.append(locations[j].getPhrase());
}
}
} else {
System.out.println("before takeResult[0] -> " + sb.toString());

if (takeResult[0]) {
Phrase[] subjects = t.getSubjects();
for (int j = 0; j < subjects.length; j++) {
System.out.println("j -> " + j);
if (j != 0 && j < subjects.length - 1)
sb.append(", ");
else if (j != 0)
sb.append(" and ");

sb.append(subjects[j].getPhrase());
}
}

System.out.println("combining1 ->" + sb.toString());

if (takeResult[1]) {
if (takeResult[0])
sb.append(" or it can be ");

Phrase[] adjectiveNouns = t.getAdjectiveNouns();
for (int j = 0; j < adjectiveNouns.length; j++) {
if (j != 0 && j < adjectiveNouns.length - 1)
sb.append(", ");
else if (j != 0)
sb.append(" and ");

sb.append(adjectiveNouns[j].getPhrase());
}
}
public String getReplyV2(String message) {
String[] tokenized = Engine.tokenize(message);
POS[] posTags = posNeuralNetwork.predictPOS(tokenized);
String[] importantWords = Engine.removeUnimportantWords(posTags);
boolean isYesNoQuestion = Engine.isYesNoQuestion(tokenized);
boolean isQuestion = isYesNoQuestion || Engine.isQuestion(tokenized);

System.out.println("combining2 ->" + sb.toString());
}
}
SentenceV2 msgSentence = new SentenceV2(message, posTags);
memory.add(msgSentence, isQuestion);

return sb.toString();
if (isQuestion) {
String ans = getAnswerV3(msgSentence, importantWords, isYesNoQuestion);
if (ans == null || ans.length() == 0)
return "It seems like my database doesn't know about your question, can you please tell me more?";
else
return ans;
} else {
// todo: randomize affirmative answer
return "I see!";
}
}

@@ -153,9 +107,9 @@ public String getAnswer(String message, String[] importantWords, Phrase[] questi
// 2. for each tfidf result, identify if the question is asking about location
// 3. if the location phrase is not the important word, then return the location
// 4. for e.g. The river is by the school, because if i am asking, where is the school, i should return something
Sentence[] tfidfResults = memory.findTFIDF(importantWords);
SentenceV1[] tfidfResults = memory.findTFIDF(importantWords, false);

for (Sentence t : tfidfResults) {
for (SentenceV1 t : tfidfResults) {
System.out.println("found -> " + t);
}

@@ -164,7 +118,7 @@ public String getAnswer(String message, String[] importantWords, Phrase[] questi
return "Sorry I don't understand what you mean, please tell me about that!";
} else {
for (int i = 0; i < tfidfResults.length; i++) {
Sentence result = tfidfResults[i];
SentenceV1 result = tfidfResults[i];

if (message.toLowerCase().contains("when") || message.toLowerCase().contains("where")) {
Phrase[] locations = result.getLocations();
@@ -241,4 +195,139 @@ public String getAnswer(String message, String[] importantWords, Phrase[] questi
return tfidfResults[0].getSentence();
}
}

/**
 * Builds an answer for a question from the remembered V1 sentences.
 * <p>
 * The TF-IDF matches for {@code importantWords} are visited in the order returned by
 * {@code memory.findTFIDF}. For every match, {@code TFIDF.findTF} yields three flags that
 * decide which phrase groups of that match are appended: index 0 = subjects,
 * index 1 = adjective-nouns, index 2 = locations. Questions whose lower-cased text contains
 * "when" or "where" only ever use the locations group.
 *
 * @param sentenceV1     the question being answered
 * @param importantWords the filtered words of the question used for the lookup
 * @return the assembled answer text (possibly empty or containing only separators when no
 *         phrase group was selected), or a fixed apology when nothing matched
 */
public String getAnswerV2(SentenceV1 sentenceV1, String[] importantWords) {
    final String question = sentenceV1.getSentence().toLowerCase();
    final SentenceV1[] matches = memory.findTFIDF(importantWords, false);

    if (matches.length == 0) {
        // todo: randomize more answer
        return "Sorry I don't understand what you mean, please tell me about that!";
    }

    // The question text never changes while looping, so classify it once.
    final boolean asksWhenOrWhere = question.contains("when") || question.contains("where");
    final int lastMatch = matches.length - 1;
    final StringBuilder answer = new StringBuilder();

    for (int m = 0; m <= lastMatch; m++) {
        SentenceV1 match = matches[m];
        System.out.println("found -> " + match);

        boolean[] take = TFIDF.findTF(match, importantWords);
        System.out.println("take result -> " + Arrays.toString(take));

        // Separator between matches is written before it is known whether this match contributes.
        if (m != 0) {
            answer.append(m < lastMatch ? ", " : " and ");
        }

        if (asksWhenOrWhere) {
            if (take[2]) {
                Phrase[] places = match.getLocations();
                for (int k = 0; k < places.length; k++) {
                    if (k != 0) {
                        answer.append(k < places.length - 1 ? ", " : " and ");
                    }
                    answer.append(places[k].getPhrase());
                }
            }
            continue;
        }

        System.out.println("before takeResult[0] -> " + answer.toString());

        if (take[0]) {
            Phrase[] who = match.getSubjects();
            for (int k = 0; k < who.length; k++) {
                System.out.println("j -> " + k);
                if (k != 0) {
                    answer.append(k < who.length - 1 ? ", " : " and ");
                }
                answer.append(who[k].getPhrase());
            }
        }

        System.out.println("combining1 ->" + answer.toString());

        if (take[1]) {
            if (take[0]) {
                answer.append(" or it can be ");
            }

            Phrase[] described = match.getAdjectiveNouns();
            for (int k = 0; k < described.length; k++) {
                if (k != 0) {
                    answer.append(k < described.length - 1 ? ", " : " and ");
                }
                answer.append(described[k].getPhrase());
            }
        }

        System.out.println("combining2 ->" + answer.toString());
    }

    // todo: if empty, return whole answer
    return answer.toString();
}

/**
 * Builds a numbered list of answers for a question from the remembered V2 sentences.
 * <p>
 * The question type is chosen by substring checks on the lower-cased question text, tested in
 * the order why, where, who, what, when, did/do/does; the matching {@code Processor} routine
 * produces the candidate answers. Candidates are trimmed and de-duplicated, keeping the order
 * in which the processor returned them, so the numbering is stable between runs.
 *
 * @param sentenceV2      the question being answered
 * @param importantWords  the filtered words of the question used for the TF-IDF lookup
 * @param isYesNoQuestion whether the question was classified as a yes/no question
 * @return "No" (yes/no question) or a fixed apology when nothing in memory matched; otherwise
 *         a header line followed by one numbered answer per line, or the empty string when no
 *         processor produced an answer
 */
public String getAnswerV3(SentenceV2 sentenceV2, String[] importantWords, boolean isYesNoQuestion) {
    String message = sentenceV2.getSentence().toLowerCase();
    SentenceV2[] tfidfResults = memory.findTFIDFV2(importantWords, false);

    if (tfidfResults.length == 0) {
        // todo: randomize more answer
        if (!isYesNoQuestion)
            return "Sorry I don't understand what you mean, please tell me about that!";
        else
            return "No";
    }

    String[] answers = new String[]{};

    // These are substring checks, not whole-word checks: e.g. "do" also matches "does".
    if (message.contains("why")) {
        answers = Processor.processWhy(tfidfResults, sentenceV2, importantWords);
    } else if (message.contains("where")) {
        answers = Processor.processWhere(tfidfResults, sentenceV2, importantWords);
    } else if (message.contains("who")) {
        answers = Processor.processWho(tfidfResults, sentenceV2, importantWords);
    } else if (message.contains("what")) {
        answers = Processor.processWhat(tfidfResults, sentenceV2, importantWords);
    } else if (message.contains("when")) {
        answers = Processor.processWhen(tfidfResults, sentenceV2, importantWords);
    } else if (message.contains("did") || message.contains("do") || message.contains("does")) {
        answers = Processor.processYesNo(tfidfResults, sentenceV2, importantWords);
    }

    // De-duplicate on the text that is actually shown (trimmed) and keep first-seen order.
    // A plain HashSet would reorder the answers by hash and would treat "x" and "x " as different.
    java.util.LinkedHashSet<String> distinct = new java.util.LinkedHashSet<>();
    for (String answer : answers) {
        distinct.add(answer.trim());
    }

    StringBuilder sb = new StringBuilder();

    if (!distinct.isEmpty())
        sb.append("I found the below answers: \n");

    int position = 0;
    for (String answer : distinct) {
        if (position > 0)
            sb.append("\n");

        sb.append(++position).append(". ").append(answer);
    }

    return sb.toString();
}


}
@@ -3,7 +3,6 @@
import chatbot.engine.math.MathFunction;
import chatbot.engine.nlp.POS;
import chatbot.engine.nlp.Stemmer;
import chatbot.inputs.LexiconDatabase;
import chatbot.inputs.Stopwords;

import java.util.ArrayList;
@@ -53,37 +52,59 @@ public static String[] tokenize(String sentence) {
public static String stem(String input) {
Stemmer stemmer = new Stemmer();

for (char c : input.toCharArray())
for (char c : input.toLowerCase().toCharArray())
stemmer.add(c);

stemmer.stem();

return stemmer.toString();
}

/**
 * Stems every entry of {@code inputs} after lower-casing it.
 * <p>
 * A fresh {@code Stemmer} is used per word: its characters are fed in one by one,
 * {@code stem()} is invoked, and the stemmer's string form is taken as the result.
 *
 * @param inputs the words to stem
 * @return a new array holding the stemmed words, in the same order as {@code inputs}
 */
public static String[] stem(String[] inputs) {
    String[] stemmed = new String[inputs.length];

    for (int idx = 0; idx < inputs.length; idx++) {
        String lowered = inputs[idx].toLowerCase();
        Stemmer wordStemmer = new Stemmer();

        for (int pos = 0; pos < lowered.length(); pos++) {
            wordStemmer.add(lowered.charAt(pos));
        }

        wordStemmer.stem();
        stemmed[idx] = wordStemmer.toString();
    }

    return stemmed;
}

public static String[] removeUnimportantWords(POS[] tokenized) {
ArrayList<String> filtered = new ArrayList<>();

int i = 0;
int maxCount = 2; // consider only first 2 words are unimportant words
for (POS s : tokenized) {
// check tag
boolean firstFilter = s.isVerb() || s.isNoun() || s.isAdj() || s.isAdv();
// first filter should eliminate all punctuation & stopwords dy, do below just in case tagging is wrong.
boolean notPunctuation = firstFilter && !punctuation.contains(s.getTerm());
// check stopwords
boolean notStopword = notPunctuation && !Stopwords.contains(s.getTerm());
// in lexicon
// boolean inLexicon = notStopword && LexiconDatabase.contains(s.getTerm());

if (notStopword) {
filtered.add(s.getTerm());
boolean firstFilter = s.isVerb() || s.isNoun() || s.isAdj() || s.isAdv() || s.isAdp() || s.isNum() || s.isPron() || s.isDet();
if (i < maxCount) {
// first filter should eliminate all punctuation & stopwords dy, do below just in case tagging is wrong.
boolean notPunctuation = firstFilter && !punctuation.contains(s.getTerm());
// check stopwords
boolean notStopword = notPunctuation && !Stopwords.contains(s.getTerm());

i++;

if (notStopword) {
filtered.add(s.getTerm());
}
} else {
if (firstFilter) // only condition stated above
filtered.add(s.getTerm());
}
}

return filtered.toArray(new String[filtered.size()]);
}

public static boolean isQuestion(String[] tokenized) {
// for (int i = 0; i < tokenized.length; i++) {
int i = 0;
boolean isWhat = tokenized[i].equalsIgnoreCase("what");
boolean isWho = isWhat || tokenized[i].equalsIgnoreCase("who");
@@ -94,17 +115,17 @@ public static boolean isQuestion(String[] tokenized) {
boolean containQuestionMark = isHow || tokenized[tokenized.length - 1].equalsIgnoreCase("?");

return containQuestionMark;
// if (containQuestionMark)
// return true;
//
// return false;
}

public static boolean isYesNoQuestion(String[] tokenized) {
int i = 0;

boolean isIs = tokenized[i].equalsIgnoreCase("is");
boolean isAre = isIs || tokenized[i].equalsIgnoreCase("are");
boolean isDid = isAre || tokenized[i].equalsIgnoreCase("did");
boolean isDo = isDid || tokenized[i].equalsIgnoreCase("do");
boolean isDoes = isDo || tokenized[i].equalsIgnoreCase("does");

return isAre;
return isDoes;
}
}
@@ -1,9 +1,9 @@
package chatbot.engine;

import chatbot.engine.nlp.EquivalentSentence;
import chatbot.engine.nlp.Question;
import chatbot.engine.nlp.Sentence;
import chatbot.engine.nlp.SentenceV1;
import chatbot.engine.nlp.SentenceV2;
import chatbot.engine.nlp.TFIDF;
import chatbot.utils.Logger;

import javax.swing.*;
import java.io.*;
@@ -12,30 +12,54 @@
public class Memory {

private final String path = "data/memory.csv";
private ArrayList<Sentence> memory;
private ArrayList<Question> askedQuestions;
private final String pathV2 = "data/memoryV2.csv";
private ArrayList<SentenceV1> memory;
private ArrayList<SentenceV2> memoryV2;

public Memory() {
memory = new ArrayList<>();
askedQuestions = new ArrayList<>();
memoryV2 = new ArrayList<>();
retrieveMemory();
}

public void add(Sentence sentence, boolean isQuestion) {
/**
 * Empties both in-memory sentence lists (the V1 list and the V2 list) after logging the reset.
 * This method itself does not write anything to disk.
 */
public void reset() {
Logger.println("Reset memory...");
memory.clear();
memoryV2.clear();
}

/**
 * Remembers a V2 sentence unless it is a question or is already stored.
 *
 * @param sentenceV2 the sentence to remember
 * @param isQuestion {@code true} when the sentence is a question; questions are never stored
 */
public void add(SentenceV2 sentenceV2, boolean isQuestion) {
    if (isQuestion || memoryV2.contains(sentenceV2)) {
        return;
    }

    memoryV2.add(sentenceV2);
}

public void add(SentenceV1 sentenceV1, boolean isQuestion) {
// classify if its a question or information, if repeated question, can reply something funny.
if (!isQuestion) {
if (!memory.contains(sentence))
memory.add(sentence);
if (!memory.contains(sentenceV1))
memory.add(sentenceV1);
}
}

public Sentence[] findTFIDF(String[] filteredTarget) {
return TFIDF.calculate(memory, filteredTarget);
public SentenceV1[] findTFIDF(String[] filteredTarget, boolean stemming) {
return TFIDF.calculate(memory, filteredTarget, stemming);
}

/**
 * Looks up the stored V2 sentences for the given words by delegating to
 * {@code TFIDF.calculateV2}.
 *
 * @param filteredTarget the words to search for
 * @param stemming       forwarded unchanged to {@code TFIDF.calculateV2}
 *                       (presumably toggles stemming of the compared words; confirm there)
 * @return whatever {@code TFIDF.calculateV2} returns for the current V2 memory
 */
public SentenceV2[] findTFIDFV2(String[] filteredTarget, boolean stemming) {
return TFIDF.calculateV2(memoryV2, filteredTarget, stemming);
}

public void saveMemory() {
try {
BufferedWriter f = new BufferedWriter(new FileWriter(path));
for (Sentence s : memory) {
for (SentenceV1 s : memory) {
f.write(s.toString() + "\n");
}
f.close();

f = new BufferedWriter(new FileWriter(pathV2));
for (SentenceV2 s : memoryV2) {
f.write(s.toString() + "\n");
}
f.close();
@@ -47,21 +71,25 @@ public void saveMemory() {
public void retrieveMemory() {
try {
if (!new File(path).exists()) {
System.out.println("First time running will not load memory");
Logger.println("First time running will not load memory");
return;
}

BufferedReader f = new BufferedReader(new FileReader(path));
String s;

while ((s = f.readLine()) != null) {
memory.add(Sentence.parseString(s));
memory.add(SentenceV1.parseString(s));
}

f = new BufferedReader(new FileReader(pathV2));

while ((s = f.readLine()) != null) {
memoryV2.add(SentenceV2.parseString(s));
}
} catch (IOException e) {
JOptionPane.showMessageDialog(null, "Cannot read file due to " + e.getMessage(), "Error!", JOptionPane.ERROR_MESSAGE);
} catch (Exception e) {
System.out.println(e.getMessage());
throw new RuntimeException(e.getMessage());
}
}
@@ -0,0 +1,37 @@
package chatbot.engine.nlp;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;

/**
 * Holds the production rules of a small context-free grammar over POS-style symbols.
 * <p>
 * Every non-terminal ({@code S}, {@code VP}, {@code NP}, {@code PP}) maps to the list of all
 * its right-hand sides, kept in the order they are registered in {@link #setup()}.
 */
public class CFGConfig {

    /** Non-terminal -> every registered right-hand side (each one a sequence of symbols). */
    private final HashMap<String, ArrayList<ArrayList<String>>> cfgMap;

    /** Creates the configuration and registers the built-in rules. */
    public CFGConfig() {
        cfgMap = new HashMap<>();
        setup();
    }

    /** Registers the built-in productions. */
    private void setup() {
        addRule("S", "VP");
        addRule("S", "AUX", "NP", "VP");
        addRule("S", "NP", "VP");
        addRule("S", "NP", "VERB");
        addRule("VP", "VP", "PP");
        addRule("VP", "VERB", "NP");
        addRule("VP", "VERB", "PP");
        addRule("VP", "VERB", "NOUN");
        addRule("VP", "VERB", "VP");
        addRule("VP", "VERB");
        addRule("NP", "DET", "NOUN");
        addRule("NP", "NP", "NOUN");
        addRule("NP", "NOUN");
        addRule("PP", "IN", "NP");
    }

    /**
     * Appends one production to the alternatives of {@code nonTerminal}.
     * A plain {@code put} would replace the earlier alternatives for the same key.
     */
    private void addRule(String nonTerminal, String... symbols) {
        ArrayList<ArrayList<String>> alternatives = cfgMap.get(nonTerminal);
        if (alternatives == null) {
            alternatives = new ArrayList<>();
            cfgMap.put(nonTerminal, alternatives);
        }
        alternatives.add(generateList(symbols));
    }

    /**
     * Copies the given symbols into a new {@code ArrayList}.
     * {@code Arrays.asList} returns a fixed-size list that is not a {@code java.util.ArrayList},
     * so it must be copied rather than cast.
     */
    private ArrayList<String> generateList(String... s) {
        return new ArrayList<>(Arrays.asList(s));
    }

}

This file was deleted.

@@ -0,0 +1,52 @@
package chatbot.engine.nlp;

/**
 * Fixed English word lists shared by the grammar-related code.
 */
public class GrammarLibrary {

    /** Positional / directional prepositions (17 entries). */
    public static final String[] towardWords = {
            "to", "towards", "onto", "into", "beside", "at",
            "after", "on", "above", "below", "under", "against",
            "along", "by", "between", "inside", "over"
    };

    /**
     * Subordinating conjunctions, single- and multi-word (28 entries).
     * "while" appears twice; the duplicate is kept so the array content is unchanged.
     */
    public static final String[] conjunctions = {
            "while", "as soon as", "although", "before",
            "even if", "because", "no matter how", "whether",
            "wherever", "when", "until", "after",
            "as if", "how", "if", "provided",
            "in that", "once", "supposing", "while",
            "unless", "in case", "as far as", "now that",
            "as", "so that", "though", "since"
    };
}
@@ -60,7 +60,6 @@ public class POS {
static {
specialVerb.add("is");
specialVerb.add("are");

}

private String term;
@@ -78,8 +77,8 @@ public POS(String term, String tag, double prob) {
this.tag = tag;
this.prob = prob;

if (this.prob < 0.8)
this.tag = NOUN;
// if (this.prob < 0.6)
// this.tag = NOUN;
}

public int getTermLength() {

Large diffs are not rendered by default.

This file was deleted.

@@ -1,128 +1,9 @@
package chatbot.engine.nlp;

import java.util.ArrayList;
import java.util.Arrays;
public interface Sentence {

public class Sentence {
String getSentence();

private String sentence;
private Phrase[] subjects = new Phrase[0];
private Phrase[] adjectiveNouns = new Phrase[0];
private Phrase[] locations = new Phrase[0];
String toString();

public Sentence(String sentence) {
this.sentence = sentence;
}

public void setSubjects(Phrase[] subjects) {
if (subjects != null)
this.subjects = subjects;
}

public void setAdjectiveNouns(Phrase[] adjectiveNouns) {
if (adjectiveNouns != null)
this.adjectiveNouns = adjectiveNouns;
}

public void setLocations(Phrase[] locations) {
if (locations != null)
this.locations = locations;
}

public String getSentence() {
return sentence;
}

public Phrase[] getSubjects() {
return subjects;
}

public Phrase[] getAdjectiveNouns() {
return adjectiveNouns;
}

public Phrase[] getLocations() {
return locations;
}

public String toString() {
// sentence\tn\t...\tn\t...\tn
StringBuilder s = new StringBuilder(sentence);
s.append("\t").append(subjects.length).append("\t");
for (int i = 0; i < subjects.length; i++) {
Phrase phrase = subjects[i];
s.append(phrase.getPhrase()).append("\t");
}
s.append(adjectiveNouns.length).append("\t");
for (int i = 0; i < adjectiveNouns.length; i++) {
Phrase phrase = adjectiveNouns[i];
s.append(phrase.getPhrase()).append("\t");
}

s.append(locations.length);

if (locations.length != 0)
s.append("\t");

for (int i = 0; i < locations.length; i++) {
Phrase phrase = locations[i];
s.append(phrase.getPhrase());
if (i != locations.length - 1)
s.append("\t");
}
return s.toString();
}

public static Sentence parseString(String s) {
String[] splitByTab = s.split("\t");
int n;
int i = 0;
String sentenceString = splitByTab[i++];
n = Integer.parseInt(splitByTab[i++]);
Phrase[] subjects = new Phrase[n];
for (int j = 0; j < n; j++) {
subjects[j] = Phrase.parseString(splitByTab[i++]);
}
n = Integer.parseInt(splitByTab[i++]);
Phrase[] adjectiveNouns = new Phrase[n];
for (int j = 0; j < n; j++) {
adjectiveNouns[j] = Phrase.parseString(splitByTab[i++]);
}
n = Integer.parseInt(splitByTab[i++]);
Phrase[] locations = new Phrase[n];
for (int j = 0; j < n; j++) {
locations[j] = Phrase.parseString(splitByTab[i++]);
}
Sentence sentence = new Sentence(sentenceString);
sentence.setSubjects(subjects);
sentence.setAdjectiveNouns(adjectiveNouns);
sentence.setLocations(locations);

return sentence;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;

Sentence sentence1 = (Sentence) o;

if (!sentence.equals(sentence1.sentence)) return false;
// Probably incorrect - comparing Object[] arrays with Arrays.equals
if (!Arrays.equals(subjects, sentence1.subjects)) return false;
// Probably incorrect - comparing Object[] arrays with Arrays.equals
if (!Arrays.equals(adjectiveNouns, sentence1.adjectiveNouns)) return false;
// Probably incorrect - comparing Object[] arrays with Arrays.equals
return Arrays.equals(locations, sentence1.locations);
}

@Override
public int hashCode() {
int result = sentence.hashCode();
result = 31 * result + Arrays.hashCode(subjects);
result = 31 * result + Arrays.hashCode(adjectiveNouns);
result = 31 * result + Arrays.hashCode(locations);
return result;
}
}
@@ -0,0 +1,129 @@
package chatbot.engine.nlp;

import java.util.Arrays;

/**
 * First-generation sentence record: the raw sentence text plus three phrase groups
 * (subjects, adjective-nouns and locations).
 * <p>
 * {@link #toString()} writes one tab-separated line and {@link #parseString(String)} reads
 * such a line back.
 */
public class SentenceV1 implements Sentence {

    private String sentence;
    private Phrase[] subjects = new Phrase[0];
    private Phrase[] adjectiveNouns = new Phrase[0];
    private Phrase[] locations = new Phrase[0];

    /**
     * @param sentence the raw sentence text; all phrase groups start out empty
     */
    public SentenceV1(String sentence) {
        this.sentence = sentence;
    }

    /** Replaces the subject phrases; a {@code null} argument leaves the current value untouched. */
    public void setSubjects(Phrase[] subjects) {
        if (subjects == null) {
            return;
        }
        this.subjects = subjects;
    }

    /** Replaces the adjective-noun phrases; a {@code null} argument leaves the current value untouched. */
    public void setAdjectiveNouns(Phrase[] adjectiveNouns) {
        if (adjectiveNouns == null) {
            return;
        }
        this.adjectiveNouns = adjectiveNouns;
    }

    /** Replaces the location phrases; a {@code null} argument leaves the current value untouched. */
    public void setLocations(Phrase[] locations) {
        if (locations == null) {
            return;
        }
        this.locations = locations;
    }

    public String getSentence() {
        return sentence;
    }

    public Phrase[] getSubjects() {
        return subjects;
    }

    public Phrase[] getAdjectiveNouns() {
        return adjectiveNouns;
    }

    public Phrase[] getLocations() {
        return locations;
    }

    /**
     * Serializes this sentence as
     * {@code sentence \t #subjects \t subjects... \t #adjectiveNouns \t adjectiveNouns... \t #locations [\t locations...]}.
     * The line never ends with a tab when there is at least one location.
     */
    public String toString() {
        StringBuilder line = new StringBuilder(sentence);

        line.append("\t").append(subjects.length).append("\t");
        for (Phrase subject : subjects) {
            line.append(subject.toString()).append("\t");
        }

        line.append(adjectiveNouns.length).append("\t");
        for (Phrase adjectiveNoun : adjectiveNouns) {
            line.append(adjectiveNoun.toString()).append("\t");
        }

        // Locations come last: each one is preceded (not followed) by a tab.
        line.append(locations.length);
        for (Phrase location : locations) {
            line.append("\t").append(location.toString());
        }

        return line.toString();
    }

    /**
     * Parses a line in the format written by {@link #toString()}.
     *
     * @param s one serialized sentence
     * @return the reconstructed sentence
     * @throws NumberFormatException          if a group count is not an integer
     * @throws ArrayIndexOutOfBoundsException if the line has fewer fields than its counts announce
     */
    public static SentenceV1 parseString(String s) {
        String[] fields = s.split("\t");
        int[] cursor = {0};

        SentenceV1 parsed = new SentenceV1(fields[cursor[0]++]);
        Phrase[] parsedSubjects = readGroup(fields, cursor);
        Phrase[] parsedAdjectiveNouns = readGroup(fields, cursor);
        Phrase[] parsedLocations = readGroup(fields, cursor);

        parsed.setSubjects(parsedSubjects);
        parsed.setAdjectiveNouns(parsedAdjectiveNouns);
        parsed.setLocations(parsedLocations);
        return parsed;
    }

    /** Reads a count followed by that many phrases, advancing {@code cursor[0]} past them. */
    private static Phrase[] readGroup(String[] fields, int[] cursor) {
        int count = Integer.parseInt(fields[cursor[0]++]);
        Phrase[] group = new Phrase[count];
        for (int k = 0; k < count; k++) {
            group[k] = Phrase.parseString(fields[cursor[0]++]);
        }
        return group;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        SentenceV1 other = (SentenceV1) o;

        // Arrays.equals compares the phrase arrays element by element via Phrase.equals.
        return sentence.equals(other.sentence)
                && Arrays.equals(subjects, other.subjects)
                && Arrays.equals(adjectiveNouns, other.adjectiveNouns)
                && Arrays.equals(locations, other.locations);
    }

    @Override
    public int hashCode() {
        int hash = sentence.hashCode();
        for (Phrase[] group : new Phrase[][]{subjects, adjectiveNouns, locations}) {
            hash = 31 * hash + Arrays.hashCode(group);
        }
        return hash;
    }
}
@@ -0,0 +1,122 @@
package chatbot.engine.nlp;

import chatbot.utils.Logger;

import java.util.Arrays;

public class SentenceV2 implements Sentence {

private String sentence;
private POS[] posTags;

public SentenceV2(String sentence, POS[] posTags) {
this.sentence = sentence;
this.posTags = posTags;
}

public POS[] getPosTags() {
return posTags;
}

public String getSentence() {
return sentence;
}

public String toString() {
// sentence\tn\tpos1\tpos2
StringBuilder s = new StringBuilder(sentence);
int n = posTags.length;
s.append("\t").append(n).append("\t");
for (int i = 0; i < posTags.length; i++) {
s.append(posTags[i].toString());
if (i < posTags.length - 1)
s.append("\t");
}
return s.toString();
}

public boolean containsIgnoreCase(String word) {
word = word.toLowerCase();
return sentence.toLowerCase().contains(word);
}

public int indexOf(String word, boolean ignoreCase) {
String target = sentence;
if (ignoreCase)
target = target.toLowerCase();
return target.indexOf(word);
}

public int indexPosOf(String word, boolean ignoreCase) {
for (int i = 0; i < posTags.length; i++) {
POS pos = posTags[i];
String term = pos.getTerm();
if (ignoreCase) {
term = term.toLowerCase();
word = word.toLowerCase();
}

double similarity = StringSimilarity.similarity(term, word);
// Logger.println("Similarity of " + term + " & " + word + ": " + similarity);
if (similarity >= 0.6) {
return i;
}
}

return -1;
}

public POS posOf(String word, boolean ignoreCase) {
for (POS pos : posTags) {
String term = pos.getTerm();
if (ignoreCase) {
term = term.toLowerCase();
word = word.toLowerCase();
}

double similarity = StringSimilarity.similarity(term, word);
if (similarity >= 0.6) {
return pos;
}
}

return null;
}

public static SentenceV2 parseString(String s) {
Logger.println("Parsing string into SentenceV2...");
String[] splitByTab = s.split("\t");
int n;
int i = 0;
String sentenceString = splitByTab[i++];
n = Integer.parseInt(splitByTab[i++]);
POS[] posTags = new POS[n];

for (int j = 0; j < n; j++) {
posTags[j] = POS.parseString(splitByTab[i++]);
}
Logger.println("Done...");

return new SentenceV2(sentenceString, posTags);
}


/**
 * Two sentences are equal when they are of the same class, carry equal sentence text
 * and have element-wise equal POS tag arrays.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null || o.getClass() != getClass()) {
        return false;
    }

    final SentenceV2 other = (SentenceV2) o;
    // Arrays.equals compares the tags pairwise through their own equals();
    // presumably POS overrides equals — verify, otherwise this is identity comparison.
    return sentence.equals(other.sentence) && Arrays.equals(posTags, other.posTags);
}

/**
 * Hash code combining the sentence text and the POS tag array, using the same
 * two fields that {@code equals} compares.
 */
@Override
public int hashCode() {
    final int textHash = sentence.hashCode();
    final int tagsHash = Arrays.hashCode(posTags);
    return 31 * textHash + tagsHash;
}
}
@@ -18,7 +18,12 @@ public static double similarity(String s1, String s2) {
return (longerLength - editDistance(longer, shorter)) / (double) longerLength;
}

public static int editDistance(String s1, String s2) {
/**
* Source
* 1. https://stackoverflow.com/questions/955110/similarity-string-comparison-in-java
* 2. http://rosettacode.org/wiki/Levenshtein_distance#Java
*/
private static int editDistance(String s1, String s2) {
s1 = s1.toLowerCase();
s2 = s2.toLowerCase();

@@ -32,8 +37,7 @@ public static int editDistance(String s1, String s2) {
if (j > 0) {
int newValue = costs[j - 1];
if (s1.charAt(i - 1) != s2.charAt(j - 1))
newValue = Math.min(Math.min(newValue, lastValue),
costs[j]) + 1;
newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1;
costs[j - 1] = lastValue;
lastValue = newValue;
}
@@ -45,4 +49,13 @@ public static int editDistance(String s1, String s2) {
return costs[s2.length()];
}

/**
 * Small manual demo: prints the similarity score for a handful of sample word pairs.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final String[][] samples = {
        {"abcdefg", "gfedcba"},
        {"ran", "run"},
        {"width", "wdth"},
        {"sing", "song"},
        {"Chiayi", "Yukang"},
        {"compare", "compare"},
    };

    for (String[] pair : samples) {
        final String left = pair[0];
        final String right = pair[1];
        System.out.println("Result " + left + " & " + right + ": " + similarity(left, right));
    }
}

}
@@ -0,0 +1,18 @@
package chatbot.engine.nlp;

import chatbot.engine.Engine;

/**
 * Word-level string helpers for the NLP engine.
 */
public class StringUtils {
    /**
     * Replaces every whole-word occurrence of {@code oldWord} in {@code s} with {@code newWord},
     * ignoring letter case.
     *
     * <p>This method used to be a stub that always returned an empty string. The whole-word,
     * case-insensitive semantics were chosen to mirror {@link #containsWord(String, String)}.
     * A "word" here is delimited by non-word characters ({@code \w} boundaries), which may
     * differ slightly from {@code Engine.tokenize} — NOTE(review): confirm this is the
     * intended contract.
     *
     * @param s       the text to rewrite; {@code null} yields an empty string
     * @param oldWord the word to replace; when {@code null} or empty, {@code s} is returned unchanged
     * @param newWord the replacement, inserted literally; {@code null} is treated as empty
     * @return the rewritten text
     */
    public static String replaceAll(String s, String oldWord, String newWord) {
        if (s == null) {
            return "";
        }
        if (oldWord == null || oldWord.isEmpty()) {
            return s;
        }

        String replacement = newWord == null ? "" : newWord;
        // Quote both sides so regex metacharacters in the words are taken literally.
        java.util.regex.Pattern wholeWord = java.util.regex.Pattern.compile(
                "(?<!\\w)" + java.util.regex.Pattern.quote(oldWord) + "(?!\\w)",
                java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.UNICODE_CASE);
        return wholeWord.matcher(s).replaceAll(java.util.regex.Matcher.quoteReplacement(replacement));
    }

    /**
     * Tests whether {@code word} appears as a token of {@code s}, ignoring letter case.
     * The text is split with {@code Engine.tokenize}.
     *
     * @param s    the text to search
     * @param word the token to look for
     * @return {@code true} if any token equals {@code word} ignoring case
     */
    public static boolean containsWord(String s, String word) {
        String[] tokenized = Engine.tokenize(s);
        for (String t : tokenized) {
            if (t.equalsIgnoreCase(word)) {
                return true;
            }
        }
        return false;
    }
}
@@ -2,12 +2,10 @@

import chatbot.engine.Engine;
import chatbot.engine.math.MathFunction;
import chatbot.engine.math.MatrixFunction;
import chatbot.utils.Logger;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;

public class TFIDF {
private static int[] getNonZeroSortedIndex(double[] tfScores) {
@@ -30,7 +28,7 @@ private static int[] getNonZeroSortedIndex(double[] tfScores) {
// selection sort
for (int i = 0; i < nonZeroScores.length; i++) {
for (int j = 0; j < nonZeroScores.length - i - 1; j++) {
if (nonZeroScores[j] > nonZeroScores[j + 1]) {
if (nonZeroScores[j] < nonZeroScores[j + 1]) { // reverse sorting!
double temp = nonZeroScores[j];
nonZeroScores[j] = nonZeroScores[j + 1];
nonZeroScores[j + 1] = temp;
@@ -52,10 +50,10 @@ private static int[] getNonZeroSortedIndex(double[] tfScores) {
return r;
}

public static Sentence[] calculate(ArrayList<Sentence> documents, String[] target_) {
public static SentenceV1[] calculate(ArrayList<SentenceV1> documents, String[] target_, boolean stemming) {
// if filtered target words are empty, then return empty array
if (target_.length == 0) {
return new Sentence[0];
return new SentenceV1[0];
}

String[] target = new String[target_.length];
@@ -65,7 +63,7 @@ public static Sentence[] calculate(ArrayList<Sentence> documents, String[] targe
target[i] = target_[i].toLowerCase();
}

System.out.println("tfidf input -> " + Arrays.toString(target));
Logger.println("TFIDF input -> " + Arrays.toString(target));

double[] tfScores = new double[documents.size()];
double idfCount = 0;
@@ -76,21 +74,27 @@ public static Sentence[] calculate(ArrayList<Sentence> documents, String[] targe
String sentence = documents.get(i).getSentence().toLowerCase();
String[] tokenized = Engine.tokenize(sentence);

if (stemming)
tokenized = Engine.stem(tokenized);


for (String word : tokenized) {
for (String t : target) {
if (t.equalsIgnoreCase(word))
if (StringSimilarity.similarity(t, word) >= 0.6) // more than 60% similar
tfCount++;
// if (t.equalsIgnoreCase(word))
// tfCount++;
}
}

int count = 0;
for (String t : target) {
if (sentence.contains(t)) {
count++;
}
}
// int count = 0;
// for (String t : target) {
// if (sentence.contains(t)) {
// count++;
// }
// }

idfCount += (double) count / target.length;
idfCount += (double) tfCount / target.length;

double tfScore = (double) tfCount / tokenized.length;

@@ -104,9 +108,9 @@ public static Sentence[] calculate(ArrayList<Sentence> documents, String[] targe
tfScores[i] *= idfScore;
}

System.out.println("tfidf score ->" + Arrays.toString(tfScores));
Logger.println("TFIDF score ->" + Arrays.toString(tfScores));
// find sorted indices
ArrayList<Sentence> filtered = new ArrayList<>();
ArrayList<SentenceV1> filtered = new ArrayList<>();
int[] sortedIndices = getNonZeroSortedIndex(tfScores);
int getTop = 5; // get only top 5 results

@@ -116,11 +120,11 @@ public static Sentence[] calculate(ArrayList<Sentence> documents, String[] targe
filtered.add(documents.get(sortedIndices[i]));
}

return filtered.toArray(new Sentence[filtered.size()]);
return filtered.toArray(new SentenceV1[filtered.size()]);
}

public static boolean[] findTF(Sentence sentence, String[] importantWords) {
Phrase[][] phrases = {sentence.getSubjects(), sentence.getAdjectiveNouns(), sentence.getLocations()};
public static boolean[] findTF(SentenceV1 sentenceV1, String[] importantWords) {
Phrase[][] phrases = {sentenceV1.getSubjects(), sentenceV1.getAdjectiveNouns(), sentenceV1.getLocations()};

boolean[] canTake = {true, true, true};
int[] countTF = {0, 0, 0}; // check tf category
@@ -136,4 +140,74 @@ public static boolean[] findTF(Sentence sentence, String[] importantWords) {

return canTake;
}

/**
 * Ranks {@code documents} against the target words with a fuzzy TF-IDF style score and
 * returns at most the five best-scoring sentences.
 *
 * <p>For each document the sentence is lower-cased, tokenized with {@code Engine.tokenize}
 * and optionally stemmed. Every (token, target) pair whose {@code StringSimilarity.similarity}
 * is at least 0.63 counts as one hit; documents with fewer than two hits are scored as zero.
 * The per-document term frequency (hits / token count) is then scaled by one global factor
 * derived from the accumulated hit ratio.
 *
 * <p>A document that yields no tokens now scores 0. Previously the {@code 0 / 0} division
 * produced NaN, which then leaked into the ranking.
 *
 * @param documents the candidate sentences
 * @param target_   the target words; not modified
 * @param stemming  whether tokens are passed through {@code Engine.stem} before matching
 * @return up to five documents in the order given by {@code getNonZeroSortedIndex};
 *         an empty array when {@code target_} is empty
 */
public static SentenceV2[] calculateV2(ArrayList<SentenceV2> documents, String[] target_, boolean stemming) {
    // if filtered target words are empty, then return empty array
    if (target_.length == 0) {
        return new SentenceV2[0];
    }

    // Work on a lower-cased copy so the caller's array stays untouched.
    String[] target = new String[target_.length];
    for (int i = 0; i < target_.length; i++) {
        target[i] = target_[i].toLowerCase();
    }

    Logger.println("TFIDF input -> " + Arrays.toString(target));

    double[] tfScores = new double[documents.size()];
    double idfCount = 0;

    // count tf and idf
    for (int i = 0; i < documents.size(); i++) {
        String sentence = documents.get(i).getSentence().toLowerCase();
        String[] tokenized = Engine.tokenize(sentence);

        if (stemming) {
            tokenized = Engine.stem(tokenized);
        }

        int tfCount = 0;
        for (String word : tokenized) {
            // Targets are already lower-case; normalize the token once per word, not per pair.
            String loweredWord = word.toLowerCase();
            for (String t : target) {
                if (StringSimilarity.similarity(t, loweredWord) >= 0.63) { // more than 63% similar
                    tfCount++;
                }
            }
        }

        Logger.println(sentence + " " + tfCount);

        if (tfCount < 2) { // minimum is 2 element
            tfCount = 0;
        }

        idfCount += (double) tfCount / target.length;

        // An empty token list would give 0.0 / 0 = NaN; treat such a document as a non-match.
        tfScores[i] = tokenized.length == 0 ? 0.0 : (double) tfCount / tokenized.length;
    }

    // find the score
    double idfScore = MathFunction.log10((double) (documents.size() + 1) / (idfCount + 1e-9)); // +1 is to avoid first information being ignored

    for (int i = 0; i < documents.size(); i++) {
        tfScores[i] *= idfScore;
    }

    Logger.println("TFIDF score ->" + Arrays.toString(tfScores));

    // find sorted indices
    int[] sortedIndices = getNonZeroSortedIndex(tfScores);
    int getTop = 5; // get only top 5 results
    int limit = Math.min(getTop, sortedIndices.length);

    ArrayList<SentenceV2> filtered = new ArrayList<>();
    for (int i = 0; i < limit; i++) {
        filtered.add(documents.get(sortedIndices[i]));
    }

    return filtered.toArray(new SentenceV2[filtered.size()]);
}

}
@@ -2,6 +2,7 @@

import chatbot.engine.nn.optimizer.Optimizer;
import chatbot.inputs.dataset.Dataset;
import chatbot.utils.Logger;
import chatbot.utils.json.parser.ParseException;

import java.io.IOException;
@@ -41,7 +42,7 @@ public Double[][] getWeight(String name) {
}

/**
 * Stores {@code weight} in {@code memory} under {@code name}, after printing the name and
 * the shape of the weight matrix. The shape is read from {@code weight.length} and
 * {@code weight[0].length}, so the matrix must have at least one row.
 *
 * @param name   the key the weight is stored under
 * @param weight the weight matrix to store
 */
public void setWeight(String name, Double[][] weight) {
// NOTE(review): the same message is printed twice, once via System.out and once via Logger.
// The System.out call looks like a leftover from before Logger was introduced — confirm which should stay.
System.out.println("Set weight for -> " + name + " with shape (" + weight.length + "," + weight[0].length + ")");
Logger.println("Set weight for -> " + name + " with shape (" + weight.length + "," + weight[0].length + ")");
memory.put(name, weight);
}

@@ -1,13 +1,14 @@
package chatbot.engine.nn.model;

import chatbot.utils.ArrayUtils;
import chatbot.engine.math.MatrixFunction;
import chatbot.engine.nn.core.TrainParam;
import chatbot.engine.nlp.POS;
import chatbot.engine.nn.core.NeuralNetwork;
import chatbot.engine.nn.core.TrainParam;
import chatbot.engine.nn.optimizer.GradientDescentOptimizer;
import chatbot.engine.nlp.POS;
import chatbot.inputs.POSInput;
import chatbot.inputs.dataset.POSDataset;
import chatbot.utils.ArrayUtils;
import chatbot.utils.Logger;
import chatbot.utils.Utils;
import chatbot.utils.json.parser.ParseException;

@@ -152,7 +153,7 @@ public void printOutputShape() {
}

public void load(String filePath) throws IOException, ClassNotFoundException, ParseException {
System.out.println("Loading weights...");
Logger.println("Loading weights...");
HashMap map = Utils.deserializeHashMap(filePath);

Double[][] weight1 = (Double[][]) map.get("weight1");
@@ -175,19 +176,23 @@ public void load(String filePath) throws IOException, ClassNotFoundException, Pa
memory.put("bias2", bias2);
memory.put("bias3", bias3);

System.out.println("Loaded weights");
Logger.println("Loaded weights");
}

/**
 * Predicts one part-of-speech tag per token.
 *
 * <p>The tokens are converted to features and then to network inputs via {@code POSInput},
 * run through {@code predict}, and the arg-max class of each output row is decoded with
 * {@code POSDataset.decodeClass}. Each prediction and the total elapsed time are logged.
 *
 * @param tokenized the tokens of one sentence
 * @return one {@code POS} per prediction row, holding the token, the decoded tag and the
 *         network output value of the winning class
 */
public POS[] predictPOS(String[] tokenized) {
    final long startedAt = System.currentTimeMillis();
    Logger.println("Predicting POS...");

    final String[][] features = POSInput.convertToFeatures(tokenized);
    final Double[][] pred = predict(POSInput.convertToInput(features));
    final int[] best = MatrixFunction.argmax(pred);
    final POS[] tagged = new POS[best.length];

    int row = 0;
    for (int classIndex : best) {
        final POS tag = new POS(tokenized[row], POSDataset.decodeClass(classIndex), pred[row][classIndex]);
        tagged[row] = tag;
        Logger.println(tag.getTerm() + " -> " + tag.getTag() + String.format(" -> %.2f", tag.getProb() * 100.0));
        row++;
    }

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    Logger.printf("Predicting POS used %.2f seconds\n", elapsedMillis / 1000.0);
    return tagged;
}

@@ -200,6 +205,13 @@ private static ArrayList<String> getFileList(int weightHiddenUnits, int v) {
fileList.add("data/weights32/weight32_fc2_bias.csv");
fileList.add("data/weights32/weight32_dense_1_kernel.csv");
fileList.add("data/weights32/weight32_dense_1_bias.csv");
} else if (v == 3 && weightHiddenUnits == 16) {
fileList.add("data/weights16_v3_half/weight16_fc1_kernel_v3_half.csv");
fileList.add("data/weights16_v3_half/weight16_fc1_bias_v3_half.csv");
fileList.add("data/weights16_v3_half/weight16_fc2_kernel_v3_half.csv");
fileList.add("data/weights16_v3_half/weight16_fc2_bias_v3_half.csv");
fileList.add("data/weights16_v3_half/weight16_dense_1_kernel_v3_half.csv");
fileList.add("data/weights16_v3_half/weight16_dense_1_bias_v3_half.csv");
} else if (v == 1) {
fileList.add("data/weights64/weight_fc1_kernel.csv");
fileList.add("data/weights64/weight_fc1_bias.csv");
@@ -226,21 +238,37 @@ public static POSNeuralNetwork loadPosWeight32(int v) {
throw new RuntimeException("32 hidden units only have version 1");
}

/**
 * Loads the POS network with 16 hidden units. Only version 3 is accepted.
 *
 * @param v the weight version; must be 3
 * @return the network returned by {@code loadPosWeight(16, v)}
 * @throws RuntimeException if {@code v} is not 3
 */
public static POSNeuralNetwork loadPosWeight16(int v) {
    if (v != 3) {
        throw new RuntimeException("16 hidden units only have version 3");
    }
    return loadPosWeight(16, v);
}

/**
 * Loads the POS network with 64 hidden units for the given weight version.
 *
 * @param v the weight version, passed through to {@code loadPosWeight}
 * @return the network returned by {@code loadPosWeight(64, v)}
 */
public static POSNeuralNetwork loadPosWeight64(int v) {
    final int hiddenUnits = 64;
    return loadPosWeight(hiddenUnits, v);
}

private static POSNeuralNetwork loadPosWeight(int weightHiddenUnits, int v) {
if (v != 1 && v != 2)
throw new RuntimeException("Version 1 or 2 only: " + v);
if (v != 1 && v != 2 && v != 3)
throw new RuntimeException("Version 1, 2 or 3 only: " + v);

if (v == 1)
POSDataset.useV1();
else
else if (v == 2)
POSDataset.useV2();
else
POSDataset.useV3();

Logger.println("===============POS Neural network settings===============");

POSNeuralNetwork nn = new POSNeuralNetwork(POSDataset.getVocabSize(), POSDataset.getClassesSize());

Logger.println("Version: " + v);
Logger.println("Hidden units: " + weightHiddenUnits);
Logger.println("Vocab Size: " + POSDataset.getVocabSize());
Logger.println("Classes Size: " + POSDataset.getClassesSize());

try {
ArrayList<String> fileList = getFileList(weightHiddenUnits, v);

@@ -278,6 +306,8 @@ private static POSNeuralNetwork loadPosWeight(int weightHiddenUnits, int v) {
throw new RuntimeException();
}

Logger.println("===============POS Neural network settings===============");

return nn;
}
}
@@ -11,9 +11,10 @@ public static Double[] createInput(String word, ArrayList<String> words) {
String[] inputWords = Engine.tokenize(word);
Double[] bag = new Double[words.size()];
// stem words
for (int j = 0; j < inputWords.length; j++) {
inputWords[j] = Engine.stem(inputWords[j].toLowerCase());
}
inputWords = Engine.stem(inputWords);
// for (int j = 0; j < inputWords.length; j++) {
// inputWords[j] = Engine.stem(inputWords[j].toLowerCase());
// }
// set input vector
List<String> inputWordsList = Arrays.asList(inputWords);

@@ -3,12 +3,14 @@
import chatbot.engine.math.MathFunction;
import chatbot.engine.math.MatrixFunction;
import chatbot.inputs.dataset.POSDataset;
import chatbot.utils.Logger;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;

public class POSInput {

@@ -107,12 +109,17 @@ public static String[][] convertToFeatures(String[] sentenceTerms) {
'capitals_inside': sentence_terms[index][1:].lower() != sentence_terms[index][1:]
}
*/
Logger.println("Converting input to features...");

String[][] out = new String[sentenceTerms.length][POSDataset.getVocabSize()];

for (int index = 0; index < sentenceTerms.length; index++) {
out[index] = convertToFeatures(sentenceTerms, index);
Logger.println(sentenceTerms[index] + " -> " + Arrays.toString(out[index]));
}

Logger.println("Done...");

return out;
}

@@ -1,7 +1,7 @@
package chatbot.inputs.dataset;

import chatbot.utils.ArrayUtils;
import chatbot.engine.math.MatrixFunction;
import chatbot.utils.ArrayUtils;

import java.io.BufferedReader;
import java.io.FileInputStream;
@@ -65,6 +65,10 @@ private static void loadV2() {
load("data/inputVocab_v2.csv", "data/posClasses_v2.csv");
}

/**
 * Calls {@code load} with the version 3 vocabulary and POS class file paths.
 */
private static void loadV3() {
    final String vocabPath = "data/inputVocab_v3.csv";
    final String classesPath = "data/posClasses_v3.csv";
    load(vocabPath, classesPath);
}

public static void useV1() {
version = 1;
loadV1();
@@ -75,6 +79,11 @@ public static void useV2() {
loadV2();
}

/**
 * Switches the dataset to version 3: records the version number in {@code version},
 * then loads the version 3 files through {@code loadV3()}.
 */
public static void useV3() {
version = 3;
loadV3();
}

/**
 * Returns the number of entries currently held in {@code vocab}.
 *
 * @return {@code vocab.size()}
 */
public static int getVocabSize() {
    final int entries = vocab.size();
    return entries;
}
@@ -0,0 +1,21 @@
package chatbot.utils;

/**
 * Minimal console logger whose output can be switched off globally.
 */
public class Logger {

    /** When {@code false}, every logging call in this class is a no-op. Defaults to {@code true}. */
    public static boolean debugMode = true;

    /**
     * Prints {@code s} followed by a line break to standard output, if debug mode is on.
     *
     * @param s the message to print
     */
    public static void println(String s) {
        if (debugMode) {
            System.out.println(s);
        }
    }

    /**
     * Prints a formatted message to standard output, if debug mode is on.
     *
     * @param format a {@code printf}-style format string
     * @param args   the arguments referenced by the format string
     */
    public static void printf(String format, Object... args) {
        if (debugMode) {
            System.out.printf(format, args);
        }
    }

}