From 9eb979c10905e5962e6fc11fb01ebeb8ba0344ae Mon Sep 17 00:00:00 2001 From: Siva Reddy Date: Tue, 20 Dec 2016 11:57:44 +0000 Subject: [PATCH] Embedding similarity --- .../cli/CcgParseToUngroundedGraphs.java | 4 +- .../cli/RunGraphToQueryTrainingMain.java | 61 ++++---- .../cli/RunPrintDomainLexicon.java | 15 +- .../learning/GraphToQueryTraining.java | 61 ++++---- .../learning/GraphToQueryTrainingMain.java | 61 ++++---- .../parsing/GroundTestSentences.java | 26 ++-- .../graphparser/parsing/GroundedGraphs.java | 137 ++++++++++++++---- .../util/CrossLingualEmbeddingSimilarity.java | 41 ++++++ ...serOracleUsingGoldMidAndGoldRelations.java | 8 +- .../scripts/RunGraphParserKunData.java | 4 +- ...teGroundedGraphsFromSemanticParseTest.java | 16 +- .../parsing/CreateGroundedGraphsTest.java | 4 +- .../parsing/CreateGroundedLexiconTest.java | 4 +- 13 files changed, 296 insertions(+), 146 deletions(-) create mode 100644 src/in/sivareddy/graphparser/util/CrossLingualEmbeddingSimilarity.java diff --git a/src/in/sivareddy/graphparser/cli/CcgParseToUngroundedGraphs.java b/src/in/sivareddy/graphparser/cli/CcgParseToUngroundedGraphs.java index 6160d44..381c4a5 100644 --- a/src/in/sivareddy/graphparser/cli/CcgParseToUngroundedGraphs.java +++ b/src/in/sivareddy/graphparser/cli/CcgParseToUngroundedGraphs.java @@ -88,11 +88,11 @@ public CcgParseToUngroundedGraphs(String dataFolder, String languageCode, GroundedLexicon groundedLexicon = new GroundedLexicon(null); graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, - relationLexicalIdentifiers, relationTypingIdentifiers, null, 1, false, + relationLexicalIdentifiers, relationTypingIdentifiers, null, null, 1, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, - false, false, false, 10.0, 1.0, 0.0, 0.0, 0.0); + false, false, false, false, false, 10.0, 1.0, 0.0, 0.0, 0.0); } public List> processText(String line) diff --git a/src/in/sivareddy/graphparser/cli/RunGraphToQueryTrainingMain.java b/src/in/sivareddy/graphparser/cli/RunGraphToQueryTrainingMain.java index 93a238e..173575c 100644 --- a/src/in/sivareddy/graphparser/cli/RunGraphToQueryTrainingMain.java +++ b/src/in/sivareddy/graphparser/cli/RunGraphToQueryTrainingMain.java @@ -51,6 +51,7 @@ public class RunGraphToQueryTrainingMain extends AbstractCli { // Log File private OptionSpec logFile; private OptionSpec loadModelFromFile; + private OptionSpec embeddingFile; private OptionSpec lexicon; private OptionSpec cachedKB; private OptionSpec testFile; @@ -119,6 +120,7 @@ public class RunGraphToQueryTrainingMain extends AbstractCli { private OptionSpec argumentStemMatchingFlag; private OptionSpec argumentStemGrelPartMatchingFlag; private OptionSpec ngramStemMatchingFlag; + private OptionSpec useEmbeddingSimilarityFlag; // Graph features private OptionSpec graphIsConnectedFlag; @@ -278,6 +280,12 @@ public void initializeOptions(OptionParser parser) { .accepts("loadModelFromFile", "Load model from serialized model file").withRequiredArg() .ofType(String.class).defaultsTo(""); + + embeddingFile = + parser + .accepts("embeddingFile", + "Load word embeddings from file").withRequiredArg() + .ofType(String.class).defaultsTo(""); lexicon = parser.accepts("lexicon", "lexicon containing nl to grounded mappings") @@ -476,6 +484,10 @@ public void initializeOptions(OptionParser parser) { .accepts("ngramStemMatchingFlag", "use stem overlaps between words in the sentence and grounded edges") .withRequiredArg().ofType(Boolean.class).defaultsTo(false); + useEmbeddingSimilarityFlag = parser + .accepts("useEmbeddingSimilarityFlag", + "use embedding similarity between grounded edges and words or ungronded edges") + .withRequiredArg().ofType(Boolean.class).defaultsTo(false); // Graph features graphIsConnectedFlag = @@ -671,21 +683,18 @@ public void run(OptionSet options) { KnowledgeBase kb = null; if (!options.valueOf(cachedKB).equals("")) { - kb = - new KnowledgeBaseCached(options.valueOf(cachedKB), - relationTypesFileName); + kb = new KnowledgeBaseCached(options.valueOf(cachedKB), + relationTypesFileName); } else { KnowledgeBaseOnline.TYPE_KEY = options.valueOf(typeKey); - kb = - new KnowledgeBaseOnline(options.valueOf(endpoint), String.format( - "http://%s:8890/sparql", options.valueOf(endpoint)), "dba", - "dba", 50000, schemaObj); + kb = new KnowledgeBaseOnline(options.valueOf(endpoint), + String.format("http://%s:8890/sparql", options.valueOf(endpoint)), + "dba", "dba", 50000, schemaObj); } - RdfGraphTools rdfGraphTools = - new RdfGraphTools(options.valueOf(endpoint), String.format( - "http://%s:8890/sparql", options.valueOf(endpoint)), "dba", - "dba", options.valueOf(timeout)); + RdfGraphTools rdfGraphTools = new RdfGraphTools(options.valueOf(endpoint), + String.format("http://%s:8890/sparql", options.valueOf(endpoint)), + "dba", "dba", options.valueOf(timeout)); GraphToSparqlConverter.TYPE_KEY = options.valueOf(typeKey); GroundedGraphs.CONTENT_WORD_POS = Sets.newHashSet(Splitter.on(";").trimResults().omitEmptyStrings() @@ -694,15 +703,13 @@ public void run(OptionSet options) { List kbGraphUri = Lists.newArrayList(Splitter.on(";").split(options.valueOf(domain))); - CcgAutoLexicon normalCcgAutoLexicon = - new CcgAutoLexicon(options.valueOf(ccgIndexedMapping), - options.valueOf(unaryRules), options.valueOf(binaryRules), - options.valueOf(ccgLexicon)); + CcgAutoLexicon normalCcgAutoLexicon = new CcgAutoLexicon( + options.valueOf(ccgIndexedMapping), options.valueOf(unaryRules), + options.valueOf(binaryRules), options.valueOf(ccgLexicon)); - CcgAutoLexicon questionCcgAutoLexicon = - new CcgAutoLexicon(options.valueOf(ccgIndexedMapping), - options.valueOf(unaryRules), options.valueOf(binaryRules), - options.valueOf(ccgLexiconQuestions)); + CcgAutoLexicon questionCcgAutoLexicon = new CcgAutoLexicon( + options.valueOf(ccgIndexedMapping), options.valueOf(unaryRules), + options.valueOf(binaryRules), options.valueOf(ccgLexiconQuestions)); GroundedLexicon groundedLexicon = new GroundedLexicon(options.valueOf(lexicon)); @@ -718,6 +725,7 @@ public void run(OptionSet options) { String logfile = options.valueOf(logFile); String loadModelFromFileVal = options.valueOf(loadModelFromFile); + String embeddingFileVal = options.valueOf(embeddingFile); boolean debugEnabled = options.valueOf(debugEnabledFlag); int threadCount = options.valueOf(nthreads); @@ -771,6 +779,8 @@ public void run(OptionSet options) { boolean argumentStemGrelPartMatchingFlagVal = options.valueOf(argumentStemGrelPartMatchingFlag); boolean ngramStemMatchingFlagVal = options.valueOf(ngramStemMatchingFlag); + boolean useEmbeddingSimilarityFlagVal = + options.valueOf(useEmbeddingSimilarityFlag); // Graph features boolean graphIsConnectedFlagVal = options.valueOf(graphIsConnectedFlag); @@ -831,8 +841,8 @@ public void run(OptionSet options) { options.valueOf(groundTrainingCorpusInTheEnd); // Set pointWiseF1Threshold for learning. IMPORTANT. - GraphToQueryTraining.setPointWiseF1Threshold(options - .valueOf(pointWiseF1Threshold)); + GraphToQueryTraining + .setPointWiseF1Threshold(options.valueOf(pointWiseF1Threshold)); GraphToQueryTrainingMain graphToQueryModel = new GraphToQueryTrainingMain( schemaObj, kb, groundedLexicon, normalCcgAutoLexicon, @@ -840,7 +850,7 @@ public void run(OptionSet options) { supervisedTrainingFile, corupusTrainingFile, groundInputCorporaFiles, semanticParseKeyString, goldParsesFileVal, mostFrequentTypesFileVal, debugEnabled, groundTrainingCorpusInTheEndVal, - trainingSampleSizeCount, logfile, loadModelFromFileVal, + trainingSampleSizeCount, logfile, loadModelFromFileVal, embeddingFileVal, nBestTrainSyntacticParsesVal, nBestTestSyntacticParsesVal, nbestEdgesVal, nbestGraphsVal, forestSizeVal, ngramLengthVal, useSchemaVal, useKBVal, groundFreeVariablesVal, @@ -852,9 +862,10 @@ public void run(OptionSet options) { questionTypeGrelPartFlagVal, stemMatchingFlagVal, mediatorStemGrelPartMatchingFlagVal, argumentStemMatchingFlagVal, argumentStemGrelPartMatchingFlagVal, ngramStemMatchingFlagVal, - graphIsConnectedFlagVal, graphHasEdgeFlagVal, countNodesFlagVal, - edgeNodeCountFlagVal, duplicateEdgesFlagVal, grelGrelFlagVal, - useLexiconWeightsRelVal, useLexiconWeightsTypeVal, validQueryFlagVal, + useEmbeddingSimilarityFlagVal, graphIsConnectedFlagVal, + graphHasEdgeFlagVal, countNodesFlagVal, edgeNodeCountFlagVal, + duplicateEdgesFlagVal, grelGrelFlagVal, useLexiconWeightsRelVal, + useLexiconWeightsTypeVal, validQueryFlagVal, useAnswerTypeQuestionWordFlagVal, useNbestGraphsVal, addBagOfWordsGraphVal, addOnlyBagOfWordsGraphVal, handleNumbersFlagVal, entityScoreFlagVal, entityWordOverlapFlagVal, diff --git a/src/in/sivareddy/graphparser/cli/RunPrintDomainLexicon.java b/src/in/sivareddy/graphparser/cli/RunPrintDomainLexicon.java index 0bdc715..67331df 100644 --- a/src/in/sivareddy/graphparser/cli/RunPrintDomainLexicon.java +++ b/src/in/sivareddy/graphparser/cli/RunPrintDomainLexicon.java @@ -169,14 +169,13 @@ public void run(OptionSet options) { String[] relationTypingIdentifiers = {}; GroundedLexicon groundedLexicon = new GroundedLexicon(null); - GroundedGraphs graphCreator = - new GroundedGraphs(schemaObj, kb, groundedLexicon, - normalCcgAutoLexicon, questionCcgAutoLexicon, - relationLexicalIdentifiers, relationTypingIdentifiers, null, 1, - false, false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, false, - false, false, false, false, false, false, 0.0, 0.0, 0.0, 0.0, 0.0); + GroundedGraphs graphCreator = new GroundedGraphs(schemaObj, kb, + groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, + relationLexicalIdentifiers, relationTypingIdentifiers, null, null, 1, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, 0.0, 0.0, 0.0, 0.0, 0.0); CreateGroundedLexicon engine = new CreateGroundedLexicon(graphCreator, kb, semanticParseKeyString, diff --git a/src/in/sivareddy/graphparser/learning/GraphToQueryTraining.java b/src/in/sivareddy/graphparser/learning/GraphToQueryTraining.java index ef12067..c989993 100644 --- a/src/in/sivareddy/graphparser/learning/GraphToQueryTraining.java +++ b/src/in/sivareddy/graphparser/learning/GraphToQueryTraining.java @@ -7,6 +7,7 @@ import in.sivareddy.graphparser.parsing.LexicalGraph; import in.sivareddy.graphparser.parsing.LexicalGraph.AnswerTypeQuestionWordFeature; import in.sivareddy.graphparser.parsing.LexicalGraph.ValidQueryFeature; +import in.sivareddy.graphparser.util.CrossLingualEmbeddingSimilarity; import in.sivareddy.graphparser.util.GroundedLexicon; import in.sivareddy.graphparser.util.RdfGraphTools; import in.sivareddy.graphparser.util.Schema; @@ -70,6 +71,7 @@ public class GraphToQueryTraining { private static double POINTWISE_F1_THRESHOLD = 0.90; private StructuredPercepton learningModel; + private CrossLingualEmbeddingSimilarity embeddings; private Schema schema; private KnowledgeBase kb; private GroundedLexicon groundedLexicon; @@ -118,7 +120,8 @@ public GraphToQueryTraining(Schema schema, KnowledgeBase kb, boolean useSchema, boolean useKB, boolean groundFreeVariables, boolean groundEntityVariableEdges, boolean groundEntityEntityEdges, boolean useEmtpyTypes, boolean ignoreTypes, - StructuredPercepton learningModel, boolean urelGrelFlag, + StructuredPercepton learningModel, + CrossLingualEmbeddingSimilarity embeddings, boolean urelGrelFlag, boolean urelPartGrelPartFlag, boolean utypeGtypeFlag, boolean gtypeGrelFlag, boolean grelGrelFlag, boolean ngramGrelPartFlag, boolean wordGrelPartFlag, boolean wordGrelFlag, boolean argGrelPartFlag, @@ -127,20 +130,20 @@ public GraphToQueryTraining(Schema schema, KnowledgeBase kb, boolean mediatorStemGrelPartMatchingFlag, boolean argumentStemMatchingFlag, boolean argumentStemGrelPartMatchingFlag, boolean ngramStemMatchingFlag, - boolean graphIsConnectedFlag, boolean graphHasEdgeFlag, - boolean countNodesFlag, boolean edgeNodeCountFlag, - boolean useLexiconWeightsRel, boolean useLexiconWeightsType, - boolean duplicateEdgesFlag, boolean validQueryFlag, - boolean useAnswerTypeQuestionWordFlag, boolean useNbestSurrogateGraphs, - boolean addBagOfWordsGraph, boolean addOnlyBagOfWordsGraph, - boolean handleNumbers, boolean entityScoreFlag, - boolean entityWordOverlapFlag, boolean paraphraseScoreFlag, - boolean paraphraseClassifierScoreFlag, boolean allowMerging, - boolean useGoldRelations, boolean evaluateOnlyTheFirstBest, - boolean handleEventEventEdges, boolean useExpand, boolean useHyperExpand, - double initialEdgeWeight, double initialTypeWeight, - double initialWordWeight, double mergeEdgeWeight, - double stemFeaturesWeight, + boolean useEmbeddingSimilarityFlag, boolean graphIsConnectedFlag, + boolean graphHasEdgeFlag, boolean countNodesFlag, + boolean edgeNodeCountFlag, boolean useLexiconWeightsRel, + boolean useLexiconWeightsType, boolean duplicateEdgesFlag, + boolean validQueryFlag, boolean useAnswerTypeQuestionWordFlag, + boolean useNbestSurrogateGraphs, boolean addBagOfWordsGraph, + boolean addOnlyBagOfWordsGraph, boolean handleNumbers, + boolean entityScoreFlag, boolean entityWordOverlapFlag, + boolean paraphraseScoreFlag, boolean paraphraseClassifierScoreFlag, + boolean allowMerging, boolean useGoldRelations, + boolean evaluateOnlyTheFirstBest, boolean handleEventEventEdges, + boolean useExpand, boolean useHyperExpand, double initialEdgeWeight, + double initialTypeWeight, double initialWordWeight, + double mergeEdgeWeight, double stemFeaturesWeight, RdfGraphTools rdfGraphTools, List kbGraphUri) throws IOException { String[] relationLexicalIdentifiers = {"lemma"}; String[] relationTypingIdentifiers = {}; @@ -165,6 +168,7 @@ public GraphToQueryTraining(Schema schema, KnowledgeBase kb, this.useAnswerTypeQuestionWordFlag = useAnswerTypeQuestionWordFlag; this.learningModel = learningModel; + this.embeddings = embeddings; this.schema = schema; this.kb = kb; this.groundedLexicon = groundedLexicon; @@ -188,19 +192,20 @@ public GraphToQueryTraining(Schema schema, KnowledgeBase kb, this.graphCreator = new GroundedGraphs(this.schema, this.kb, this.groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, relationLexicalIdentifiers, relationTypingIdentifiers, - this.learningModel, ngramLength, urelGrelFlag, urelPartGrelPartFlag, - utypeGtypeFlag, gtypeGrelFlag, grelGrelFlag, ngramGrelPartFlag, - wordGrelPartFlag, wordGrelFlag, argGrelPartFlag, argGrelFlag, - questionTypeGrelPartFlag, eventTypeGrelPartFlag, stemMatchingFlag, - mediatorStemGrelPartMatchingFlag, argumentStemMatchingFlag, - argumentStemGrelPartMatchingFlag, ngramStemMatchingFlag, - graphIsConnectedFlag, graphHasEdgeFlag, countNodesFlag, - edgeNodeCountFlag, useLexiconWeightsRel, useLexiconWeightsType, - duplicateEdgesFlag, ignorePronouns, handleNumbers, entityScoreFlag, - entityWordOverlapFlag, paraphraseScoreFlag, - paraphraseClassifierScoreFlag, allowMerging, handleEventEventEdges, - useExpand, useHyperExpand, initialEdgeWeight, initialTypeWeight, - initialWordWeight, mergeEdgeWeight, stemFeaturesWeight); + this.learningModel, this.embeddings, ngramLength, urelGrelFlag, + urelPartGrelPartFlag, utypeGtypeFlag, gtypeGrelFlag, grelGrelFlag, + ngramGrelPartFlag, wordGrelPartFlag, wordGrelFlag, argGrelPartFlag, + argGrelFlag, questionTypeGrelPartFlag, eventTypeGrelPartFlag, + stemMatchingFlag, mediatorStemGrelPartMatchingFlag, + argumentStemMatchingFlag, argumentStemGrelPartMatchingFlag, + ngramStemMatchingFlag, useEmbeddingSimilarityFlag, graphIsConnectedFlag, + graphHasEdgeFlag, countNodesFlag, edgeNodeCountFlag, + useLexiconWeightsRel, useLexiconWeightsType, duplicateEdgesFlag, + ignorePronouns, handleNumbers, entityScoreFlag, entityWordOverlapFlag, + paraphraseScoreFlag, paraphraseClassifierScoreFlag, allowMerging, + handleEventEventEdges, useExpand, useHyperExpand, initialEdgeWeight, + initialTypeWeight, initialWordWeight, mergeEdgeWeight, + stemFeaturesWeight); } diff --git a/src/in/sivareddy/graphparser/learning/GraphToQueryTrainingMain.java b/src/in/sivareddy/graphparser/learning/GraphToQueryTrainingMain.java index 3c65461..db681ef 100644 --- a/src/in/sivareddy/graphparser/learning/GraphToQueryTrainingMain.java +++ b/src/in/sivareddy/graphparser/learning/GraphToQueryTrainingMain.java @@ -1,6 +1,7 @@ package in.sivareddy.graphparser.learning; import in.sivareddy.graphparser.ccg.CcgAutoLexicon; +import in.sivareddy.graphparser.util.CrossLingualEmbeddingSimilarity; import in.sivareddy.graphparser.util.GroundedLexicon; import in.sivareddy.graphparser.util.RdfGraphTools; import in.sivareddy.graphparser.util.Schema; @@ -53,6 +54,7 @@ public class GraphToQueryTrainingMain { private int nBestTrainSyntacticParses; private String semanticParseKey; private StructuredPercepton currentIterationModel; + private CrossLingualEmbeddingSimilarity embeddings = null; private StructuredPercepton bestModelSoFar; private boolean currentModelIsTheBestModel; private Double highestPerformace = 0.0; @@ -66,7 +68,7 @@ public GraphToQueryTrainingMain(Schema schema, KnowledgeBase kb, String groundInputCorpora, String sematicParseKey, String goldParsesFile, String mostFrequentTypesFile, boolean debugEnabled, boolean groundTrainingCorpusInTheEndVal, int trainingSampleSize, - String logFile, String loadModelFromFile, int nBestTrainSyntacticParses, + String logFile, String loadModelFromFile, String embeddingFile, int nBestTrainSyntacticParses, int nBestTestSyntacticParses, int nbestBestEdges, int nbestGraphs, int forrestSize, int ngramLength, boolean useSchema, boolean useKB, boolean groundFreeVariables, boolean groundEntityVariableEdges, @@ -79,20 +81,21 @@ public GraphToQueryTrainingMain(Schema schema, KnowledgeBase kb, boolean stemMatchingFlag, boolean mediatorStemGrelPartMatchingFlag, boolean argumentStemMatchingFlag, boolean argumentStemGrelPartMatchingFlag, boolean ngramStemMatchingFlag, - boolean graphIsConnectedFlag, boolean graphHasEdgeFlag, - boolean countNodesFlag, boolean edgeNodeCountFlag, - boolean duplicateEdgesFlag, boolean grelGrelFlag, - boolean useLexiconWeightsRel, boolean useLexiconWeightsType, - boolean validQueryFlag, boolean useAnswerTypeQuestionWordFlag, - boolean useNbestGraphs, boolean addBagOfWordsGraph, - boolean addOnlyBagOfWordsGraph, boolean handleNumbers, - boolean entityScoreFlag, boolean entityWordOverlapFlag, - boolean paraphraseScoreFlag, boolean paraphraseClassifierScoreFlag, - boolean allowMerging, boolean useGoldRelations, - boolean evaluateOnlyTheFirstBest, boolean handleEventEventEdges, - boolean useExpand, boolean useHyperExpand, double initialEdgeWeight, - double initialTypeWeight, double initialWordWeight, - double mergeEdgeWeight, double stemFeaturesWeight) throws IOException { + boolean useEmbeddingSimilarityFlag, boolean graphIsConnectedFlag, + boolean graphHasEdgeFlag, boolean countNodesFlag, + boolean edgeNodeCountFlag, boolean duplicateEdgesFlag, + boolean grelGrelFlag, boolean useLexiconWeightsRel, + boolean useLexiconWeightsType, boolean validQueryFlag, + boolean useAnswerTypeQuestionWordFlag, boolean useNbestGraphs, + boolean addBagOfWordsGraph, boolean addOnlyBagOfWordsGraph, + boolean handleNumbers, boolean entityScoreFlag, + boolean entityWordOverlapFlag, boolean paraphraseScoreFlag, + boolean paraphraseClassifierScoreFlag, boolean allowMerging, + boolean useGoldRelations, boolean evaluateOnlyTheFirstBest, + boolean handleEventEventEdges, boolean useExpand, boolean useHyperExpand, + double initialEdgeWeight, double initialTypeWeight, + double initialWordWeight, double mergeEdgeWeight, + double stemFeaturesWeight) throws IOException { this.semanticParseKey = sematicParseKey; this.nBestTestSyntacticParses = nBestTestSyntacticParses; @@ -125,18 +128,23 @@ public GraphToQueryTrainingMain(Schema schema, KnowledgeBase kb, } else { currentIterationModel = new StructuredPercepton(); } + + if (embeddingFile != null && !embeddingFile.equals("")) { + embeddings = new CrossLingualEmbeddingSimilarity(embeddingFile); + } + graphToQuery = new GraphToQueryTraining(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, semanticParseKey, goldParsesFile, mostFrequentTypesFile, this.nBestTrainSyntacticParses, this.nBestTestSyntacticParses, nbestBestEdges, nbestGraphs, forrestSize, ngramLength, useSchema, useKB, groundFreeVariables, groundEntityVariableEdges, groundEntityEntityEdges, useEmtpyTypes, - ignoreTypes, currentIterationModel, urelGrelFlag, urelPartGrelPartFlag, + ignoreTypes, currentIterationModel, embeddings, urelGrelFlag, urelPartGrelPartFlag, utypeGtypeFlag, gtypeGrelFlag, grelGrelFlag, ngramGrelPartFlag, wordGrelPartFlag, wordGrelFlag, argGrelPartFlag, argGrelFlag, questionTypeGrelPartFlag, eventTypeGrelPartFlag, stemMatchingFlag, mediatorStemGrelPartMatchingFlag, argumentStemMatchingFlag, - argumentStemGrelPartMatchingFlag, ngramStemMatchingFlag, + argumentStemGrelPartMatchingFlag, ngramStemMatchingFlag, useEmbeddingSimilarityFlag, graphIsConnectedFlag, graphHasEdgeFlag, countNodesFlag, edgeNodeCountFlag, useLexiconWeightsRel, useLexiconWeightsType, duplicateEdgesFlag, validQueryFlag, useAnswerTypeQuestionWordFlag, @@ -485,6 +493,7 @@ public static void main_func(Schema schema, KnowledgeBase kb, String logFile = "working/sup_easyccg.log.txt"; String loadModelFromFile = null; + String embeddingFile = null; String semanticParseKey = "synPars"; String goldParsesFile = null; @@ -531,6 +540,7 @@ public static void main_func(Schema schema, KnowledgeBase kb, boolean argumentStemMatchingFlag = true; boolean argumentStemGrelPartMatchingFlag = true; boolean ngramStemMatchingFlag = false; + boolean useEmbeddingSimilarityFlag = false; // Graph features boolean graphIsConnectedFlag = false; @@ -582,17 +592,18 @@ public static void main_func(Schema schema, KnowledgeBase kb, supervisedTrainingFile, corupusTrainingFile, groundInputCorpora, mostFrequentTypesFile, semanticParseKey, goldParsesFile, debugEnabled, groundTrainingCorpusInTheEndVal, trainingSampleSize, logFile, - loadModelFromFile, nBestTrainSyntacticParses, nBestTestSyntacticParses, - nbestBestEdges, nbestGraphs, forrestSize, ngramLength, useSchema, useKB, - groundFreeVariables, groundEntityVariableEdges, groundEntityEntityEdges, - useEmtpyTypes, ignoreTypes, urelGrelFlag, urelPartGrelPartFlag, - utypeGtypeFlag, gtypeGrelFlag, ngramGrelPartFlag, wordGrelPartFlag, - wordGrelFlag, eventTypeGrelPartFlag, argGrelPartFlag, argGrelFlag, + loadModelFromFile, embeddingFile, nBestTrainSyntacticParses, + nBestTestSyntacticParses, nbestBestEdges, nbestGraphs, forrestSize, + ngramLength, useSchema, useKB, groundFreeVariables, + groundEntityVariableEdges, groundEntityEntityEdges, useEmtpyTypes, + ignoreTypes, urelGrelFlag, urelPartGrelPartFlag, utypeGtypeFlag, + gtypeGrelFlag, ngramGrelPartFlag, wordGrelPartFlag, wordGrelFlag, + eventTypeGrelPartFlag, argGrelPartFlag, argGrelFlag, questionTypeGrelPartFlag, stemMatchingFlag, mediatorStemGrelPartMatchingFlag, argumentStemMatchingFlag, argumentStemGrelPartMatchingFlag, ngramStemMatchingFlag, - graphIsConnectedFlag, graphHasEdgeFlag, countNodesFlag, - edgeNodeCountFlag, duplicateEdgesFlag, grelGrelFlag, + useEmbeddingSimilarityFlag, graphIsConnectedFlag, graphHasEdgeFlag, + countNodesFlag, edgeNodeCountFlag, duplicateEdgesFlag, grelGrelFlag, useLexiconWeightsRel, useLexiconWeightsType, validQueryFlag, useAnswerTypeQuestionWordFlag, useNbestGraphs, addBagOfWordsGraph, addOnlyBagOfWordsGraph, handleNumbers, entityScoreFlag, diff --git a/src/in/sivareddy/graphparser/parsing/GroundTestSentences.java b/src/in/sivareddy/graphparser/parsing/GroundTestSentences.java index 6a82a00..890b228 100644 --- a/src/in/sivareddy/graphparser/parsing/GroundTestSentences.java +++ b/src/in/sivareddy/graphparser/parsing/GroundTestSentences.java @@ -1,6 +1,7 @@ package in.sivareddy.graphparser.parsing; import in.sivareddy.graphparser.ccg.CcgAutoLexicon; +import in.sivareddy.graphparser.util.CrossLingualEmbeddingSimilarity; import in.sivareddy.graphparser.util.GroundedLexicon; import in.sivareddy.graphparser.util.RdfGraphTools; import in.sivareddy.graphparser.util.Schema; @@ -63,6 +64,7 @@ public static void run(Schema schema, KnowledgeBaseCached kb, boolean argumentStemMatchingFlag = true; boolean argumentStemGrelPartMatchingFlag = true; boolean ngramStemMatchingFlag = false; + boolean useEmbeddingSimilarityFlag = false; boolean graphIsConnectedFlag = false; boolean graphHasEdgeFlag = false; @@ -93,24 +95,26 @@ public static void run(Schema schema, KnowledgeBaseCached kb, double stemFeaturesWeight = 0.0; StructuredPercepton learningModel = new StructuredPercepton(); + CrossLingualEmbeddingSimilarity embeddings = null; // GroundedLexicon groundedLexicon = null; GroundedGraphs graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, relationLexicalIdentifiers, relationTypingIdentifiers, learningModel, - ngramLength, urelGrelFlag, urelPartGrelPartFlag, utypeGtypeFlag, - gtypeGrelFlag, grelGrelFlag, ngramGrelPartFlag, wordGrelPartFlag, - wordGrelFlag, argGrelPartFlag, argGrelFlag, questionTypeGrelPartFlag, - wordBigramGrelPartFlag, stemMatchingFlag, + embeddings, ngramLength, urelGrelFlag, urelPartGrelPartFlag, + utypeGtypeFlag, gtypeGrelFlag, grelGrelFlag, ngramGrelPartFlag, + wordGrelPartFlag, wordGrelFlag, argGrelPartFlag, argGrelFlag, + questionTypeGrelPartFlag, wordBigramGrelPartFlag, stemMatchingFlag, mediatorStemGrelPartMatchingFlag, argumentStemMatchingFlag, argumentStemGrelPartMatchingFlag, ngramStemMatchingFlag, - graphIsConnectedFlag, graphHasEdgeFlag, countNodesFlag, - edgeNodeCountFlag, useLexiconWeightsRel, useLexiconWeightsType, - duplicateEdgesFlag, ignorePronouns, handleNumbers, entityScoreFlag, - entityWordOverlapFlag, paraphraseScoreFlag, - paraphraseClassifierScoreFlag, allowMerging, handleEventEventEdges, - useExpand, useHyperExpand, initialEdgeWeight, initialTypeWeight, - initialWordWeight, mergeEdgeWeight, stemFeaturesWeight); + useEmbeddingSimilarityFlag, graphIsConnectedFlag, graphHasEdgeFlag, + countNodesFlag, edgeNodeCountFlag, useLexiconWeightsRel, + useLexiconWeightsType, duplicateEdgesFlag, ignorePronouns, + handleNumbers, entityScoreFlag, entityWordOverlapFlag, + paraphraseScoreFlag, paraphraseClassifierScoreFlag, allowMerging, + handleEventEventEdges, useExpand, useHyperExpand, initialEdgeWeight, + initialTypeWeight, initialWordWeight, mergeEdgeWeight, + stemFeaturesWeight); JsonParser jsonParser = new JsonParser(); // BufferedReader br = new BufferedReader(new // FileReader("data/cai-yates-2013/question-and-logical-form-917/acl2014_domains/business_parse.txt")); diff --git a/src/in/sivareddy/graphparser/parsing/GroundedGraphs.java b/src/in/sivareddy/graphparser/parsing/GroundedGraphs.java index 11f81a3..a297f40 100644 --- a/src/in/sivareddy/graphparser/parsing/GroundedGraphs.java +++ b/src/in/sivareddy/graphparser/parsing/GroundedGraphs.java @@ -18,6 +18,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.log4j.Logger; +import org.ejml.simple.SimpleMatrix; import com.google.common.base.CharMatcher; import com.google.common.base.Joiner; @@ -74,6 +75,7 @@ import in.sivareddy.graphparser.parsing.LexicalGraph.UtypeGtypeFeature; import in.sivareddy.graphparser.parsing.LexicalGraph.WordGrelFeature; import in.sivareddy.graphparser.parsing.LexicalGraph.WordGrelPartFeature; +import in.sivareddy.graphparser.util.CrossLingualEmbeddingSimilarity; import in.sivareddy.graphparser.util.GroundedLexicon; import in.sivareddy.graphparser.util.Schema; import in.sivareddy.graphparser.util.graph.Edge; @@ -93,6 +95,7 @@ public class GroundedGraphs { private KnowledgeBase kb; private CcgParser normalCcgParser; private CcgParser questionCcgParser; + private final CrossLingualEmbeddingSimilarity embeddings; private static Set lexicalPosTags = Sets.newHashSet("NNP", "NNPS", "PROPN"); @@ -113,6 +116,7 @@ public class GroundedGraphs { private boolean argumentStemMatchingFlag = true; private boolean argumentStemGrelPartMatchingFlag = true; private boolean ngramStemMatchingFlag = false; + private boolean useEmbeddingSimilarity = false; private boolean graphIsConnectedFlag = false; private boolean graphHasEdgeFlag = false; private boolean countNodesFlag = false; @@ -130,6 +134,8 @@ public class GroundedGraphs { private boolean useExpand = false; private boolean useHyperExpand = false; + private String defaultKBLanguage = "en:"; + private StructuredPercepton learningModel; private int ngramLength = 2; public double initialEdgeWeight; @@ -140,30 +146,34 @@ public class GroundedGraphs { public Logger logger; private Map stems = Maps.newConcurrentMap(); + private Map edgeEmbeddings = Maps.newConcurrentMap(); + private Map, Double> edgeEmbeddingSimilarities = Maps.newConcurrentMap(); public GroundedGraphs(Schema schema, KnowledgeBase kb, GroundedLexicon groundedLexicon, CcgAutoLexicon normalCcgAutoLexicon, CcgAutoLexicon questionCcgAutoLexicon, String[] relationLexicalIdentifiers, String[] relationTypingIdentifiers, - StructuredPercepton learningModel, int ngramLength, boolean urelGrelFlag, - boolean urelPartGrelPartFlag, boolean utypeGtypeFlag, - boolean gtypeGrelPartFlag, boolean grelGrelFlag, boolean ngramGrelFlag, - boolean wordGrelPartFlag, boolean wordGrelFlag, boolean argGrelPartFlag, - boolean argGrelFlag, boolean questionTypeGrelPartFlag, - boolean eventTypeGrelPartFlag, boolean stemMatchingFlag, - boolean mediatorStemGrelPartMatchingFlag, + StructuredPercepton learningModel, + CrossLingualEmbeddingSimilarity embeddings, int ngramLength, + boolean urelGrelFlag, boolean urelPartGrelPartFlag, + boolean utypeGtypeFlag, boolean gtypeGrelPartFlag, boolean grelGrelFlag, + boolean ngramGrelFlag, boolean wordGrelPartFlag, boolean wordGrelFlag, + boolean argGrelPartFlag, boolean argGrelFlag, + boolean questionTypeGrelPartFlag, boolean eventTypeGrelPartFlag, + boolean stemMatchingFlag, boolean mediatorStemGrelPartMatchingFlag, boolean argumentStemMatchingFlag, boolean argumentStemGrelPartMatchingFlag, boolean ngramStemMatchingFlag, - boolean graphIsConnectedFlag, boolean graphHasEdgeFlag, - boolean countNodesFlag, boolean edgeNodeCountFlag, - boolean useLexiconWeightsRel, boolean useLexiconWeightsType, - boolean duplicateEdgesFlag, boolean ignorePronouns, boolean handleNumbers, - boolean entityScoreFlag, boolean entityWordOverlapFlag, - boolean paraphraseScoreFlag, boolean paraphraseClassifierScoreFlag, - boolean allowMerging, boolean handleEventEventEdges, boolean useExpand, - boolean useHyperExpand, double initialEdgeWeight, - double initialTypeWeight, double initialWordWeight, - double mergeEdgeWeight, double stemFeaturesWeight) throws IOException { + boolean useEmbeddingSimilarity, boolean graphIsConnectedFlag, + boolean graphHasEdgeFlag, boolean countNodesFlag, + boolean edgeNodeCountFlag, boolean useLexiconWeightsRel, + boolean useLexiconWeightsType, boolean duplicateEdgesFlag, + boolean ignorePronouns, boolean handleNumbers, boolean entityScoreFlag, + boolean entityWordOverlapFlag, boolean paraphraseScoreFlag, + boolean paraphraseClassifierScoreFlag, boolean allowMerging, + boolean handleEventEventEdges, boolean useExpand, boolean useHyperExpand, + double initialEdgeWeight, double initialTypeWeight, + double initialWordWeight, double mergeEdgeWeight, + double stemFeaturesWeight) throws IOException { // ccg parser initialisation String[] argumentLexicalIdenfiers = {"mid"}; @@ -180,6 +190,7 @@ public GroundedGraphs(Schema schema, KnowledgeBase kb, this.schema = schema; this.learningModel = learningModel != null ? learningModel : new StructuredPercepton(); + this.embeddings = embeddings; this.urelGrelFlag = urelGrelFlag; this.urelPartGrelPartFlag = urelPartGrelPartFlag; @@ -198,6 +209,7 @@ public GroundedGraphs(Schema schema, KnowledgeBase kb, this.argumentStemMatchingFlag = argumentStemMatchingFlag; this.argumentStemGrelPartMatchingFlag = argumentStemGrelPartMatchingFlag; this.ngramStemMatchingFlag = ngramStemMatchingFlag; + this.useEmbeddingSimilarity = useEmbeddingSimilarity; this.graphIsConnectedFlag = graphIsConnectedFlag; this.graphHasEdgeFlag = graphHasEdgeFlag; @@ -753,9 +765,13 @@ public List getDependencyUngroundedGraph( // "VBG", "NNP", "NNPS"); public static List getNgrams(List words, int nGram) { + return getNgrams(words, nGram, false); + } + + public static List getNgrams(List words, int nGram, boolean useWord) { List wordStrings = new ArrayList<>(); for (LexicalItem word : words) { - if (!word.getLemma().equals(word.getMid()) && !word.getMid().equals("x")) { + if (word.isEntity() && !word.getMid().equals("x")) { // Current word is an entity. continue; } @@ -766,7 +782,7 @@ public static List getNgrams(List words, int nGram) { continue; } - String wordString = word.getLemma(); + String wordString = useWord ? word.getWord() : word.getLemma(); if (stopWordsUniversal.contains(wordString) || punctuation.matcher(wordString).matches() || SentenceKeys.PUNCTUATION_TAGS.contains(word.getPos())) { @@ -2237,6 +2253,7 @@ private List getAdditionalTypeFeatures( } public boolean stringContainsWord(String grelLeftStripped, String modifierWord) { + modifierWord = modifierWord.replaceFirst(defaultKBLanguage, ""); if (!stems.containsKey(modifierWord)) stems.put(modifierWord, PorterStemmer.getStem(modifierWord)); String modifierStem = stems.get(modifierWord); @@ -3360,16 +3377,33 @@ private List getEdgeFeatures(LexicalGraph gGraph, LexicalItem node1, fromIndex = toIndex - 2 < 0 ? 0 : toIndex - 2; grelRightInverse = Joiner.on(".").join(parts.subList(fromIndex, toIndex)); - for (String unigram : getNgrams(uGraph.getActualNodes(), 1)) { - if ((stringContainsWord(grelLeftStripped, unigram) - || stringContainsWord(grelLeftInverse, unigram)) - && (stringContainsWord(grelRightStripped, unigram) - || stringContainsWord(grelRightInverse, unigram))) { - NgramStemMatchingFeature s = - new NgramStemMatchingFeature(2.0 / (Math.max( - uGraph.getEdges(node1).size() + uGraph.getEdges(node2).size(), - 2.0))); - features.add(s); + if (!useEmbeddingSimilarity) { + for (String unigram : getNgrams(uGraph.getActualNodes(), 1)) { + if ((stringContainsWord(grelLeftStripped, unigram) + || stringContainsWord(grelLeftInverse, unigram)) + && (stringContainsWord(grelRightStripped, unigram) + || stringContainsWord(grelRightInverse, unigram))) { + NgramStemMatchingFeature s = new NgramStemMatchingFeature( + 2.0 / (Math.max(uGraph.getEdges(node1).size() + + uGraph.getEdges(node2).size(), 2.0))); + features.add(s); + } + } + } else if (embeddings != null){ + double sim = 0.0; + for (String unigram : getNgrams(uGraph.getActualNodes(), 1, true)) { + sim += computeEdgeSimilarity(unigram, grelLeftStripped); + sim += computeEdgeSimilarity(unigram, grelLeftInverse); + sim += computeEdgeSimilarity(unigram, grelRightStripped); + sim += computeEdgeSimilarity(unigram, grelRightInverse); + + if (sim > 0) { + sim = sim/2.0; + NgramStemMatchingFeature s = new NgramStemMatchingFeature( + sim / (Math.max(uGraph.getEdges(node1).size() + + uGraph.getEdges(node2).size(), 2.0))); + features.add(s); + } } } } @@ -3377,6 +3411,51 @@ private List getEdgeFeatures(LexicalGraph gGraph, LexicalItem node1, return features; } + private double computeEdgeSimilarity(String word, String subEdge) { + Pair key = Pair.of(word, subEdge); + if (edgeEmbeddingSimilarities.containsKey(key)) + return edgeEmbeddingSimilarities.get(key); + + SimpleMatrix wordEmbedding = embeddings.get(word); + if (wordEmbedding == null) { + edgeEmbeddingSimilarities.put(key, 0.0); + return 0.0; + } + + SimpleMatrix edgeEmbedding = getEdgeEmbedding(subEdge); + if (edgeEmbedding == null) { + edgeEmbeddingSimilarities.put(key, 0.0); + return 0.0; + } + + double sim = CrossLingualEmbeddingSimilarity.cosine(edgeEmbedding, wordEmbedding); + edgeEmbeddingSimilarities.put(key, sim); + return sim; + } + + private SimpleMatrix getEdgeEmbedding(String subEdge) { + if (edgeEmbeddings.containsKey(subEdge)) + return edgeEmbeddings.get(subEdge); + Iterator it = + Splitter.on(CharMatcher.anyOf("._")).trimResults().omitEmptyStrings() + .split(subEdge).iterator(); + SimpleMatrix edgeEmbedding = null; + while (it.hasNext()) { + String part = String.format("%s%s", defaultKBLanguage, it.next()); + SimpleMatrix partEmbedding = embeddings.get(part); + if (partEmbedding != null) { + if (edgeEmbedding != null) { + edgeEmbedding = edgeEmbedding.plus(partEmbedding); + } else { + edgeEmbedding = partEmbedding; + } + } + } + edgeEmbeddings.put(subEdge, edgeEmbedding); + return edgeEmbedding; + } + + private double countMediator(LexicalItem mediator, TreeSet> edges) { double count = 0.0; diff --git a/src/in/sivareddy/graphparser/util/CrossLingualEmbeddingSimilarity.java b/src/in/sivareddy/graphparser/util/CrossLingualEmbeddingSimilarity.java new file mode 100644 index 0000000..c557299 --- /dev/null +++ b/src/in/sivareddy/graphparser/util/CrossLingualEmbeddingSimilarity.java @@ -0,0 +1,41 @@ +package in.sivareddy.graphparser.util; + +import org.ejml.simple.SimpleMatrix; + +import edu.stanford.nlp.neural.Embedding; +import edu.stanford.nlp.neural.NeuralUtils; + +public class CrossLingualEmbeddingSimilarity extends Embedding { + public CrossLingualEmbeddingSimilarity(String embeddingFile) { + super(embeddingFile); + } + + public double cosine(String word1, String word2) { + SimpleMatrix embedding1 = this.get(word1); + if (embedding1 == null) return 0.0; + + SimpleMatrix embedding2 = this.get(word2); + if (embedding2 == null) return 0.0; + + double sim = NeuralUtils.cosine(embedding1, embedding2); + if (sim < 0.0) + return 0.0; + return sim; + } + + public static double cosine(SimpleMatrix vector1, SimpleMatrix vector2) { + double sim = + NeuralUtils.dot(vector1, vector2) / (vector1.normF() * vector2.normF()); + return sim < 0.0 ? 0.0 : sim; + } + + public static void main(String[] args) { + CrossLingualEmbeddingSimilarity embeddings = new CrossLingualEmbeddingSimilarity("data/en-es-de.translation_invariance.emb"); + System.out.println(embeddings.cosine("de:männer", "en:men")); + System.out.println(embeddings.cosine("de:männer", "es:hombres")); + System.out.println(embeddings.cosine("de:männer", "es:mujer")); + System.out.println(embeddings.cosine("en:house", "en:city")); + System.out.println(embeddings.cosine("en:home", "en:city")); + } + +} diff --git a/src/in/sivareddy/scripts/EvaluateGraphParserOracleUsingGoldMidAndGoldRelations.java b/src/in/sivareddy/scripts/EvaluateGraphParserOracleUsingGoldMidAndGoldRelations.java index 9d20237..814d6ff 100644 --- a/src/in/sivareddy/scripts/EvaluateGraphParserOracleUsingGoldMidAndGoldRelations.java +++ b/src/in/sivareddy/scripts/EvaluateGraphParserOracleUsingGoldMidAndGoldRelations.java @@ -112,11 +112,11 @@ public EvaluateGraphParserOracleUsingGoldMidAndGoldRelations( this.graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, relationLexicalIdentifiers, relationTypingIdentifiers, - new StructuredPercepton(), 1, true, true, true, true, true, true, true, + new StructuredPercepton(), null, 1, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - allowMerging, handleEventEventEdges, useExpand, useHyperExpand, 10.0, - 1.0, 0.0, 0.0, 0.0); + true, true, true, true, true, true, true, true, true, true, true, true, + true, allowMerging, handleEventEventEdges, useExpand, useHyperExpand, + 10.0, 1.0, 0.0, 0.0, 0.0); logger.setLevel(Level.DEBUG); logger.removeAllAppenders(); diff --git a/src/in/sivareddy/scripts/RunGraphParserKunData.java b/src/in/sivareddy/scripts/RunGraphParserKunData.java index e959f5f..aacf24e 100644 --- a/src/in/sivareddy/scripts/RunGraphParserKunData.java +++ b/src/in/sivareddy/scripts/RunGraphParserKunData.java @@ -106,11 +106,11 @@ public RunGraphParserKunData(Logger logger, String dataFolder, int nbestParses) GroundedLexicon groundedLexicon = new GroundedLexicon(null); graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, - relationLexicalIdentifiers, relationTypingIdentifiers, null, 1, false, + relationLexicalIdentifiers, relationTypingIdentifiers, null, null, 1, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, - false, false, false, 10.0, 1.0, 0.0, 0.0, 0.0); + false, false, false, false, false, 10.0, 1.0, 0.0, 0.0, 0.0); } public void processSentence(JsonObject sentence) { diff --git a/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsFromSemanticParseTest.java b/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsFromSemanticParseTest.java index 23a3bc0..47d481f 100644 --- a/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsFromSemanticParseTest.java +++ b/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsFromSemanticParseTest.java @@ -79,10 +79,10 @@ public void setUp() throws Exception { graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, relationLexicalIdentifiers, relationTypingIdentifiers, - new StructuredPercepton(), 1, true, true, true, true, true, true, true, + new StructuredPercepton(), null, 1, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, false, 10.0, 1.0, 0.0, 0.0, 0.0); + true, true, true, true, false, 10.0, 1.0, 0.0, 0.0, 0.0); logger.setLevel(Level.DEBUG); Appender stdoutAppender = new ConsoleAppender(layout); @@ -462,10 +462,10 @@ public void testHyperExpand() throws IOException { graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, relationLexicalIdentifiers, relationTypingIdentifiers, - new StructuredPercepton(), 1, true, true, true, true, true, true, true, + new StructuredPercepton(), null, 1, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, false, false, false, - false, false, false, true, 10.0, 1.0, 0.0, 0.0, 0.0); + true, true, true, true, true, true, true, true, true, true, false, + false, false, false, false, false, true, 10.0, 1.0, 0.0, 0.0, 0.0); JsonObject sentence = jsonParser .parse( @@ -497,10 +497,10 @@ public void testHyperExpandWithCount() throws IOException { graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, relationLexicalIdentifiers, relationTypingIdentifiers, - new StructuredPercepton(), 1, true, true, true, true, true, true, true, + new StructuredPercepton(), null, 1, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, false, false, false, - false, false, false, true, 10.0, 1.0, 0.0, 0.0, 0.0); + true, true, true, true, true, true, true, true, true, true, false, + false, false, false, false, false, true, 10.0, 1.0, 0.0, 0.0, 0.0); JsonObject sentence = jsonParser .parse( diff --git a/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsTest.java b/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsTest.java index 84e32c3..bb5352d 100644 --- a/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsTest.java +++ b/test/in/sivareddy/graphparser/parsing/CreateGroundedGraphsTest.java @@ -58,11 +58,11 @@ public void testGroundedGraphs() throws IOException { String[] relationTypingIdentifiers = {}; GroundedGraphs graphCreator = new GroundedGraphs(schema, kb, groundedLexicon, normalCcgAutoLexicon, questionCcgAutoLexicon, - relationLexicalIdentifiers, relationTypingIdentifiers, null, 1, false, + relationLexicalIdentifiers, relationTypingIdentifiers, null, null, 1, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, - false, false, false, 10.0, 1.0, 0.0, 0.0, 0.0); + false, false, false, false, false, 10.0, 1.0, 0.0, 0.0, 0.0); JsonParser parser = new JsonParser(); diff --git a/test/in/sivareddy/graphparser/parsing/CreateGroundedLexiconTest.java b/test/in/sivareddy/graphparser/parsing/CreateGroundedLexiconTest.java index 9fb97ba..0b6097e 100644 --- a/test/in/sivareddy/graphparser/parsing/CreateGroundedLexiconTest.java +++ b/test/in/sivareddy/graphparser/parsing/CreateGroundedLexiconTest.java @@ -33,11 +33,11 @@ public void testCreateGroundedLexicon() throws IOException, GroundedLexicon groundedLexicon = new GroundedLexicon(null); GroundedGraphs graphCreator = new GroundedGraphs(schemaObj, kb, groundedLexicon, normalCcgAutoLexicon, normalCcgAutoLexicon, - lexicalFields, relationTypingFeilds, null, 1, false, false, false, + lexicalFields, relationTypingFeilds, null, null, 1, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, - false, 0.0, 0.0, 0.0, 0.0, 0.0); + false, false, 0.0, 0.0, 0.0, 0.0, 0.0); CreateGroundedLexicon engine = new CreateGroundedLexicon(graphCreator, kb, "dependency_lambda", true,