added logic for videoquestion in questionasker

narain280493 · Mar 28, 2015 · bbacb49 · bbacb49
2 parents 91b09e8 + 9a57af4
commit bbacb49
Show file tree

Hide file tree

Showing 21 changed files with 357 additions and 76 deletions.
diff --git a/README.txt b/README.txt
@@ -1,3 +1,27 @@
+-------------------------------------------------------------------------------------------------------
+To make distractor generation work, you have to start the following servers.
+Servers 1) and 2) can be started from Eclipse:
+1) POSTagger server running on port 8080
+2) Supersense tagger running on port 8081
+
+Server 3) should be started from the command line:
+3) Python WordNet server running on port 8030
+You need the bottle framework for that.
+Install the latest stable release with "sudo pip install bottle" or "easy_install -U bottle".
+Run it using the command
+:~/workspace/QuestionGeneration$ cd pythonscripts/
+:~/workspace/QuestionGeneration/pythonscripts$ python python_wordnet_server.py
+
+******
+
+To make Wikipedia topic extraction work,
+you need Node.js and its related dependencies.
+
+Then run the test.js file
+using the command: phantomjs test.js
+
+******
+-------------------------------------------------------------------------------------------------------
 Question Generation via Overgenerating Transformations and Ranking
 Michael Heilman and Noah A. Smith
 

diff --git a/pythonscripts/Summary/getsummary.py b/pythonscripts/Summary/getsummary.py
@@ -11,7 +11,8 @@
 
 count =0
 inputText =" "
-
+f= open("/home/narain/workspace/questiongeneration/summarycontent.txt","wb")
+g= open("/home/narain/workspace/questiongeneration/transcripttext.txt","wb")
 with open("/home/narain/workspace/questiongeneration/transcript.json") as json_file:
     json_data = json.load(json_file)
 
@@ -27,6 +28,7 @@
 #print "After expanding contractions:\n",content
 content = regex(content)
 #print "After processing: \n",content
+g.write(content)
 st = SummaryTool()
 sentences_dic = st.get_senteces_ranks(content)
 summary = st.get_distractors(content,sentences_dic,1)
@@ -44,6 +46,8 @@
     	inputText = paragraph[count-1]
         break
 #print "paragraph:",inputText
+inputText = firstToThirdPerson(inputText)
+
 outputList = []
 tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
 sentences = tokenizer.tokenize(inputText)
@@ -55,7 +59,8 @@
    #     print "Less than 55"
         outputList.append(sentence)
 for sentence in outputList:
-    print sentence
+	f.write(sentence)
+	#print sentence
 
 
 
diff --git a/pythonscripts/Summary/personchange.txt b/pythonscripts/Summary/personchange.txt
@@ -1,4 +1,4 @@
-I:The author
+I:the speaker
 my:his
 is:was
 are:were

diff --git a/pythonscripts/Summary/regex.py b/pythonscripts/Summary/regex.py
@@ -66,4 +66,4 @@ def replace(match):
 
 text = " So what I did during my job is grow plants in the greenhouse, different ones, different milkweeds. Some were toxic, including the tropical milkweed, with very high concentrations of these cardenolides. And some were not toxic. And then I fed them to monarchs. Some of the monarchs were healthy. They had no disease. But some of the monarchs were sick, and what I found is that some of these milkweeds are medicinal, meaning they reduce the disease symptoms in the monarch butterflies, meaning these monarchs can live longer when they are infected when feeding on these medicinal plants."
 
-print firstToThirdPerson(text)
+#print firstToThirdPerson(text)
diff --git a/pythonscripts/Summary/regex.pyc b/pythonscripts/Summary/regex.pyc
diff --git a/pythonscripts/Summary/summary.pyc b/pythonscripts/Summary/summary.pyc
diff --git a/pythonscripts/Summary/summarycontent.txt b/pythonscripts/Summary/summarycontent.txt
diff --git a/src/ComprehensionQuestionGeneration/VocabularyQuestion.java b/src/ComprehensionQuestionGeneration/VocabularyQuestion.java
@@ -266,7 +266,7 @@ public static void generateMatchTheDefinitionQuestion(String inputFilePath){
 	*/
 
 	public static void main(String[] args) {
-		String fileName="earthquake.txt";
+		String fileName="input.txt";
 		populateTagMap("/home/vishnu/workspace/QuestionGeneration/"+fileName);
 		generateMatchTheSynonymQuestion("/home/vishnu/workspace/QuestionGeneration/"+fileName);
 		generateMatchTheDefinitionQuestion("/home/vishnu/workspace/QuestionGeneration/"+fileName);

diff --git a/src/TopicExtraction/ParseHtml.java b/src/TopicExtraction/ParseHtml.java
@@ -31,7 +31,7 @@ public static Set<Topic> parse(String url) {
         Element topicList = topicDiv.select("ul#topics").first();
         Elements topics = topicList.select("li");
         for(Element topic:topics){
-        	System.out.println(topic.text()+" "+topic.select("a").attr("linkprob"));
+        //	System.out.println(topic.text()+" "+topic.select("a").attr("linkprob"));
         	list.add(new Topic(topic.text(),Double.valueOf(topic.select("a").attr("linkprob"))));
         }
         return list;

diff --git a/src/TopicExtraction/WikipediaMinerAPI.java b/src/TopicExtraction/WikipediaMinerAPI.java
@@ -5,8 +5,10 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 
@@ -23,25 +25,55 @@ public int compare(Topic o1, Topic o2) {
 }
 public class WikipediaMinerAPI {
 
-
+	public static List<String> splitEqually(String text, int size) {
+	    // Cuts text into consecutive chunks of size characters; the final chunk holds whatever is left.
+	    // The list is pre-sized to ceil(length / size) chunks, so a size of 0 fails here with a division by zero.
+	    List<String> ret = new ArrayList<String>((text.length() + size - 1) / size);
+
+	    for (int start = 0; start < text.length(); start += size) {
+	        ret.add(text.substring(start, Math.min(text.length(), start + size)));
+	    }
+	    return ret;
+
+	}	  
    //@input text whose topics should be extracted
    //@output list of topics detected in the text, each with its probability
-public static List<Topic> getTopics (String text) {
+	public static List<Topic> getTopics (String text) {
 
       String urlString="";
-      Set<Topic> topicList=new HashSet<Topic>();
-      List<Topic> list=new ArrayList<Topic>();
-      try {
-				urlString = "http://localhost:8000/?query="+URLEncoder.encode(text,"UTF-8");
-				topicList=ParseHtml.parse(urlString);
-    		} catch (UnsupportedEncodingException e) {
+      List<Topic> topicList=new ArrayList<Topic>();
+      Set<Topic> responseList=new HashSet<Topic>();
+      Map<String,Double> topicMap=new HashMap<String,Double>();
+      List<String> textList=splitEqually(text,1500);
+      for(String textChunk:textList){
+    	  try {
+				urlString = "http://localhost:8000/?query="+URLEncoder.encode(textChunk,"UTF-8");
+				responseList=ParseHtml.parse(urlString);
+    	  } catch (UnsupportedEncodingException e) {
 				// TODO Auto-generated catch block
 				e.printStackTrace();
-			}
-	     list.addAll(topicList);
-      	Collections.sort(list,new TopicComparator());
-
-          return list;
-   }  
+    	  }
+    	 for(Topic topic:responseList){ 
+    		 if(topicMap.containsKey(topic.topicName)){
+    			 Double existingProbability=topicMap.get(topic.topicName);
+    			 if(topic.probability>existingProbability){
+    				topicMap.put(topic.topicName, topic.probability);
+    			 }
+    		 }
+    		 else{
+    			 topicMap.put(topic.topicName, topic.probability);
+    		 }
+    	}
+
+      }
+      //convert topic map to topic list
+      for (Map.Entry<String, Double> entry : topicMap.entrySet())
+      {
+    	  topicList.add(new Topic(entry.getKey(), entry.getValue()));
+      }
+      Collections.sort(topicList,new TopicComparator());
+      return topicList;
+   }
+
 
 } // end of class definition
diff --git a/src/TopicExtraction/WikipediaMinerAPIDriver.java b/src/TopicExtraction/WikipediaMinerAPIDriver.java
@@ -11,6 +11,7 @@ public static void main(String[] args) {
 		String string=sc.nextLine();
 		System.out.println("Detected topics and their probability");
 		List<Topic> list=WikipediaMinerAPI.getTopics(string);
+		System.out.println("Number of topics found :"+list.size());
 		for(Topic topic:list){
 			System.out.println(topic.topicName+" "+topic.probability);
 

diff --git a/src/Utility/MiscellaneousHelper.java b/src/Utility/MiscellaneousHelper.java
@@ -0,0 +1,21 @@
+package Utility;
+
+import java.util.Random;
+
+public class MiscellaneousHelper {
+	//returns a random int between min and max, BOTH inclusive (nextInt's bound below is (max - min) + 1)
+	//NOTE(review): callers that assume max is exclusive (e.g. passing a collection size) can receive max itself - confirm call sites
+	public static int getRandomNumber(int min, int max) {
+
+	    // NOTE: Usually this should be a field rather than a method
+	    // variable so that it is not re-seeded every call.
+
+	    Random rand = new Random();
+
+	    // nextInt is normally exclusive of the top value,
+	    // so add 1 to make it inclusive
+	    int randomNum = rand.nextInt((max - min) + 1) + min;
+
+	    return randomNum;
+	}
+}
diff --git a/src/Utility/VideoClipper.java b/src/Utility/VideoClipper.java
@@ -13,6 +13,7 @@ public class VideoClipper {
 	public static ArrayList<String> ClipVideo()
 	{
 		ArrayList<String> paragraph =new ArrayList<String>();
+
 		try {
 			Process p = Runtime.getRuntime().exec("python"+" "+Configuration.SUMMARY_PYTHON_SCRIPT_PATH);
 			BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));
@@ -29,12 +30,12 @@ public static ArrayList<String> ClipVideo()
 			 System.out.println("Start time:"+start_time);
 			 System.out.println("End time:"+end_time);
 
-			 p=Runtime.getRuntime().exec("python"+" "+Configuration.VIDEO_CROPPER_SCRIPT_PATH+" "+start_time+" "+end_time);
+			/* p=Runtime.getRuntime().exec("python"+" "+Configuration.VIDEO_CROPPER_SCRIPT_PATH+" "+start_time+" "+end_time);
 			BufferedReader inn=new BufferedReader(new InputStreamReader(p.getInputStream()));
 			 while((clippingStatus = inn.readLine())!=null){
 				 	System.out.println(clippingStatus);
 				 	
-			 }
+			 }*/
 			 return paragraph;
 		} 
 

diff --git a/src/distractorgeneration/DistractorFilter.java b/src/distractorgeneration/DistractorFilter.java
@@ -14,15 +14,39 @@
 
 public class DistractorFilter {
 	public static Set<String> filterWords=new HashSet<String>();
-	public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhrase,String originalAnswerPhrase,List<String> distractorList){
+	public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhrase,String answerSentence,List<String> distractorList){
 		List<String> removedList=new ArrayList<String>();
 
 		//Filter 1:
 		//converting all words in answerPhrase to lowercase
-		originalAnswerPhrase=originalAnswerPhrase.toLowerCase();
-		originalAnswerPhrase = originalAnswerPhrase.replaceAll("[!?,]", "");
-		String[] strs = originalAnswerPhrase.split("\\s+");
-		filterWords=new HashSet<String>(Arrays.asList(strs));
+		answerSentence=answerSentence.toLowerCase();
+		answerSentence = answerSentence.replaceAll("[!?,]", "");
+	//	System.out.println(answerSentence);
+		String[] strs = answerSentence.split("\\s+");
+	//	System.out.println(strs);
+		int maximumDistractorWordCount = 1;
+		for(String str:distractorList){
+			maximumDistractorWordCount=Math.max(maximumDistractorWordCount, str.split("\\s+").length);
+		}
+
+		System.out.println("Filter words for answerSentence :"+answerSentence);
+		System.out.println("maximumDistractorWordCount :"+maximumDistractorWordCount);
+		for(int i=0;i<strs.length;i++){
+			String filterWord="";
+			int spaceRequired=0;
+			for(int j=i;j<i+maximumDistractorWordCount&&j<strs.length;j++){
+				if(spaceRequired==0){
+					spaceRequired=1;
+				}
+				else{
+					filterWord+=" ";
+				}
+				filterWord+=strs[j];
+				//filterWord=filterWord.trim();
+			//	System.out.println("w:"+filterWord+":w");
+				filterWords.add(filterWord);
+			}
+		}
 
 		//Filter 2:
 		//also remove the stemmed word of the resolvedAnswerPhrase
@@ -36,17 +60,23 @@ public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhr
 		// B cannot be distractor and thus should be added to filterWords list
 		// a) if A's synonym set contain B
 		// b) intersection of A's synonym set and B's synonym set is not null
-		String sstOfResolvedAnswerPhrase = SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,resolvedAnswerPhrase);
-		Set<String> synonymsOfResolvedAnswerPhrase=new HashSet<String>(WordNetPythonAPI.getResponse("synonym", resolvedAnswerPhrase,sstOfResolvedAnswerPhrase));
-		filterWords.addAll(synonymsOfResolvedAnswerPhrase);
-		//the following for loop is for filter 3 subtask b
-		for(String distractor:distractorList){
-			String sstOfDistractor=SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,distractor);
-			List<String> distractorSynonyms = WordNetPythonAPI.getResponse("synonym",distractor,sstOfDistractor);
-			for(String str:distractorSynonyms){
-				if(synonymsOfResolvedAnswerPhrase.contains(str)){
-					filterWords.add(distractor);
-					break;
+		if(Configuration.INPUT_FILE_NAME==null){
+			System.out.println("Input file name is missing .Cannot use filter 2");
+
+		}
+		else{
+			String sstOfResolvedAnswerPhrase = SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,resolvedAnswerPhrase);
+			Set<String> synonymsOfResolvedAnswerPhrase=new HashSet<String>(WordNetPythonAPI.getResponse("synonym", resolvedAnswerPhrase,sstOfResolvedAnswerPhrase));
+			filterWords.addAll(synonymsOfResolvedAnswerPhrase);
+			//the following for loop is for filter 3 subtask b
+			for(String distractor:distractorList){
+				String sstOfDistractor=SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,distractor);
+				List<String> distractorSynonyms = WordNetPythonAPI.getResponse("synonym",distractor,sstOfDistractor);
+				for(String str:distractorSynonyms){
+					if(!str.equals("NO_RESPONSE")&&synonymsOfResolvedAnswerPhrase.contains(str)){
+						filterWords.add(distractor);
+						break;
+					}
 				}
 			}
 		}
@@ -55,8 +85,21 @@ public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhr
 			distractorList.set(i,distractorList.get(i).toLowerCase());
 		}
 		removedList.addAll(distractorList);
+
 		removedList.removeAll(filterWords);
-
+	/*	System.out.println("Before filtering");
+		for(String str:removedList){
+			System.out.println(str);
+		}
+		System.out.println("Filter words ");
+		for(String str:filterWords){
+			System.out.println(str);
+		}
+		System.out.println("After filtering");
+		for(String str:removedList){
+			System.out.println(str);
+		}
+		*/
 		return removedList;
 	}
 
@@ -70,9 +113,13 @@ public static List<String> removeSSTDistractorsFromPOSDistractorList(List<String
 	}
 	public static void main(String[] args) {
 		List<String> list=new ArrayList<String>();
-		list.add("member");
-		list.add("Carpenter");
-		list=applyFiltersToDistractorList("farmer","an excellent farmer and carpenter", list);
+		list.add("Hardin County");
+		list.add("vellore");
+		list.add("chennai");
+		list.add("lincoln");
+		list=applyFiltersToDistractorList("Abraham","Abraham Lincoln was born on February 12 , 1809 , in Hardin County , Kentucky , to Thomas and Nancy Lincoln in their one roomlog", list);
+		System.out.println();
+		System.out.println();
 		System.out.println("After removing:");
 		for(String word:list)
 			System.out.println(word);

diff --git a/src/edu/cmu/ark/AnalysisUtilities.java b/src/edu/cmu/ark/AnalysisUtilities.java
@@ -320,7 +320,6 @@ public ParseResult parseSentence(String sentence) {
 			System.out.println("Result of parser.parse(sentence)"+parser.parse(sentence));
 			if(parser.parse(sentence)){
 				parse = parser.getBestParse();
-				System.out.println("This fucking block was executed");
 				//remove all the parent annotations (this is a hacky way to do it)
 				String ps = parse.toString().replaceAll("\\[[^\\]]+/[^\\]]+\\]", "");
 				System.out.println("ps="+ps);
@@ -332,7 +331,6 @@ public ParseResult parseSentence(String sentence) {
 		}catch(Exception e){
 			System.out.println("Exception-"+e);
 		}
-		System.out.println("This fucking block was executed-Exception"); //this is getting executed  :(
 		parse = readTreeFromString("(ROOT (. .))");
         parseScore = -99999.0;
         return new ParseResult(false, parse, parseScore);