Skip to content

Commit

Permalink
added logic for videoquestion in questionasker
Browse files Browse the repository at this point in the history
  • Loading branch information
narain2804 committed Mar 28, 2015
2 parents 91b09e8 + 9a57af4 commit bbacb49
Show file tree
Hide file tree
Showing 21 changed files with 357 additions and 76 deletions.
24 changes: 24 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
-------------------------------------------------------------------------------------------------------
To make distractor generation work, you have to start the following servers.
The following 1) and 2) can be started via Eclipse:
1) POSTagger server running on port 8080
2) Supersense tagger running on port 8081

The following 3) should be started from the command line
3) Python wordnet server running on port 8030
You need the bottle framework for that.
Install the latest stable release with "sudo pip install bottle" or "easy_install -U bottle".
Run it using the command
:~/workspace/QuestionGeneration$ cd pythonscripts/
:~/workspace/QuestionGeneration/pythonscripts$ python python_wordnet_server.py

******

To make the Wikipedia topic extraction work,
you need Node.js and related dependencies,

and then run the test.js file
using the command: phantomjs test.js

******
-------------------------------------------------------------------------------------------------------
Question Generation via Overgenerating Transformations and Ranking
Michael Heilman and Noah A. Smith

Expand Down
9 changes: 7 additions & 2 deletions pythonscripts/Summary/getsummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@

count =0
inputText =" "

f= open("/home/narain/workspace/questiongeneration/summarycontent.txt","wb")
g= open("/home/narain/workspace/questiongeneration/transcripttext.txt","wb")
with open("/home/narain/workspace/questiongeneration/transcript.json") as json_file:
json_data = json.load(json_file)

Expand All @@ -27,6 +28,7 @@
#print "After expanding contractions:\n",content
content = regex(content)
#print "After processing: \n",content
g.write(content)
st = SummaryTool()
sentences_dic = st.get_senteces_ranks(content)
summary = st.get_distractors(content,sentences_dic,1)
Expand All @@ -44,6 +46,8 @@
inputText = paragraph[count-1]
break
#print "paragraph:",inputText
inputText = firstToThirdPerson(inputText)

outputList = []
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
sentences = tokenizer.tokenize(inputText)
Expand All @@ -55,7 +59,8 @@
# print "Less than 55"
outputList.append(sentence)
for sentence in outputList:
print sentence
f.write(sentence)
#print sentence



2 changes: 1 addition & 1 deletion pythonscripts/Summary/personchange.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
I:The author
I:the speaker
my:his
is:was
are:were
Expand Down
2 changes: 1 addition & 1 deletion pythonscripts/Summary/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,4 @@ def replace(match):

text = " So what I did during my job is grow plants in the greenhouse, different ones, different milkweeds. Some were toxic, including the tropical milkweed, with very high concentrations of these cardenolides. And some were not toxic. And then I fed them to monarchs. Some of the monarchs were healthy. They had no disease. But some of the monarchs were sick, and what I found is that some of these milkweeds are medicinal, meaning they reduce the disease symptoms in the monarch butterflies, meaning these monarchs can live longer when they are infected when feeding on these medicinal plants."

print firstToThirdPerson(text)
#print firstToThirdPerson(text)
Binary file modified pythonscripts/Summary/regex.pyc
Binary file not shown.
Binary file modified pythonscripts/Summary/summary.pyc
Binary file not shown.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ public static void generateMatchTheDefinitionQuestion(String inputFilePath){
*/

public static void main(String[] args) {
String fileName="earthquake.txt";
String fileName="input.txt";
populateTagMap("/home/vishnu/workspace/QuestionGeneration/"+fileName);
generateMatchTheSynonymQuestion("/home/vishnu/workspace/QuestionGeneration/"+fileName);
generateMatchTheDefinitionQuestion("/home/vishnu/workspace/QuestionGeneration/"+fileName);
Expand Down
2 changes: 1 addition & 1 deletion src/TopicExtraction/ParseHtml.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public static Set<Topic> parse(String url) {
Element topicList = topicDiv.select("ul#topics").first();
Elements topics = topicList.select("li");
for(Element topic:topics){
System.out.println(topic.text()+" "+topic.select("a").attr("linkprob"));
// System.out.println(topic.text()+" "+topic.select("a").attr("linkprob"));
list.add(new Topic(topic.text(),Double.valueOf(topic.select("a").attr("linkprob"))));
}
return list;
Expand Down
60 changes: 46 additions & 14 deletions src/TopicExtraction/WikipediaMinerAPI.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;


Expand All @@ -23,25 +25,55 @@ public int compare(Topic o1, Topic o2) {
}
public class WikipediaMinerAPI {


/**
 * Splits {@code text} into consecutive chunks of at most {@code size} characters.
 *
 * <p>Every chunk except possibly the last has exactly {@code size} characters, and the
 * chunks concatenated in order reproduce {@code text}. An empty {@code text} yields an
 * empty list. Chunks are cut at fixed character offsets, so a word that straddles a
 * boundary ends up split across two chunks.
 *
 * @param text the text to split
 * @param size maximum chunk length in characters; must be positive
 * @return the chunks, in their original order
 * @throws IllegalArgumentException if {@code size} is not positive
 */
public static List<String> splitEqually(String text, int size) {
    if (size <= 0) {
        throw new IllegalArgumentException("size must be positive: " + size);
    }
    final int length = text.length();

    // ceil(length / size), written so that it cannot overflow for a very large size
    // (the naive "(length + size - 1) / size" goes negative in that case).
    final int chunkCount = length / size + (length % size == 0 ? 0 : 1);
    List<String> ret = new ArrayList<String>(chunkCount);

    int start = 0;
    while (start < length) {
        // Compare against the remaining length instead of computing start + size first,
        // so the end offset never overflows.
        int end = (size >= length - start) ? length : start + size;
        ret.add(text.substring(start, end));
        start = end;
    }
    return ret;
}
//@input list of words
//@output probability of occurrence of that words
public static List<Topic> getTopics (String text) {
public static List<Topic> getTopics (String text) {

String urlString="";
Set<Topic> topicList=new HashSet<Topic>();
List<Topic> list=new ArrayList<Topic>();
try {
urlString = "http://localhost:8000/?query="+URLEncoder.encode(text,"UTF-8");
topicList=ParseHtml.parse(urlString);
} catch (UnsupportedEncodingException e) {
List<Topic> topicList=new ArrayList<Topic>();
Set<Topic> responseList=new HashSet<Topic>();
Map<String,Double> topicMap=new HashMap<String,Double>();
List<String> textList=splitEqually(text,1500);
for(String textChunk:textList){
try {
urlString = "http://localhost:8000/?query="+URLEncoder.encode(textChunk,"UTF-8");
responseList=ParseHtml.parse(urlString);
} catch (UnsupportedEncodingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
list.addAll(topicList);
Collections.sort(list,new TopicComparator());

return list;
}
}
for(Topic topic:responseList){
if(topicMap.containsKey(topic.topicName)){
Double existingProbability=topicMap.get(topic.topicName);
if(topic.probability>existingProbability){
topicMap.put(topic.topicName, topic.probability);
}
}
else{
topicMap.put(topic.topicName, topic.probability);
}
}

}
//convert topic map to topic list
for (Map.Entry<String, Double> entry : topicMap.entrySet())
{
topicList.add(new Topic(entry.getKey(), entry.getValue()));
}
Collections.sort(topicList,new TopicComparator());
return topicList;
}


} // end of class definition
1 change: 1 addition & 0 deletions src/TopicExtraction/WikipediaMinerAPIDriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public static void main(String[] args) {
String string=sc.nextLine();
System.out.println("Detected topics and their probability");
List<Topic> list=WikipediaMinerAPI.getTopics(string);
System.out.println("Number of topics found :"+list.size());
for(Topic topic:list){
System.out.println(topic.topicName+" "+topic.probability);

Expand Down
21 changes: 21 additions & 0 deletions src/Utility/MiscellaneousHelper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package Utility;

import java.util.Random;

/**
 * Small static helpers that do not belong to any particular module.
 */
public class MiscellaneousHelper {

    // One shared generator, so that it is not re-created and re-seeded on every call.
    private static final Random RANDOM = new Random();

    /**
     * Returns a pseudo-random integer in the half-open range {@code [min, max)}.
     *
     * <p>{@code min} is inclusive and {@code max} is exclusive, as this method's contract
     * has always been documented. The earlier implementation added 1 to the bound and
     * could therefore return {@code max} itself, contradicting that contract.
     *
     * @param min lower bound, inclusive
     * @param max upper bound, exclusive; must be greater than {@code min}
     * @return a value {@code v} with {@code min <= v < max}
     * @throws IllegalArgumentException if {@code max <= min}, or if the range is wider
     *         than {@code Integer.MAX_VALUE} values
     */
    public static int getRandomNumber(int min, int max) {
        // Compute the width in long arithmetic so that extreme bounds cannot overflow.
        long span = (long) max - (long) min;
        if (span <= 0 || span > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "invalid range: min=" + min + " (inclusive), max=" + max + " (exclusive)");
        }
        // nextInt(bound) returns a value in [0, bound), so shifting by min gives [min, max).
        return min + RANDOM.nextInt((int) span);
    }
}
5 changes: 3 additions & 2 deletions src/Utility/VideoClipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class VideoClipper {
public static ArrayList<String> ClipVideo()
{
ArrayList<String> paragraph =new ArrayList<String>();

try {
Process p = Runtime.getRuntime().exec("python"+" "+Configuration.SUMMARY_PYTHON_SCRIPT_PATH);
BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));
Expand All @@ -29,12 +30,12 @@ public static ArrayList<String> ClipVideo()
System.out.println("Start time:"+start_time);
System.out.println("End time:"+end_time);

p=Runtime.getRuntime().exec("python"+" "+Configuration.VIDEO_CROPPER_SCRIPT_PATH+" "+start_time+" "+end_time);
/* p=Runtime.getRuntime().exec("python"+" "+Configuration.VIDEO_CROPPER_SCRIPT_PATH+" "+start_time+" "+end_time);
BufferedReader inn=new BufferedReader(new InputStreamReader(p.getInputStream()));
while((clippingStatus = inn.readLine())!=null){
System.out.println(clippingStatus);
}
}*/
return paragraph;
}

Expand Down
87 changes: 67 additions & 20 deletions src/distractorgeneration/DistractorFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,39 @@

public class DistractorFilter {
public static Set<String> filterWords=new HashSet<String>();
public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhrase,String originalAnswerPhrase,List<String> distractorList){
public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhrase,String answerSentence,List<String> distractorList){
List<String> removedList=new ArrayList<String>();

//Filter 1:
//converting all words in answerPhrase to lowercase
originalAnswerPhrase=originalAnswerPhrase.toLowerCase();
originalAnswerPhrase = originalAnswerPhrase.replaceAll("[!?,]", "");
String[] strs = originalAnswerPhrase.split("\\s+");
filterWords=new HashSet<String>(Arrays.asList(strs));
answerSentence=answerSentence.toLowerCase();
answerSentence = answerSentence.replaceAll("[!?,]", "");
// System.out.println(answerSentence);
String[] strs = answerSentence.split("\\s+");
// System.out.println(strs);
int maximumDistractorWordCount = 1;
for(String str:distractorList){
maximumDistractorWordCount=Math.max(maximumDistractorWordCount, str.split("\\s+").length);
}

System.out.println("Filter words for answerSentence :"+answerSentence);
System.out.println("maximumDistractorWordCount :"+maximumDistractorWordCount);
for(int i=0;i<strs.length;i++){
String filterWord="";
int spaceRequired=0;
for(int j=i;j<i+maximumDistractorWordCount&&j<strs.length;j++){
if(spaceRequired==0){
spaceRequired=1;
}
else{
filterWord+=" ";
}
filterWord+=strs[j];
//filterWord=filterWord.trim();
// System.out.println("w:"+filterWord+":w");
filterWords.add(filterWord);
}
}

//Filter 2:
//also remove the stemmed word of the resolvedAnswerPhrase
Expand All @@ -36,17 +60,23 @@ public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhr
// B cannot be distractor and thus should be added to filterWords list
// a) if A's synonym set contain B
// b) intersection of A's synonym set and B's synonym set is not null
String sstOfResolvedAnswerPhrase = SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,resolvedAnswerPhrase);
Set<String> synonymsOfResolvedAnswerPhrase=new HashSet<String>(WordNetPythonAPI.getResponse("synonym", resolvedAnswerPhrase,sstOfResolvedAnswerPhrase));
filterWords.addAll(synonymsOfResolvedAnswerPhrase);
//the following for loop is for filter 3 subtask b
for(String distractor:distractorList){
String sstOfDistractor=SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,distractor);
List<String> distractorSynonyms = WordNetPythonAPI.getResponse("synonym",distractor,sstOfDistractor);
for(String str:distractorSynonyms){
if(synonymsOfResolvedAnswerPhrase.contains(str)){
filterWords.add(distractor);
break;
if(Configuration.INPUT_FILE_NAME==null){
System.out.println("Input file name is missing .Cannot use filter 2");

}
else{
String sstOfResolvedAnswerPhrase = SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,resolvedAnswerPhrase);
Set<String> synonymsOfResolvedAnswerPhrase=new HashSet<String>(WordNetPythonAPI.getResponse("synonym", resolvedAnswerPhrase,sstOfResolvedAnswerPhrase));
filterWords.addAll(synonymsOfResolvedAnswerPhrase);
//the following for loop is for filter 3 subtask b
for(String distractor:distractorList){
String sstOfDistractor=SuperSenseTagHelper.getSSTForGivenWord(Configuration.INPUT_FILE_PATH+Configuration.INPUT_FILE_NAME,distractor);
List<String> distractorSynonyms = WordNetPythonAPI.getResponse("synonym",distractor,sstOfDistractor);
for(String str:distractorSynonyms){
if(!str.equals("NO_RESPONSE")&&synonymsOfResolvedAnswerPhrase.contains(str)){
filterWords.add(distractor);
break;
}
}
}
}
Expand All @@ -55,8 +85,21 @@ public static List<String> applyFiltersToDistractorList(String resolvedAnswerPhr
distractorList.set(i,distractorList.get(i).toLowerCase());
}
removedList.addAll(distractorList);

removedList.removeAll(filterWords);

/* System.out.println("Before filtering");
for(String str:removedList){
System.out.println(str);
}
System.out.println("Filter words ");
for(String str:filterWords){
System.out.println(str);
}
System.out.println("After filtering");
for(String str:removedList){
System.out.println(str);
}
*/
return removedList;
}

Expand All @@ -70,9 +113,13 @@ public static List<String> removeSSTDistractorsFromPOSDistractorList(List<String
}
public static void main(String[] args) {
List<String> list=new ArrayList<String>();
list.add("member");
list.add("Carpenter");
list=applyFiltersToDistractorList("farmer","an excellent farmer and carpenter", list);
list.add("Hardin County");
list.add("vellore");
list.add("chennai");
list.add("lincoln");
list=applyFiltersToDistractorList("Abraham","Abraham Lincoln was born on February 12 , 1809 , in Hardin County , Kentucky , to Thomas and Nancy Lincoln in their one roomlog", list);
System.out.println();
System.out.println();
System.out.println("After removing:");
for(String word:list)
System.out.println(word);
Expand Down
2 changes: 0 additions & 2 deletions src/edu/cmu/ark/AnalysisUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,6 @@ public ParseResult parseSentence(String sentence) {
System.out.println("Result of parser.parse(sentence)"+parser.parse(sentence));
if(parser.parse(sentence)){
parse = parser.getBestParse();
System.out.println("This fucking block was executed");
//remove all the parent annotations (this is a hacky way to do it)
String ps = parse.toString().replaceAll("\\[[^\\]]+/[^\\]]+\\]", "");
System.out.println("ps="+ps);
Expand All @@ -332,7 +331,6 @@ public ParseResult parseSentence(String sentence) {
}catch(Exception e){
System.out.println("Exception-"+e);
}
System.out.println("This fucking block was executed-Exception"); //this is getting executed :(
parse = readTreeFromString("(ROOT (. .))");
parseScore = -99999.0;
return new ParseResult(false, parse, parseScore);
Expand Down
Loading

0 comments on commit bbacb49

Please sign in to comment.