In [135]:
import json
import ast
import numpy as np
import pandas as pd
import nltk
import spacy
from spacy import displacy
from collections import Counter
import random;random.seed(1)
import en_core_web_sm
nlp = en_core_web_sm.load()
# If your application will benefit from a large vocabulary with more vectors, you should consider using 
# one of the larger models or loading in a full vector package, for example, en_vectors_web_lg, which 
# includes over 1 million unique vectors.

In [2]:
with open('squad-dev-v1.1.json', 'r') as f:
    df = json.load(f)

#### Test on first two paragraphs of Super Bowl

In [132]:
print(df['data'][0]['title'])
p_title = df['data'][0]['title']
print('Number of paragraphs: ' + str(len(df['data'][0]['paragraphs'])) + '\n')
print('Paragraph 1: ' + str(df['data'][0]['paragraphs'][0]['context'])+ '\n')
p1 = df['data'][0]['paragraphs'][0]['context']
print('P1 Question 1: ' + str(df['data'][0]['paragraphs'][0]['qas'][0]['question'])+ '\n')
p1_q = df['data'][0]['paragraphs'][0]['qas'][0]['question']
print('P1 Answer 1: ' + str(df['data'][0]['paragraphs'][0]['qas'][0]['answers'])+ '\n')
p1_a = df['data'][0]['paragraphs'][0]['qas'][0]['answers']
print('Paragraph 2: ' + str(df['data'][0]['paragraphs'][1]['context'])+ '\n')
p2 = df['data'][0]['paragraphs'][1]['context']
print('P2 Question 1: ' + str(df['data'][0]['paragraphs'][1]['qas'][0]['question'])+ '\n')
p2_q = df['data'][0]['paragraphs'][1]['qas'][0]['question']
print('P2 Answer 1: ' + str(df['data'][0]['paragraphs'][1]['qas'][0]['answers'])+ '\n')
p2_a = df['data'][0]['paragraphs'][1]['qas'][0]['answers']

Super_Bowl_50
Number of paragraphs: 54

Paragraph 1: Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.

P1 Question 1: Which NFL team represented the AFC at Super Bowl 50?

P1 Answer 1: [{'answer_start': 177, 'text': 'Denver Broncos'}, {'answer_start': 177, 'text': 'Denver 

In [150]:
# answers are the same - take text form
p1_a = p1_a[0]['text']
p2_a = p2_a[0]['text']
print(p1_a)
print(p2_a)

Denver Broncos
Cam Newton


### Types of Answers (from SQuAD paper)
- Date
- Numeric
- Person
- Location
- Other Entity
- Common Noun Phrase
- Adjective Phrase (i.e. second-largest)
- Verb Phrase (i.e. returned to Earth)
- Clause (i.e. to avoid trivialization)
- Other (i.e. quietly)

### SpaCy - seems to have more detailed entity recognition

In [22]:
#extracting named entities
article = nlp(p1)
print('There are ' + str(len(article.ents)) + ' entities in this paragraph.')
labels = [x.label_ for x in article.ents]
print(Counter(labels))
items = [x.text for x in article.ents]
print('These are the most frequent terms: ' + str(Counter(items).most_common(3)))
print('These are entities: ' + str(article.ents))
print('\n')
print(p1)

There are 24 entities in this paragraph.
Counter({'ORG': 7, 'EVENT': 5, 'DATE': 4, 'NORP': 2, 'GPE': 2, 'PERSON': 1, 'ORDINAL': 1, 'FAC': 1, 'LOC': 1})
These are the most frequent terms: [('Super Bowl', 3), ('50', 2), ('American', 1)]
These are entities: (Super Bowl, 50, American, the National Football League, NFL, the 2015 season, The American Football Conference, AFC, Denver Broncos, the National Football Conference, Carolina Panthers, third, Super Bowl, February 7, 2016, Levi's Stadium, the San Francisco Bay Area, Santa Clara, California, the 50th Super Bowl, Super Bowl, Roman, Super Bowl L, Arabic, 50)


Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Fra

In [41]:
# see tags
ne_pd = pd.DataFrame()
ne_pd['entity'] = list(article.ents)
ne_pd['label'] = list(labels)


In [46]:
ne_pd.head(10)

Unnamed: 0,entity,label
0,"(Super, Bowl)",EVENT
1,(50),DATE
2,(American),NORP
3,"(the, National, Football, League)",ORG
4,(NFL),ORG
5,"(the, 2015, season)",DATE
6,"(The, American, Football, Conference)",ORG
7,(AFC),ORG
8,"(Denver, Broncos)",ORG
9,"(the, National, Football, Conference)",ORG


In [61]:
for token in nlp(p1_a):
    print(token.text, token.pos_, token.dep_, token.ent_type)

Denver PROPN compound 382
Broncos PROPN ROOT 0


In [79]:
for s in sent_list:
    if p1_a in s:
        print(s)

The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title.


### Generate wrong answer from same sentence

In [119]:
article = nlp(p1)
# get all named entities in paragraph
ent_list = [str(i) for i in list(article.ents)]
# get all sentences in paragraph
sent_list = [str(i) for i in list(article.sents)]
# find sentence that has answer
for s in sent_list:
    if p1_a in s:
        ans_sent = s
sentence = nlp(ans_sent)
sent_labels = [x.label_ for x in sentence.ents]
ne_pd = pd.DataFrame()
ne_pd['entity'] = [s.orth_ for s in list(sentence.ents)]
ne_pd['label'] = list(sent_labels)
# find label of answer in named entity pd
ans_label = ne_pd[ne_pd['entity']==p1_a]['label'].values[0]
alt_ans_list = list(ne_pd[ne_pd['label']==ans_label]['entity'])
#remove correct answers
alt_ans_list.remove(p1_a)

#### Alternate answers from same sentence

In [120]:
alt_ans_list

['The American Football Conference', 'AFC', 'the National Football Conference']

### Generate wrong answer from same paragraph

In [121]:
ne_pd_paragraph = pd.DataFrame()
ne_pd_paragraph['entity'] = ent_list
ne_pd_paragraph['label'] = [x.label_ for x in article.ents]
alt_ans_list2 = list(ne_pd_paragraph[ne_pd_paragraph['label']==ans_label]['entity'])
#remove correct answers
alt_ans_list2.remove(p1_a)

In [122]:
alt_ans_list2

['the National Football League',
 'NFL',
 'The American Football Conference',
 'AFC',
 'the National Football Conference',
 'Roman']

### Generate wrong answer from same topic

In [136]:
# choose random paragraph from same topic
index_list = list(range(len(df['data'][0]['paragraphs'])))
# make sure isn't the same paragraph as current paragraph
p_index = next((index for (index, d) in enumerate(df['data'][0]['paragraphs']) if d["context"] == p1), None)
print('current paragraph index: ' + str(p_index))
index_list.remove(p_index)
alt_p_index = random.choice(index_list)
print('alternate paragraph index: ' + str(alt_p_index) + '\n')
print(df['data'][0]['paragraphs'][alt_p_index]['context'])
alt_p = df['data'][0]['paragraphs'][alt_p_index]['context']

current paragraph index: 0
alternate paragraph index: 9

Despite waiving longtime running back DeAngelo Williams and losing top wide receiver Kelvin Benjamin to a torn ACL in the preseason, the Carolina Panthers had their best regular season in franchise history, becoming the seventh team to win at least 15 regular season games since the league expanded to a 16-game schedule in 1978. Carolina started the season 14–0, not only setting franchise records for the best start and the longest single-season winning streak, but also posting the best start to a season by an NFC team in NFL history, breaking the 13–0 record previously shared with the 2009 New Orleans Saints and the 2011 Green Bay Packers. With their NFC-best 15–1 regular season record, the Panthers clinched home-field advantage throughout the NFC playoffs for the first time in franchise history. Ten players were selected to the Pro Bowl (the most in franchise history) along with eight All-Pro selections.


In [137]:
alt_article = nlp(alt_p)
ne_pd_alt_p = pd.DataFrame()
ne_pd_alt_p['entity'] = [str(i) for i in list(alt_article.ents)]
ne_pd_alt_p['label'] = [x.label_ for x in alt_article.ents]
alt_ans_list3 = list(ne_pd_alt_p[ne_pd_alt_p['label']==ans_label]['entity'])

In [138]:
alt_ans_list3

['ACL',
 'the Carolina Panthers',
 'NFC',
 'NFL',
 '13–0',
 'New Orleans Saints',
 'NFC',
 'Panthers',
 'NFC']

### Generate wrong answer from different topic

In [139]:
# choose random topic
topic_list = list(range(len(df['data'])))
# make sure isn't the same topic as current topic
topic_index = next((index for (index, d) in enumerate(df['data']) if d["title"] == p_title), None)
print('current topic index: ' + str(topic_index))
topic_list.remove(topic_index)
alt_topic_index = random.choice(topic_list)
print('alternate topic index: ' + str(alt_topic_index) + '\n')
print(df['data'][alt_topic_index]['title'])
alt_topic = df['data'][alt_topic_index]['title']

current topic index: 0
alternate topic index: 37

Kenya


In [141]:
# select first paragraph from topic - option to randomize selection as well
alt_topic_article = nlp(df['data'][alt_topic_index]['paragraphs'][0]['context'])
ne_pd_alt_topic = pd.DataFrame()
ne_pd_alt_topic['entity'] = [str(i) for i in list(alt_topic_article.ents)]
ne_pd_alt_topic['label'] = [x.label_ for x in alt_topic_article.ents]
alt_ans_list4 = list(ne_pd_alt_topic[ne_pd_alt_topic['label']==ans_label]['entity'])
print(alt_ans_list4)

['the East African Community (EAC']


### View all wrong answers

In [153]:
mergedlist = alt_ans_list + alt_ans_list2 + alt_ans_list3 + alt_ans_list4
# see all unique answers
wrong_answers = set(mergedlist)

In [155]:
print('Paragraph: ' + str(p1) + '\n')
print('Question: ' + str(p1_q)+ '\n')
print('Answer: ' + str(p1_a) + '\n')
print('Wrong Answers: ' + str(random.sample(wrong_answers,3)))

Paragraph: Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.

Question: Which NFL team represented the AFC at Super Bowl 50?

Answer: Denver Broncos

Wrong Answers: ['the National Football Conference', 'The American Football Conference', 'NFL']


## Generate Wrong Answer Function for Named Entities and Numerical Answer

In [208]:
def generate_distractor_ne(topic, paragraph, question, correct_answer):
    # tokenize paragraph
    article = nlp(paragraph)
    # get all named entities in paragraph
    ent_list = [str(i) for i in list(article.ents)]
    # get all sentences in paragraph
    sent_list = [str(i) for i in list(article.sents)]
    
    ### Generate wrong answer from same sentence ###
    # find sentence that has answer
    for s in sent_list:
        if correct_answer in s:
            ans_sent = s
    # tokenize sentence
    sentence = nlp(ans_sent)
    sent_labels = [x.label_ for x in sentence.ents]
    # create table of named entities for sentence
    ne_pd = pd.DataFrame()
    ne_pd['entity'] = [s.orth_ for s in list(sentence.ents)]
    ne_pd['label'] = list(sent_labels)
    ans_label = ne_pd[ne_pd['entity']==correct_answer]['label'].values[0]
    alt_ans_list = list(ne_pd[ne_pd['label']==ans_label]['entity'])
    #remove correct answers
    alt_ans_list.remove(correct_answer)
    
    ### Generate wrong answer from same paragraph ###
    # create table of named entities for paragraph
    ne_pd_paragraph = pd.DataFrame()
    ne_pd_paragraph['entity'] = ent_list
    ne_pd_paragraph['label'] = [x.label_ for x in article.ents]
    alt_ans_list2 = list(ne_pd_paragraph[ne_pd_paragraph['label']==ans_label]['entity'])
    #remove correct answers
    alt_ans_list2.remove(correct_answer)
    
    
    ### Generate wrong answer from different topic ###
    # choose random topic
    topic_list = list(range(len(df['data'])))
    # make sure isn't the same topic as current topic
    topic_index = next((index for (index, d) in enumerate(df['data']) if d["title"] == topic), None)
    #print('current topic index: ' + str(topic_index))
    topic_list.remove(topic_index)
    alt_topic_index = random.choice(topic_list)
    #print('alternate topic index: ' + str(alt_topic_index) + '\n')
    #print(df['data'][alt_topic_index]['title'])
    alt_topic = df['data'][alt_topic_index]['title']
    # select first paragraph from topic - option to randomize selection as well
    alt_topic_article = nlp(df['data'][alt_topic_index]['paragraphs'][0]['context'])
    # create table of named entities for alternative topic paragraph
    ne_pd_alt_topic = pd.DataFrame()
    ne_pd_alt_topic['entity'] = [str(i) for i in list(alt_topic_article.ents)]
    ne_pd_alt_topic['label'] = [x.label_ for x in alt_topic_article.ents]
    alt_ans_list4 = list(ne_pd_alt_topic[ne_pd_alt_topic['label']==ans_label]['entity'])
    #print(alt_ans_list4)
    
    ### Generate wrong answer from same topic ###
    # choose random paragraph from same topic
    index_list = list(range(len(df['data'][topic_index]['paragraphs'])))
    # make sure isn't the same paragraph as current paragraph
    p_index = next((index for (index, d) in enumerate(df['data'][topic_index]['paragraphs'])\
                    if d["context"] == paragraph), None)
    #print('current paragraph index: ' + str(p_index))
    index_list.remove(p_index)
    alt_p_index = random.choice(index_list)
    #print('alternate paragraph index: ' + str(alt_p_index) + '\n')
    #print(df['data'][topic_index]['paragraphs'][alt_p_index]['context'])
    alt_p = df['data'][topic_index]['paragraphs'][alt_p_index]['context']
    alt_article = nlp(alt_p)
    # create table of named entities for alternate paragraph of same topic
    ne_pd_alt_p = pd.DataFrame()
    ne_pd_alt_p['entity'] = [str(i) for i in list(alt_article.ents)]
    ne_pd_alt_p['label'] = [x.label_ for x in alt_article.ents]
    alt_ans_list3 = list(ne_pd_alt_p[ne_pd_alt_p['label']==ans_label]['entity'])
    
    
    mergedlist = alt_ans_list + alt_ans_list2 + alt_ans_list3 + alt_ans_list4
    # see all unique answers
    wrong_answers = set(mergedlist)
    return (topic, paragraph, question, correct_answer, random.sample(wrong_answers,3))


In [199]:
topic, paragraph, question, answer, wrong_answer = generate_distractor(p_title, p1, p1_q, p1_a)
print('Topic: ' + str(topic) + '\n')
print('Paragraph: ' + str(paragraph) + '\n')
print('Question: ' + str(question)+ '\n')
print('Answer: ' + str(answer) + '\n')
print('Wrong Answers: ' + str(wrong_answer))

Topic: Super_Bowl_50

Paragraph: Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.

Question: Which NFL team represented the AFC at Super Bowl 50?

Answer: Denver Broncos

Wrong Answers: ['Mongols', 'The American Football Conference', 'the National Football League']


In [215]:
#### Test Example for numerical answer
test_num_topic = df['data'][1]['title']
test_num = df['data'][1]['paragraphs'][5]['context']
test_num_q = df['data'][1]['paragraphs'][5]['qas'][0]['question']
test_num_a = df['data'][1]['paragraphs'][5]['qas'][0]['answers'][0]['text']
print('Correct Answer: ' + str(df['data'][1]['paragraphs'][5]['qas'][0]['answers'][0]['text']))

Correct Answer: 1816


In [200]:
topic, paragraph, question, answer, wrong_answer = generate_distractor(test_num_topic, test_num, test_num_q, test_num_a)
print('Topic: ' + str(topic) + '\n')
print('Paragraph: ' + str(paragraph) + '\n')
print('Question: ' + str(question)+ '\n')
print('Answer: ' + str(answer) + '\n')
print('Wrong Answers: ' + str(wrong_answer))

Topic: Warsaw

Paragraph: Another important library – the University Library, founded in 1816, is home to over two million items. The building was designed by architects Marek Budzyński and Zbigniew Badowski and opened on 15 December 1999. It is surrounded by green. The University Library garden, designed by Irena Bajerska, was opened on 12 June 2002. It is one of the largest and most beautiful roof gardens in Europe with an area of more than 10,000 m2 (107,639.10 sq ft), and plants covering 5,111 m2 (55,014.35 sq ft). As the university garden it is open to the public every day.

Question: When was the University Library founded?

Answer: 1816

Wrong Answers: ['15 December 1999', 'the year 1313', '1815']


  result = method(y)


## Generate Wrong Answer Function for Noun Phrases

In [284]:
def generate_distractor_np(topic, paragraph, question, correct_answer):
    # get length of answer
    correct_answer_len = len(correct_answer)
    print('correct answer: ' + str(correct_answer))
    print('length: ' + str(correct_answer_len))
    # tokenize paragraph
    article = nlp(paragraph)
    # get all named entities in paragraph
    np_list = [str(i) for i in list(article.noun_chunks)]
    # get all sentences in paragraph
    sent_list = [str(i) for i in list(article.sents)]
    
    ### Generate wrong answer from same sentence ###
    # find sentence that has answer
    print(correct_answer)
    for s in sent_list:
        if correct_answer in s:
            ans_sent = s
    # tokenize sentence
    sentence = nlp(ans_sent)
    sent_np_list = [s.orth_ for s in list(sentence.noun_chunks)]
    try:
        sent_np_list.remove(correct_answer)
        sent_np_list = [noun for noun in sent_np_list if len(noun)==correct_answer_len]
    except:
        pass
    
    ### Generate wrong answer from same paragraph ###
    try:
        np_list.remove(correct_answer)
        np_list = [noun for noun in np_list if len(noun)==correct_answer_len]
    except:
        pass
    
    ### Generate wrong answer from different topic ###
    # choose random topic
    topic_list = list(range(len(df['data'])))
    # make sure isn't the same topic as current topic
    topic_index = next((index for (index, d) in enumerate(df['data']) if d["title"] == topic), None)
    topic_list.remove(topic_index)
    alt_topic_index = random.choice(topic_list)
    alt_topic = df['data'][alt_topic_index]['title']
    # select first paragraph from topic - option to randomize selection as well
    alt_topic_article = nlp(df['data'][alt_topic_index]['paragraphs'][0]['context'])
    # get all named entities in paragraph
    alt_topic_np_list = [str(i) for i in list(alt_topic_article.noun_chunks)]
    try:
        alt_topic_np_list.remove(correct_answer)
        alt_topic_np_list = [noun for noun in alt_topic_np_list if len(noun)==correct_answer_len]
    except:
        pass
    
    ### Generate wrong answer from same topic ###
    # choose random paragraph from same topic
    index_list = list(range(len(df['data'][topic_index]['paragraphs'])))
    # make sure isn't the same paragraph as current paragraph
    p_index = next((index for (index, d) in enumerate(df['data'][topic_index]['paragraphs'])\
                    if d["context"] == paragraph), None)
    index_list.remove(p_index)
    alt_p_index = random.choice(index_list)
    alt_p = df['data'][topic_index]['paragraphs'][alt_p_index]['context']
    alt_article = nlp(alt_p)
    # create table of named entities for alternate paragraph of same topic
    alt_article_np_list = [str(i) for i in list(alt_article.noun_chunks)]
    try:
        alt_article_np_list.remove(correct_answer)
        alt_article_np_list = [noun for noun in alt_article_np_list if len(noun)==correct_answer_len]
    except:
        pass
    
    
    mergedlist = sent_np_list+np_list+alt_topic_np_list+alt_article_np_list
    # see all unique answers
    wrong_answers = set(mergedlist)
    return (topic, paragraph, question, correct_answer, random.sample(wrong_answers,3))
    

In [272]:
#### Test Example for noun phrase answer - short phrase
test_np_topic = df['data'][2]['title']
test_np = df['data'][2]['paragraphs'][21]['context']
test_np_q = df['data'][2]['paragraphs'][21]['qas'][0]['question']
test_np_a = df['data'][2]['paragraphs'][21]['qas'][0]['answers'][0]['text']
print('Correct Answer: ' + str(test_np_a))

Correct Answer: Modern English


In [277]:
topic, paragraph, question, answer, wrong_answer = generate_distractor_np(test_np_topic, test_np, test_np_q, test_np_a)
print('Topic: ' + str(topic) + '\n')
print('Paragraph: ' + str(paragraph) + '\n')
print('Question: ' + str(question)+ '\n')
print('Answer: ' + str(answer) + '\n')
print('Wrong Answers: ' + str(wrong_answer))

correct answer: Modern English
length: 14
Modern English
Topic: Normans

Paragraph: Eventually, the Normans merged with the natives, combining languages and traditions. In the course of the Hundred Years' War, the Norman aristocracy often identified themselves as English. The Anglo-Norman language became distinct from the Latin language, something that was the subject of some humour by Geoffrey Chaucer. The Anglo-Norman language was eventually absorbed into the Anglo-Saxon language of their subjects (see Old English) and influenced it, helping (along with the Norse language of the earlier Anglo-Norse settlers and the Latin used by the church) in the development of Middle English. It in turn evolved into Modern English.

Question: What was the Anglo-Norman language's final form?

Answer: Modern English

Wrong Answers: ['the history', 'English- and German-speaking countries', 'It']


In [278]:
#### Test Example for noun phrase answer - long phrase
test_np2_topic = df['data'][3]['title']
test_np2 = df['data'][3]['paragraphs'][21]['context']
test_np2_q = df['data'][3]['paragraphs'][21]['qas'][1]['question']
test_np2_a = df['data'][3]['paragraphs'][21]['qas'][1]['answers'][0]['text']
print('Correct Answer: ' + str(test_np2_a))

Correct Answer: $60,000 in cash and stock and a royalty of $2.50 per AC horsepower produced by each motor


In [285]:
topic, paragraph, question, answer, wrong_answer = generate_distractor_np(test_np2_topic, test_np2, test_np2_q, test_np2_a)
print('Topic: ' + str(topic) + '\n')
print('Paragraph: ' + str(paragraph) + '\n')
print('Question: ' + str(question)+ '\n')
print('Answer: ' + str(answer) + '\n')
print('Wrong Answers: ' + str(wrong_answer))

correct answer: $60,000 in cash and stock and a royalty of $2.50 per AC horsepower produced by each motor
length: 89
$60,000 in cash and stock and a royalty of $2.50 per AC horsepower produced by each motor
Topic: Nikola_Tesla

Paragraph: In July 1888, Brown and Peck negotiated a licensing deal with George Westinghouse for Tesla's polyphase induction motor and transformer designs for $60,000 in cash and stock and a royalty of $2.50 per AC horsepower produced by each motor. Westinghouse also hired Tesla for one year for the large fee of $2,000 ($52,700 in today's dollars) per month to be a consultant at the Westinghouse Electric & Manufacturing Company's Pittsburgh labs.

Question: How much did Westinghouse pay to license Tesla's designs?

Answer: $60,000 in cash and stock and a royalty of $2.50 per AC horsepower produced by each motor

Wrong Answers: ['cash', "Tesla's polyphase induction", 'its roots']


In [218]:
#### Test Example for adjective phrase answer
test_adj_topic = df['data'][30]['title']
test_adj = df['data'][30]['paragraphs'][1]['context']
test_adj_q = df['data'][30]['paragraphs'][1]['qas'][1]['question']
test_adj_a = df['data'][30]['paragraphs'][1]['qas'][1]['answers'][0]['text']
print('Correct Answer: ' + str(test_adj_a))

Correct Answer: 'tuition-free


In [219]:
topic, paragraph, question, answer, wrong_answer = generate_distractor(test_adj_topic, test_adj, test_adj_q, test_adj_a)
print('Topic: ' + str(topic) + '\n')
print('Paragraph: ' + str(paragraph) + '\n')
print('Question: ' + str(question)+ '\n')
print('Answer: ' + str(answer) + '\n')
print('Wrong Answers: ' + str(wrong_answer))

answer sentence: Annual tuition fees at K-12 schools range from nothing at so called 'tuition-free' schools to more than $45,000 at several New England preparatory schools.
ne_pd: 
              entity  label
0             Annual   DATE
1               K-12    ORG
2  more than $45,000  MONEY
3        New England    LOC


UnboundLocalError: local variable 'ans_label' referenced before assignment

In [220]:
#### Test Example for verb phrase answer
test_vp_topic = df['data'][25]['title']
test_vp = df['data'][25]['paragraphs'][25]['context']
test_vp_q = df['data'][25]['paragraphs'][25]['qas'][0]['question']
test_vp_a = df['data'][25]['paragraphs'][25]['qas'][0]['answers'][0]['text']
print('Correct Answer: ' + str(test_vp_a))

Correct Answer: opened the gates


In [None]:
doc = nlp(test_np)
pos_tags = [(i, i.tag_) for i in doc]
print(pos_tags)
ans_tok = nlp(test_np_a)
pos_tags_ans = [(i, i.tag_) for i in ans_tok]
print('\n')
print(pos_tags_ans)
alt_ans = [(i, i.tag_) for i in doc if i.tag_ in set(list(j[1] for j in pos_tags_ans))]
print(alt_ans)

In [221]:
topic, paragraph, question, answer, wrong_answer = generate_distractor(test_adj_topic, test_adj, test_adj_q, test_adj_a)
print('Topic: ' + str(topic) + '\n')
print('Paragraph: ' + str(paragraph) + '\n')
print('Question: ' + str(question)+ '\n')
print('Answer: ' + str(answer) + '\n')
print('Wrong Answers: ' + str(wrong_answer))

answer sentence: Annual tuition fees at K-12 schools range from nothing at so called 'tuition-free' schools to more than $45,000 at several New England preparatory schools.
ne_pd: 
              entity  label
0             Annual   DATE
1               K-12    ORG
2  more than $45,000  MONEY
3        New England    LOC


UnboundLocalError: local variable 'ans_label' referenced before assignment