In [3]:
import nltk
from nltk.tokenize import sent_tokenize
import spacy

# TIPS
1. Delete everything inside a bracket first

In [4]:
nlp = spacy.load('en_core_web_sm')
textfile = "data/set4/a7.txt"

sentence1 = "Harry Potter and the Prisoner of Azkaban is a 2004 fantasy film directed by Alfonso Cuarón and distributed by Warner Bros."
sentence2 = "Harry Potter has been spending another unhappy summer with the Dursleys."
sentence3 = "The film was produced by La Petite Reine and ARP Sélection for 13.47 million dollars."
sentence4 = "The film costs 13.47 million dollars."

doc1 = nlp(sentence1)
doc2 = nlp(sentence2)
doc3 = nlp(sentence3)
doc4 = nlp(sentence4)


In [5]:
textfile = "data/set4/a7.txt"

text = []
with open(textfile, "r") as f:
    for line in f:
        line = line.split('. ')
        if len(line) != 0:
            temp = line[0].strip('\n')
            if len(temp) != 0:
                text.append(temp)

# POS tagging


In [6]:
def pos_tag_lst(text):
    #list of sentences
    POS_tag_dict = dict()
    for i,line in enumerate(text):
        tags = []
        doc = nlp(str(line))
        for token in doc:
            tags.append((token.text, token.pos_, token.tag_, token.dep_, token.is_stop))
        if len(tags) != 0:
            POS_tag_dict[i] = tags
    return POS_tag_dict

In [7]:
def pos_tag_sentence(sentence):
    #list of sentences
    POS_tag_dict = dict()
    text = sentence.split()
    for i,line in enumerate(text):
        tags = []
        doc = nlp(str(line))
        for token in doc:
            tags.append((token.text, token.pos_, token.tag_, token.dep_, token.is_stop, ))
        if len(tags) != 0:
            POS_tag_dict[i] = tags
    return POS_tag_dict

In [8]:
pos_dict = pos_tag_lst(text)
pos_dict

{0: [('Harry', 'PROPN', 'NNP', 'compound', False),
  ('Potter', 'PROPN', 'NNP', 'ROOT', False),
  ('and', 'CCONJ', 'CC', 'cc', True),
  ('the', 'DET', 'DT', 'det', True),
  ('Prisoner', 'PROPN', 'NNP', 'conj', False),
  ('of', 'ADP', 'IN', 'prep', True),
  ('Azkaban', 'PROPN', 'NNP', 'pobj', False),
  ('(', 'PUNCT', '-LRB-', 'punct', False),
  ('film', 'NOUN', 'NN', 'appos', False),
  (')', 'PUNCT', '-RRB-', 'punct', False)],
 1: [('Harry', 'PROPN', 'NNP', 'compound', False),
  ('Potter', 'PROPN', 'NNP', 'nsubj', False),
  ('and', 'CCONJ', 'CC', 'cc', True),
  ('the', 'DET', 'DT', 'det', True),
  ('Prisoner', 'PROPN', 'NNP', 'conj', False),
  ('of', 'ADP', 'IN', 'prep', True),
  ('Azkaban', 'PROPN', 'NNP', 'pobj', False),
  ('is', 'AUX', 'VBZ', 'ROOT', True),
  ('a', 'DET', 'DT', 'det', True),
  ('2004', 'NUM', 'CD', 'nummod', False),
  ('fantasy', 'NOUN', 'NN', 'compound', False),
  ('film', 'NOUN', 'NN', 'attr', False),
  ('directed', 'VERB', 'VBN', 'acl', False),
  ('by', 'ADP', 'IN

# Dependency Tree

In [9]:
#Token dict 
def dependency_dict(doc):
    out = dict()
    root = ''
    for token in doc:
        out[token.text] = (token.dep_, token.head.text, token.head.pos_,[child for child in token.children])
        if token.dep_ == "ROOT":
            root = token.text
    return out, root

In [10]:
token_Dict1, root1 = dependency_dict(doc1)
token_Dict1, root1

({'Harry': ('compound', 'Potter', 'PROPN', []),
  'Potter': ('nsubj', 'is', 'AUX', [Harry, and, Prisoner]),
  'and': ('cc', 'directed', 'VERB', []),
  'the': ('det', 'Prisoner', 'PROPN', []),
  'Prisoner': ('conj', 'Potter', 'PROPN', [the, of]),
  'of': ('prep', 'Prisoner', 'PROPN', [Azkaban]),
  'Azkaban': ('pobj', 'of', 'ADP', []),
  'is': ('ROOT', 'is', 'AUX', [Potter, film]),
  'a': ('det', 'film', 'NOUN', []),
  '2004': ('nummod', 'film', 'NOUN', []),
  'fantasy': ('compound', 'film', 'NOUN', []),
  'film': ('attr', 'is', 'AUX', [a, 2004, fantasy, directed]),
  'directed': ('acl', 'film', 'NOUN', [by, and, distributed]),
  'by': ('agent', 'distributed', 'VERB', [Bros.]),
  'Alfonso': ('compound', 'Cuarón', 'PROPN', []),
  'Cuarón': ('pobj', 'by', 'ADP', [Alfonso]),
  'distributed': ('conj', 'directed', 'VERB', [by]),
  'Warner': ('compound', 'Bros.', 'PROPN', []),
  'Bros.': ('pobj', 'by', 'ADP', [Warner])},
 'is')

In [11]:
token_Dict2, root2 = dependency_dict(doc2)
token_Dict2, root2

({'Harry': ('compound', 'Potter', 'PROPN', []),
  'Potter': ('nsubj', 'spending', 'VERB', [Harry]),
  'has': ('aux', 'spending', 'VERB', []),
  'been': ('aux', 'spending', 'VERB', []),
  'spending': ('ROOT',
   'spending',
   'VERB',
   [Potter, has, been, summer, with, .]),
  'another': ('det', 'summer', 'NOUN', []),
  'unhappy': ('amod', 'summer', 'NOUN', []),
  'summer': ('dobj', 'spending', 'VERB', [another, unhappy]),
  'with': ('prep', 'spending', 'VERB', [Dursleys]),
  'the': ('det', 'Dursleys', 'PROPN', []),
  'Dursleys': ('pobj', 'with', 'ADP', [the]),
  '.': ('punct', 'spending', 'VERB', [])},
 'spending')

In [12]:
token_Dict3, root3 = dependency_dict(doc3)
token_Dict3, root3

({'The': ('det', 'film', 'NOUN', []),
  'film': ('nsubjpass', 'produced', 'VERB', [The]),
  'was': ('auxpass', 'produced', 'VERB', []),
  'produced': ('ROOT', 'produced', 'VERB', [film, was, by, for, .]),
  'by': ('agent', 'produced', 'VERB', [Reine]),
  'La': ('compound', 'Reine', 'PROPN', []),
  'Petite': ('compound', 'Reine', 'PROPN', []),
  'Reine': ('pobj', 'by', 'ADP', [La, Petite, and, Sélection]),
  'and': ('cc', 'Reine', 'PROPN', []),
  'ARP': ('compound', 'Sélection', 'PROPN', []),
  'Sélection': ('conj', 'Reine', 'PROPN', [ARP]),
  'for': ('prep', 'produced', 'VERB', [dollars]),
  '13.47': ('compound', 'million', 'NUM', []),
  'million': ('nummod', 'dollars', 'NOUN', [13.47]),
  'dollars': ('pobj', 'for', 'ADP', [million]),
  '.': ('punct', 'produced', 'VERB', [])},
 'produced')

# NER Tagging

In [13]:
def ner_tag(text):
    NER_tag_dict = dict()
    for i,line in enumerate(text):
        tags = []
        doc = nlp(str(line))

        for ent in doc.ents:
            # print(ent.text +'-' + ent.label_ + '\n')
            tags.append(ent.text +'-' + ent.label_)
        if len(tags) != 0:
            NER_tag_dict[i] = tags
    return NER_tag_dict

In [14]:
def ner_tag_sentence(sentence):
    doc = nlp(str(sentence))
    NER_tag_dict = dict()
    tags = []
    for ent in doc.ents:
        # print(ent.text +'-' + ent.label_ + '\n')
        NER_tag_dict[ent.text] = ent.label_
    return NER_tag_dict

# Binary Question

In [15]:
auxiliary_verbs = ["am", "is", "are", "was", "were", "can", "could", "have", "need", "should", "will", "would"]

In [16]:
#input: a single sentence, with its dependency dict and root word
def binaryQ(sentence, token_dict, root):
    output = ''
    if root in auxiliary_verbs:
        output += root.capitalize() + ' '
    for k in sentence.split():
        if k != root:
            output += k + ' '
    output = output[:-2]+ '?'
    return output
    

In [17]:
binaryQ(sentence1, token_Dict1, root1)

'Is Harry Potter and the Prisoner of Azkaban a 2004 fantasy film directed by Alfonso Cuarón and distributed by Warner Bros?'

# Who Question

In [18]:
pos_tag_sentence(sentence2)

{0: [('Harry', 'PROPN', 'NNP', 'ROOT', False)],
 1: [('Potter', 'NOUN', 'NN', 'ROOT', False)],
 2: [('has', 'VERB', 'VBZ', 'ROOT', True)],
 3: [('been', 'VERB', 'VBN', 'ROOT', True)],
 4: [('spending', 'VERB', 'VBG', 'ROOT', False)],
 5: [('another', 'DET', 'DT', 'ROOT', True)],
 6: [('unhappy', 'ADJ', 'JJ', 'ROOT', False)],
 7: [('summer', 'NOUN', 'NN', 'ROOT', False)],
 8: [('with', 'ADP', 'IN', 'ROOT', True)],
 9: [('the', 'DET', 'DT', 'ROOT', True)],
 10: [('Dursleys', 'PROPN', 'NNP', 'ROOT', False),
  ('.', 'PUNCT', '.', 'punct', False)]}

In [19]:
ner_tag_dict2 = ner_tag_sentence(sentence2)

In [20]:
dependency_dict2= dependency_dict(doc2)
dependency_dict2[0]

{'Harry': ('compound', 'Potter', 'PROPN', []),
 'Potter': ('nsubj', 'spending', 'VERB', [Harry]),
 'has': ('aux', 'spending', 'VERB', []),
 'been': ('aux', 'spending', 'VERB', []),
 'spending': ('ROOT',
  'spending',
  'VERB',
  [Potter, has, been, summer, with, .]),
 'another': ('det', 'summer', 'NOUN', []),
 'unhappy': ('amod', 'summer', 'NOUN', []),
 'summer': ('dobj', 'spending', 'VERB', [another, unhappy]),
 'with': ('prep', 'spending', 'VERB', [Dursleys]),
 'the': ('det', 'Dursleys', 'PROPN', []),
 'Dursleys': ('pobj', 'with', 'ADP', [the]),
 '.': ('punct', 'spending', 'VERB', [])}

In [21]:
sentence2.split()

['Harry',
 'Potter',
 'has',
 'been',
 'spending',
 'another',
 'unhappy',
 'summer',
 'with',
 'the',
 'Dursleys.']

In [22]:
#input: a single sentence, and its ner tag dict and dependency dict
#Who Question
def whoQ(sentence, ner_tag_dict, dependency_dict):
    #find PERSON tag
    theName = ''
    output = ''
    for k in ner_tag_dict.keys():
        if ner_tag_dict[k] == 'PERSON':
            #check if is a subject
            names = k.split()
            for n in names:
                print(dependency_dict[n])
                if dependency_dict[n][0] == 'nsubj':
                    theName = k
    print(theName)
    output = sentence.replace(theName, 'who')
    output = output[:-1] + "?"
    output = output[0].upper() + output[1:]
    return output
    

In [23]:
whoQ(sentence2, ner_tag_dict2, dependency_dict2[0])

('compound', 'Potter', 'PROPN', [])
('nsubj', 'spending', 'VERB', [Harry])
('pobj', 'with', 'ADP', [the])
Harry Potter


'Who has been spending another unhappy summer with the Dursleys?'

# How much Question


In [24]:
ner_tag_dict3 = ner_tag_sentence(sentence3)
dependency_dict3, root3 = dependency_dict(doc3)
# "The film was produced by La Petite Reine and ARP Sélection for 13.47 million euros." don't identify as MONEY
# "The film was produced by La Petite Reine and ARP Sélection for 13.47 million dollars."
    # How much was the film produced by La Petite Reine and ARP Sélection?
# "The film costs 13.47 million dollars."
    # How much does the film costs?


In [25]:
ner_tag_dict4 = ner_tag_sentence(sentence4)
dependency_dict4, root4 = dependency_dict(doc4)


In [26]:
ner_tag_dict3, dependency_dict3, root3

({'La Petite': 'PERSON', '13.47 million dollars': 'MONEY'},
 {'The': ('det', 'film', 'NOUN', []),
  'film': ('nsubjpass', 'produced', 'VERB', [The]),
  'was': ('auxpass', 'produced', 'VERB', []),
  'produced': ('ROOT', 'produced', 'VERB', [film, was, by, for, .]),
  'by': ('agent', 'produced', 'VERB', [Reine]),
  'La': ('compound', 'Reine', 'PROPN', []),
  'Petite': ('compound', 'Reine', 'PROPN', []),
  'Reine': ('pobj', 'by', 'ADP', [La, Petite, and, Sélection]),
  'and': ('cc', 'Reine', 'PROPN', []),
  'ARP': ('compound', 'Sélection', 'PROPN', []),
  'Sélection': ('conj', 'Reine', 'PROPN', [ARP]),
  'for': ('prep', 'produced', 'VERB', [dollars]),
  '13.47': ('compound', 'million', 'NUM', []),
  'million': ('nummod', 'dollars', 'NOUN', [13.47]),
  'dollars': ('pobj', 'for', 'ADP', [million]),
  '.': ('punct', 'produced', 'VERB', [])},
 'produced')

In [27]:
ner_tag_dict4, dependency_dict4, root4

({'13.47 million dollars': 'MONEY'},
 {'The': ('det', 'film', 'NOUN', []),
  'film': ('nsubj', 'costs', 'VERB', [The]),
  'costs': ('ROOT', 'costs', 'VERB', [film, dollars, .]),
  '13.47': ('compound', 'million', 'NUM', []),
  'million': ('nummod', 'dollars', 'NOUN', [13.47]),
  'dollars': ('dobj', 'costs', 'VERB', [million]),
  '.': ('punct', 'costs', 'VERB', [])},
 'costs')

In [28]:
#input: a single sentence, and its ner tag dict and dependency dict
#How much Question
def whoQ(sentence, ner_tag_dict, dependency_dict, root):
    theMoney = ""
    output = ""
    theSubj = ""
    for k in ner_tag_dict.keys():
        if ner_tag_dict[k] == 'MONEY':
            theMoney = k
    #check passive tense 
    sentence_lst = sentence.split()
    root_ind = sentence_lst.index(root)
    if root_ind != 0:
        word_in_front_of_root = sentence_lst[root_ind -1] 
        #if it's passive tense
        if dependency_dict[word_in_front_of_root][0] == 'auxpass':
            root_aux = word_in_front_of_root
        
        
        #if it's not passive tense
        else:
            output += 'How much'
            #check tense
            output += (do, does, did)
            #check subject
            for n in dependency_dict:
                if dependency_dict[n][0] == 'nsubj':
                    theSubj = n
            output += theSubj + (present tense of the root)


SyntaxError: invalid syntax (<ipython-input-28-b4d89f2f64bf>, line 29)

# What Question

In [29]:
sentence = "The trio return to Hogwarts for the school year on the Hogwarts Express train, which is suddenly boarded by dementors, ghostly prison guards that are searching for Black."
doc = nlp(sentence)

In [30]:
token_Dict, root = dependency_dict(doc)
token_Dict, root

({'The': ('det', 'trio', 'NOUN', []),
  'trio': ('nsubj', 'return', 'VERB', [The]),
  'return': ('ROOT', 'return', 'VERB', [trio, to, for, on, .]),
  'to': ('prep', 'return', 'VERB', [Hogwarts]),
  'Hogwarts': ('compound', 'Express', 'PROPN', []),
  'for': ('prep', 'searching', 'VERB', [Black]),
  'the': ('det', 'train', 'NOUN', []),
  'school': ('compound', 'year', 'NOUN', []),
  'year': ('pobj', 'for', 'ADP', [the, school]),
  'on': ('prep', 'return', 'VERB', [train]),
  'Express': ('compound', 'train', 'NOUN', [Hogwarts]),
  'train': ('pobj', 'on', 'ADP', [the, Express, ,, boarded, ,, guards]),
  ',': ('punct', 'train', 'NOUN', []),
  'which': ('nsubjpass', 'boarded', 'VERB', []),
  'is': ('auxpass', 'boarded', 'VERB', []),
  'suddenly': ('advmod', 'boarded', 'VERB', []),
  'boarded': ('relcl', 'train', 'NOUN', [which, is, suddenly, by]),
  'by': ('agent', 'boarded', 'VERB', [dementors]),
  'dementors': ('pobj', 'by', 'ADP', []),
  'ghostly': ('advmod', 'guards', 'NOUN', []),
  'pri

In [31]:
def whatQ1(sentence, dependency_dict):
    output = ''
    if 'which' in dependency_dict.keys():
        output = "What" + sentence.split("which",1)[1][:-1] + "?"
    return output

In [32]:
whatQ1(sentence, token_Dict)

'What is suddenly boarded by dementors, ghostly prison guards that are searching for Black?'

In [33]:
sentence = "Prisoner of Azkaban is often regarded by critics and fans alike as the best film in the series."
doc = nlp(sentence)
token_Dict, root = dependency_dict(doc)
token_Dict, root

({'Prisoner': ('nsubjpass', 'regarded', 'VERB', [of]),
  'of': ('prep', 'Prisoner', 'PROPN', [Azkaban]),
  'Azkaban': ('pobj', 'of', 'ADP', []),
  'is': ('auxpass', 'regarded', 'VERB', []),
  'often': ('advmod', 'regarded', 'VERB', []),
  'regarded': ('ROOT', 'regarded', 'VERB', [Prisoner, is, often, by, .]),
  'by': ('agent', 'regarded', 'VERB', [critics]),
  'critics': ('pobj', 'by', 'ADP', [and, fans, alike, as]),
  'and': ('cc', 'critics', 'NOUN', []),
  'fans': ('conj', 'critics', 'NOUN', []),
  'alike': ('advmod', 'critics', 'NOUN', []),
  'as': ('prep', 'critics', 'NOUN', [film]),
  'the': ('det', 'series', 'NOUN', []),
  'best': ('amod', 'film', 'NOUN', []),
  'film': ('pobj', 'as', 'ADP', [the, best, in]),
  'in': ('prep', 'film', 'NOUN', [series]),
  'series': ('pobj', 'in', 'ADP', [the]),
  '.': ('punct', 'regarded', 'VERB', [])},
 'regarded')

In [34]:
def whatQ2(sentence, dependency_dict):
    output = ''
    if 'is' in dependency_dict.keys():
        output = "What is " + sentence.split(" is ",1)[1][:-1] + "?"
    elif 'are' in dependency_dict.keys():
        output = "What are " + sentence.split(" are ",1)[1][:-1] + "?"
    elif 'was' in dependency_dict.keys():
        output = "What was " + sentence.split(" was ",1)[1][:-1] + "?"
    elif 'were' in dependency_dict.keys():
        output = "What were " + sentence.split("were ",1)[1][:-1] + "?"
    return output

In [35]:
whatQ2(sentence, token_Dict)

'What is often regarded by critics and fans alike as the best film in the series?'

# How Many Question

In [36]:
sentence = "3 sets for the film were built in Glen Coe, Scotland, near the Clachaig Inn."
#sentence = "The film nominated 6 Golden Globes."
#sentence = "The film was also nominated for four BAFTA Awards: Best British Film, Best Production Design, Best Makeup & Hair, and Best Visual Effects, and won public-voted Orange Film of the Year award."
doc = nlp(sentence)
token_Dict, root = dependency_dict(doc)
ner_tag_dict = ner_tag_sentence(sentence)
pos_tag_dict = pos_tag_sentence(sentence)

In [37]:
ner_tag_dict

{'3': 'CARDINAL',
 'Glen Coe': 'PERSON',
 'Scotland': 'GPE',
 'the Clachaig Inn': 'FAC'}

In [38]:
def HowManyQ(sentence, ner_tag_dict, dependency_dict, pos_tag_sentence, root):
    Number = ""
    output = ""
    clause = ""
    for k in ner_tag_dict.keys():
        if ner_tag_dict[k] == 'CARDINAL':
            Number = k
    if Number == "": return
    sentence_lst = sentence.split()
    for seg in sentence.split(',', 1):
        if Number in seg: clause = seg
    if root not in clause: return
    clause_lst = clause.split()
    root_ind = clause_lst.index(root)
    number_ind = clause_lst.index(Number)
    if number_ind < root_ind:
        output = "How many" + sentence.split(Number,1)[1][:-1]
    else:
        if root_ind != 0:
            word_in_front_of_root = sentence_lst[root_ind -1] 
            #if it's passive tense
            if token_Dict[word_in_front_of_root][0] == 'auxpass':
                middle = clause[clause.find(root): clause.find(Number)]
                output = "How many" + clause.split(Number,1)[1][:-1] + " " + word_in_front_of_root + " " + clause.split(word_in_front_of_root,1)[0].lower() + " " + middle
            #if it's not passive tense
            else:
                verb = clause[clause.find(root): clause.find(root)+len(root)]
                #if it's past tense
                if pos_tag_sentence(root)[0][0][2] in ("VBD", "VBN"):
                    output = "How many" + clause.split(Number,1)[1][:-1] + " did " + clause.split(Number,1)[0][0].lower() + clause.split(Number,1)[0][1:clause.find(root)] + nlp(verb)[0].lemma_
                #if it's not past tense
                else:
                    output = "How many" + clause.split(Number,1)[1][:-1] + " does " + clause.split(Number,1)[0][0].lower() + clause.split(Number,1)[0][1:clause.find(root)] + nlp(verb)[0].lemma_
    output = ' '.join(output.split()) + "?"
    return output

In [39]:
HowManyQ(sentence, ner_tag_dict, dependency_dict,  pos_tag_sentence, root)   

'How many sets for the film were built in Glen Coe, Scotland, near the Clachaig Inn?'

# How Long Question

In [176]:
sentence = "Four months after Harris's death, Cuarón chose Gambon as his replacement."
#sentence = "With Prisoner of Azkaban, production of the Harry Potter films was switched to an eighteen-month cycle."
doc = nlp(sentence)
token_Dict, root = dependency_dict(doc)
ner_tag_dict = ner_tag_sentence(sentence)
pos_tag_dict = pos_tag_sentence(sentence)

In [177]:
ner_tag_dict

{'Four months': 'DATE', 'Harris': 'PERSON'}

In [178]:
def HowOftenQ(sentence, ner_tag_dict, dependency_dict, pos_tag_sentence, root):
    date = ""
    output = ""
    clause = ""
    sentence = sentence[:-1]
    for k in ner_tag_dict.keys():
        if ner_tag_dict[k] == 'DATE':
            date = k
    if date == "": return
    date_lst = date.split()
    sentence_lst = sentence.split()
    root_lst_ind = sentence_lst.index(root)
    root_ind = sentence.index(root)
    date_ind = sentence.index(date)
    if date_ind < root_ind:
        word_in_front_of_root = sentence_lst[root_lst_ind - 1]
        if token_Dict[word_in_front_of_root][0] == 'auxpass':
            sentence_lst[root_lst_ind - 1] = sentence_lst[root_lst_ind - 2]
            sentence_lst[root_lst_ind - 2] = word_in_front_of_root
        else:
            sentence_lst[root_lst_ind] = nlp(root)[0].lemma_
            if pos_tag_sentence(root)[0][0][2] in ("VBD", "VBN"):
                sentence_lst.insert(root_lst_ind-1,"did")
            else: sentence_lst.insert(root_lst_ind-1,"does")
        for i in date_lst:
            sentence_lst.remove(i)
        output = "How long " + " ".join(sentence_lst)
    else:
        for seg in sentence.split(',', 1):
            if date in seg: clause = seg
        if root_ind != 0:
            word_in_front_of_root = sentence_lst[root_lst_ind - 1] 
            word_after_root = sentence_lst[root_lst_ind + 1]
            prep_ind = sentence.index(word_after_root)
            if token_Dict[word_after_root][0] != 'prep': word_after_root = ""
            #if it's passive tense
            if token_Dict[word_in_front_of_root][0] == 'auxpass':
                output = "How long " + word_in_front_of_root + " " + clause.split(word_in_front_of_root,1)[0].lower() +  sentence[root_ind:prep_ind+len(word_after_root)]
            else:
                #if it's past tense
                if pos_tag_sentence(root)[0][0][2] in ("VBD", "VBN"):
                    output = "How long" + " did " + clause.split(root,1)[0].lower() + nlp(root)[0].lemma_ + sentence[root_ind+len(root):prep_ind+len(word_after_root)]
                #if it's not past tense
                else:
                    output = "How long" + " does " + clause.split(root,1)[0].lower() + nlp(root)[0].lemma_ + sentence[root_ind+len(root):prep_ind+len(word_after_root)]
    output = ' '.join(output.split()) + "?"
    return output

In [179]:
HowOftenQ(sentence, ner_tag_dict, dependency_dict,  pos_tag_sentence, root)   

"How long after Harris's death, did Cuarón choose Gambon as his replacement?"