In [1]:
import pandas as pd
import csv
import spacy
from spacy import displacy

# spaCy pipeline used by the later cells to parse questions and score their
# similarity.
nlp = spacy.load("en_core_web_lg")

# Both CSV files are read with the same dialect, so the reader options are
# declared once and shared by the two calls.
csv_options = {
    'encoding': 'utf8',
    'delimiter': ',',
    'quotechar': '"',
    'quoting': csv.QUOTE_MINIMAL,
}
train_data = pd.read_csv('data/QA_train_data.csv', **csv_options)
test_data = pd.read_csv('data/QA_test_data.csv', **csv_options)

In [2]:
# Training set: question -> answer lookup. If a question text occurs more
# than once, the answer of its last occurrence is the one that is kept.
questions = train_data['Question']
answers = train_data["Answer"]
qa_dict = dict(zip(questions, answers))

# Parsed form of every training question.
# NOTE(review): not referenced by the other cells visible in this notebook.
processed_questions = list(map(nlp, questions))

# Test set: question -> expected answer lookup, built the same way.
test_questions = test_data['Question']
test_answers = test_data['Answer']
test_dict = dict(zip(test_questions, test_answers))

In [3]:
def find_most_similar_question(test_question, questions, pipeline=None):
    """Return the candidate question that is most similar to ``test_question``.

    The query and every candidate are parsed with ``pipeline`` and compared
    through the parsed object's ``similarity`` method. The candidate with the
    highest score is returned; on a tie the earliest candidate wins.

    Args:
        test_question: Text of the question to look up.
        questions: Iterable of candidate question strings (list, tuple,
            pandas Series, dict keys, ...).
        pipeline: Optional callable mapping a string to an object that has a
            ``similarity(other)`` method. Defaults to the notebook-level
            ``nlp`` pipeline.

    Returns:
        The best-matching candidate, or ``None`` when ``questions`` is empty.
    """
    if pipeline is None:
        pipeline = nlp

    # Materialise the candidates so the final lookup is purely positional.
    # ``series[i]`` on a pandas Series is label-based, so it returns the wrong
    # row (or raises KeyError) whenever the index is not exactly 0..n-1.
    candidates = list(questions)
    if not candidates:
        return None

    query_doc = pipeline(test_question)
    scores = [query_doc.similarity(pipeline(candidate)) for candidate in candidates]

    # max() with a key returns the first index among equal scores, which is
    # the same tie-breaking as ``scores.index(max(scores))``.
    best_index = max(range(len(scores)), key=scores.__getitem__)
    return candidates[best_index]

In [4]:
def find_answer(test_question, qa_dict):
    """Return the stored answer of the known question closest to ``test_question``.

    Args:
        test_question: Text of the question to answer.
        qa_dict: Mapping from known question text to its answer.

    Returns:
        The answer stored under the most similar key of ``qa_dict``, or
        ``None`` when ``qa_dict`` is empty.
    """
    if not qa_dict:
        return None

    # Take the candidates from the mapping that was passed in instead of a
    # notebook-level variable, so the question matched is always one that
    # ``qa_dict`` can actually answer.
    best_match = find_most_similar_question(test_question, list(qa_dict))
    return qa_dict.get(best_match)

In [5]:
def evaluate_result(test_questions):
    """Return the fraction of test questions that are answered exactly right.

    A question counts as correct when the answer produced by ``find_answer``
    (using the notebook-level ``qa_dict``) equals the expected answer stored
    in the notebook-level ``test_dict``.

    Args:
        test_questions: Sized iterable of question strings to evaluate.

    Returns:
        Accuracy as a float between 0 and 1; ``0.0`` when there are no
        questions to evaluate.
    """
    # Use len() rather than truthiness: a pandas Series raises on ``not series``.
    total = len(test_questions)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = sum(
        1
        for test_question in test_questions
        if test_dict.get(test_question) == find_answer(test_question, qa_dict)
    )
    return correct / total

In [6]:
# Show every test question next to the answer retrieved for it.
for question in test_questions:
    retrieved_answer = find_answer(question, qa_dict)
    print(question, "|", retrieved_answer)

From which country is the film A Beautiful Mind? | America
Where was the film A Beautiful Mind made? | 2001
In which country was the film A Beautiful Mind made? | 2001
Where was the movie A Beautiful Mind made? | 2001
From which country is the film? | Russell Crowe, along with Ed Harris, Jennifer Connelly, Paul Bettany, Adam Goldberg, Judd Hirsch, Josh Lucas, Anthony Rapp, and Christopher Plummer in supporting roles
From which country is this film? | Russell Crowe, along with Ed Harris, Jennifer Connelly, Paul Bettany, Adam Goldberg, Judd Hirsch, Josh Lucas, Anthony Rapp, and Christopher Plummer in supporting roles
When was the film A beautiful Mind released? | 2001
Of which genre is the film A Beautiful Mind? | biographical drama
Which kind of film is  A Beautiful Mind? | biographical drama
Which type of movie is  A Beautiful Mind? | biographical drama
What is John Nash's profession? | mathematician
What is John Nash's job? | mathematician
Which prices did John Nash win? | Nobel price

In [7]:
# Share of test questions whose retrieved answer equals the expected one.
accuracy = evaluate_result(test_questions)
print("Score: ", accuracy)

Score:  0.6136363636363636
