In [23]:
from collections import defaultdict
from collections import deque

import gensim
import nltk
import numpy as np
import pandas as pd
from google.colab import drive
from nltk import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [7]:
# Mount Google Drive at /content/drive; the Word2Vec model loaded later in this
# notebook is read from a path under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [22]:
# Word2Vec model used to embed both query tokens and relation names.
# Loaded once at module level so every scoring call reuses it.
word2vec_model = gensim.models.Word2Vec.load('/content/drive/MyDrive/UML/HW6/knowledge-graph/word2vec_train_dev.dat')


def get_rel_score_word2vecbase(rel, query):
    """Return the mean cosine similarity between a relation and a query.

    The query is lower-cased and tokenized; every token present in the model
    vocabulary is compared with the relation's vector and the similarities
    are averaged.

    Args:
        rel: Relation name as stored in the model vocabulary. Callers must
            prepend the ``ns:`` prefix themselves.
        query: Natural-language question text.

    Returns:
        The averaged cosine similarity, or ``0.0`` when the relation is not
        in the vocabulary or none of the query tokens are.
    """
    vectors = word2vec_model.wv
    if rel not in vectors:
        return 0.0

    w_embs = [vectors[w] for w in word_tokenize(query.lower()) if w in vectors]
    if not w_embs:
        # cosine_similarity rejects an empty sample list, so a query with no
        # in-vocabulary tokens is scored as "no similarity" instead of raising.
        return 0.0

    return np.mean(cosine_similarity(w_embs, [vectors[rel]]))


# Function to load the graph from file
def load_graph(path='graph'):
    """Load the knowledge graph as an adjacency list.

    Each non-blank line of the file is evaluated as a Python expression that
    yields a ``(head, relation, tail)`` sequence.

    Args:
        path: File to read. Defaults to ``'graph'`` in the working directory.

    Returns:
        A ``defaultdict(list)`` mapping each head to a list of
        ``[relation, tail]`` pairs, in file order.
    """
    graph = defaultdict(list)
    # The context manager closes the file even if a line fails to parse.
    with open(path) as handle:
        for raw_line in handle:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            record_text = raw_line.strip()
            if not record_text:
                continue
            # NOTE(review): eval executes arbitrary code found in the file.
            # ast.literal_eval would be safer if every line is a plain
            # literal - confirm against the data before switching.
            head, relation, tail = eval(record_text)[:3]
            graph[head].append([relation, tail])
    return graph


# Function to load the queries from file
# Preparing the queries
def load_queries(path='annotations'):
    """Load the annotated queries, one record per non-blank line.

    Args:
        path: File to read. Defaults to ``'annotations'`` in the working
            directory.

    Returns:
        A list with the evaluated Python object of every line, in file order.
    """
    queries = []
    # The context manager closes the file even if a line fails to parse.
    with open(path) as handle:
        for raw_line in handle:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            record_text = raw_line.strip()
            if not record_text:
                continue
            # NOTE(review): eval executes arbitrary code found in the file.
            # ast.literal_eval would be safer if every line is a plain
            # literal - confirm against the data before switching.
            queries.append(eval(record_text))
    return queries

In [9]:
# Adjacency list built by load_graph: graph[head] is a list of [relation, tail] pairs.
graph = load_graph()

In [10]:
# One evaluated record per line of the 'annotations' file, in file order.
queries = load_queries()

In [11]:
# Gold answers per query, used later for evaluation: field 5 of each query
# record is a list of dicts, and the 'AnswerArgument' of each one is kept.

actual_ans = []

for query_record in queries:
    gold_entities = []
    for answer_entry in query_record[5]:
        gold_entities.append(answer_entry['AnswerArgument'])

    actual_ans.append(gold_entities)

In [36]:
# Minimum relation/query similarity for a neighbour to be accepted as an answer.
threshold = 0.25


def _collect_answers(start_node, question, graph, threshold, score_fn):
    """Breadth-first search from ``start_node`` collecting predicted answers.

    Every unvisited neighbour is marked visited on first sight. If the
    relation leading to it scores above ``threshold`` against the question,
    the neighbour is recorded as an answer and not expanded; otherwise it is
    queued for further expansion.

    Args:
        start_node: Node the search starts from.
        question: Query text passed to ``score_fn``.
        graph: Mapping from node to a list of ``[relation, neighbour]`` pairs.
        threshold: Similarity a relation must exceed to yield an answer.
        score_fn: Callable ``(relation, question) -> similarity``.

    Returns:
        Answer nodes in discovery order.
    """
    answers = []
    # assumes node identifiers are hashable (they are already used as graph keys)
    visited = {start_node}
    frontier = deque([start_node])
    # A relation's score depends only on (relation, question), so it is
    # computed once per query rather than once per edge.
    relation_scores = {}

    while frontier:
        node = frontier.popleft()
        # .get avoids inserting empty entries into the defaultdict for leaves.
        for relation_name, neighbor in graph.get(node, ()):
            if neighbor in visited:
                continue
            visited.add(neighbor)

            relation = "ns:" + str(relation_name)
            if relation not in relation_scores:
                relation_scores[relation] = score_fn(relation, question)

            if relation_scores[relation] > threshold:
                answers.append(neighbor)
            else:
                frontier.append(neighbor)

    return answers


# One list of predicted answers per query: field 1 is the question text,
# field 2 the starting node.
predicted_ans = [
    _collect_answers(query_record[2], query_record[1], graph, threshold,
                     get_rel_score_word2vecbase)
    for query_record in queries
]

In [37]:
def evaluate(y_pred, y_true):
    """Compute precision, recall and F1 for one query.

    Args:
        y_pred: Predicted answers.
        y_true: Gold answers.

    Returns:
        A ``(precision, recall, f1_score)`` tuple. A metric whose denominator
        would be zero is reported as ``0``.
    """

    def _fraction_found(candidates, reference):
        # Share of `candidates` that also occur in `reference`; 0 when empty.
        if len(candidates) == 0:
            return 0
        hits = 0
        for item in candidates:
            if item in reference:
                hits += 1
        return hits / len(candidates)

    precision = _fraction_found(y_pred, y_true)
    recall = _fraction_found(y_true, y_pred)

    total = precision + recall
    f1_score = 0 if total == 0 else 2 * precision * recall / total

    return precision, recall, f1_score

In [38]:
# Macro-average precision, recall and F1 over all queries.
assert len(predicted_ans) == len(actual_ans), "expected one prediction list per query"

precision = 0
recall = 0
f1_score = 0

for query_pred, query_true in zip(predicted_ans, actual_ans):
    # Score each query once instead of re-running evaluate() for every metric.
    query_precision, query_recall, query_f1 = evaluate(query_pred, query_true)
    precision += query_precision
    recall += query_recall
    f1_score += query_f1

# With no queries the sums stay at 0; skip the division rather than raise
# ZeroDivisionError.
num_queries = len(predicted_ans)
if num_queries:
    precision = precision / num_queries
    recall = recall / num_queries
    f1_score = f1_score / num_queries

print('Precision:' , round(precision,4))
print('Recall:', round(recall,4))
print('F1-Score:', round(f1_score,4))

Precision: 0.1572
Recall: 0.5505
F1-Score: 0.206
