# 2024 COMP90042 Project
*Make sure you change the file name with your group id.*

# Readme
*If there is something to be noted for the marker, please mention here.*

*If you are planning to implement a program in an Object Oriented Programming style, please put that code at the bottom of this ipynb file.*

# 1.DataSet Processing
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
# prompt: mount drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import torchdata.datapipes as dp
import torchtext.transforms as T
import spacy
from torchtext.vocab import build_vocab_from_iterator
eng = spacy.load("en_core_web_sm")

In [None]:
import pandas as pd

# Load the evidence corpus and the claim sets. With orient='index' the top-level
# JSON keys (evidence ids / claim ids) become the DataFrame index.
evidences = pd.read_json('/content/drive/MyDrive/nlp/data/evidence.json', orient='index')
train_claims = pd.read_json('/content/drive/MyDrive/nlp/data/train-claims.json', orient='index')
dev_claims = pd.read_json('/content/drive/MyDrive/nlp/data/dev-claims.json', orient='index')

# Move the ids out of the index into a regular column and give the columns
# explicit names.
evidences.reset_index(inplace=True)
evidences.columns = ['evidence_id', 'evidence_text']

train_claims.reset_index(inplace=True)
train_claims.rename(columns={'index': 'claim_id'}, inplace=True)

dev_claims.reset_index(inplace=True)
dev_claims.rename(columns={'index': 'claim_id'}, inplace=True)
# NOTE(review): later cells rebind `train_claims` / `dev_claims` to plain lists,
# so this cell must be re-run before the DataFrames can be used again.

evidence_id = evidences['evidence_id']
evidence_text = evidences['evidence_text']
# After reset_index the index is the positional row number of each evidence.
evidence_idx = evidences.index.tolist()

# evidence id -> positional row number in `evidences`
evidence_id_dict = dict(zip(evidence_id, evidence_idx))

train_claims_text = train_claims['claim_text']
train_evidence_ids = train_claims['evidences']
# Map each evidence id to its corresponding row number for faster processing.
# Raises KeyError if a claim references an id that is missing from the corpus.
train_evidence_idxs = train_evidence_ids.apply(lambda x: [evidence_id_dict[evidence_id] for evidence_id in x])

dev_claims_text = dev_claims['claim_text']
dev_evidence_ids = dev_claims['evidences']
# Same id -> row-number mapping for the development claims.
dev_evidence_idxs = dev_evidence_ids.apply(lambda x: [evidence_id_dict[evidence_id] for evidence_id in x])

In [None]:
#text preprocessing
import nltk
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

tt = TweetTokenizer()
# Keep the stop-word set under its own name. Rebinding `stopwords` would shadow
# the imported nltk.corpus.stopwords module, leaving the module unusable for any
# later code that needs it.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_data(text):
    """Tokenise `text` and normalise it into a list of lemmas.

    Each token produced by the TweetTokenizer is lower-cased; tokens that are
    English stop words or that contain non-alphabetic characters are dropped,
    and the survivors are lemmatised with WordNet.

    Args:
        text: Raw claim or evidence string.

    Returns:
        List of lemmatised, lower-case, alphabetic tokens (possibly empty).
    """
    processed_tokens = []

    for token in tt.tokenize(text):
        token = token.lower()
        if token not in stop_words and token.isalpha():
            processed_tokens.append(lemmatizer.lemmatize(token))

    return processed_tokens

train_claims_text_processed = train_claims_text.apply(preprocess_data)
dev_claims_text_precessed = dev_claims_text.apply(preprocess_data)
evidence_text_processed = evidence_text.apply(preprocess_data)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
from gensim.models import Word2Vec

# Train Word2Vec model
model = Word2Vec(sentences=evidence_text_processed, vector_size=300, window=10, min_count=3, workers=8, sg=1, hs=0)

In [None]:
# Save the Word2Vec model
model.save('/content/drive/MyDrive/nlp/data/word2vec_model.bin')


In [None]:
import json
# Persist the tokenised corpora. The `with` blocks guarantee every file is
# flushed and closed, which matters on a mounted Drive where an unclosed
# handle can leave a truncated file behind.
with open("/content/drive/MyDrive/nlp/data/train_claims_text_processed.json", "w") as f:
    json.dump(train_claims_text_processed.tolist(), f)
with open("/content/drive/MyDrive/nlp/data/dev_claims_text_precessed.json", "w") as f:
    json.dump(dev_claims_text_precessed.tolist(), f)
with open("/content/drive/MyDrive/nlp/data/evidence_text_processed.json", "w") as f:
    json.dump(evidence_text_processed.tolist(), f)

In [None]:
import json

# Reload the cached tokenised corpora as plain Python lists of token lists.
# Files are opened in `with` blocks so the handles are closed deterministically.
with open("/content/drive/MyDrive/nlp/data/train_claims_text_processed.json", "r") as f:
    train_claims_text_processed = json.load(f)
with open("/content/drive/MyDrive/nlp/data/dev_claims_text_precessed.json", "r") as f:
    dev_claims_text_precessed = json.load(f)
with open("/content/drive/MyDrive/nlp/data/evidence_text_processed.json", "r") as f:
    evidence_text_processed = json.load(f)

In [None]:
from gensim.models import Word2Vec

model = Word2Vec.load('/content/drive/MyDrive/nlp/data/word2vec_model.bin')

In [None]:
import numpy as np
evidence_text_array = np.array(evidence_text_processed, dtype=object)

flattened_train_evidence_idxs = list(set([idx for sublist in train_evidence_idxs for idx in sublist]))

train_evidence_text = evidence_text_array[flattened_train_evidence_idxs]

In [None]:
train_evidence_text[0]

['heat',
 'likely',
 'increase',
 'risk',
 'mortality',
 'city',
 'northern',
 'part',
 'country',
 'southern',
 'region',
 'country']

In [None]:
from gensim.models import Word2Vec

# Train Word2Vec model
model = Word2Vec(sentences=train_evidence_text, vector_size=300, window=10, min_count=3, workers=8, sg=1, hs=0)

In [None]:
import numpy as np
def generate_embedding(text, model):
    """Return the mean Word2Vec vector of the in-vocabulary tokens of `text`.

    Args:
        text: Iterable of tokens.
        model: Object exposing `wv.key_to_index`, `wv[token]` and `vector_size`
            (a trained gensim Word2Vec model in this notebook).

    Returns:
        1-D numpy array of length `model.vector_size`; all zeros when no token
        of `text` is known to the model.
    """
    known_vectors = []
    for token in text:
        # Out-of-vocabulary tokens contribute nothing to the average.
        if token in model.wv.key_to_index:
            known_vectors.append(model.wv[token])

    if len(known_vectors) == 0:
        return np.zeros(model.vector_size)
    return np.mean(known_vectors, axis=0)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Generate embeddings for train claim_text
train_claim_text_embeddings = [generate_embedding(text, model) for text in train_claims_text_processed]

# Generate embeddings for dev claim_text
dev_claim_text_embeddings = [generate_embedding(text, model) for text in dev_claims_text_precessed]

# Generate embeddings for all evidence texts
evidence_embeddings = [generate_embedding(text, model) for text in evidence_text_processed]




In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Function to compute cosine similarity scores for all claims and evidence embeddings
def compute_similarity_scores(claim_embeddings, evidence_embeddings):
    """Return the pairwise cosine-similarity matrix between claims and evidences.

    Row i / column j holds the similarity between claim i and evidence j, as
    computed by sklearn's `cosine_similarity`.
    """
    return cosine_similarity(claim_embeddings, evidence_embeddings)


# Compute cosine similarity scores for training claims and evidence embeddings
train_similarity_scores = compute_similarity_scores(train_claim_text_embeddings, evidence_embeddings)
dev_similarity_scores = compute_similarity_scores(dev_claim_text_embeddings, evidence_embeddings)

In [None]:
import torch
# Function to compute recall at k
def compute_recall_at_k(similarity_scores, true_indices, k):
    """Average recall@k of a similarity-based retriever.

    Args:
        similarity_scores: 2-D array-like (claims x candidates) of scores,
            higher meaning more similar.
        true_indices: Iterable with, for each claim (in row order), the
            collection of relevant candidate indices. Iterated positionally, so
            a pandas Series with a non-default index is handled correctly.
        k: Number of top-scoring candidates to keep per claim; clamped to the
            number of available candidates.

    Returns:
        Mean over claims of (relevant candidates found in the top k) /
        (number of relevant candidates). Claims without any relevant candidate
        have no defined recall and are skipped; 0.0 is returned when no claim
        can be scored.
    """
    scores = torch.as_tensor(similarity_scores, dtype=torch.float32)
    if scores.numel() == 0:
        return 0.0

    # torch.topk raises if k exceeds the size of the ranked dimension.
    k = min(k, scores.size(-1))
    top_k_indices = torch.topk(scores, k, dim=-1).indices.tolist()

    recall_values = []
    for retrieved, relevant in zip(top_k_indices, true_indices):
        relevant = list(relevant)
        if not relevant:
            continue  # recall is undefined without ground truth
        retrieved = set(retrieved)  # O(1) membership instead of scanning a list
        hits = sum(1 for idx in relevant if idx in retrieved)
        recall_values.append(hits / len(relevant))

    if not recall_values:
        return 0.0
    return sum(recall_values) / len(recall_values)

# Example usage:
# Compute recall at k for training set
train_recall_at_k = compute_recall_at_k(train_similarity_scores, train_evidence_idxs, 500)
print("Training Recall at K:", train_recall_at_k)

# Compute recall at k for dev set
dev_recall_at_k = compute_recall_at_k(dev_similarity_scores, dev_evidence_idxs, 6)
print("Dev Recall at K:", dev_recall_at_k)

Training Recall at K: 0.1245928338762216
Dev Recall at K: 0.03398268398268398


In [None]:
import torch
def get_top_k_indices(similarity_scores, k=1000):
    """Return the indices of the `k` highest scores in every row.

    Args:
        similarity_scores: Score matrix (num_claims x num_evidences), either a
            numpy array (converted to a float tensor) or a PyTorch tensor.
        k: Number of indices to keep per row.

    Returns:
        Tensor of shape (num_claims, k) with column indices ordered from the
        highest to the lowest score.
    """
    scores = similarity_scores
    if isinstance(scores, np.ndarray):
        scores = torch.FloatTensor(scores)

    return torch.topk(scores, k, dim=1, largest=True, sorted=True).indices

# Retrieve candidate evidence per claim: the 1000 most similar evidences for
# every training claim (pool for negative sampling) and the 10 most similar
# for every development claim (candidates to re-rank).
train_top_indices = get_top_k_indices(train_similarity_scores, k=1000)
dev_top_indices = get_top_k_indices(dev_similarity_scores, k=10)

In [None]:
train_top_indices[0]

tensor([ 611464,   60163,  288364,  845051,  601231, 1032935,  395937,  776992,
        1029773,  519496,  167527,  609207, 1174437,  711638, 1154814,   21686,
         714969,  198948,  749808,  490550,  442946,  500376, 1019366,   67788,
         903458,  454739, 1145495,  872650, 1070491,  159167,  225665,  131293,
         716982,  239651,   55562,  140540,  363502, 1089159,   56861,  264363,
         371301,  804201,  512209, 1038450,  423643,  907938, 1076085,  885476,
         439640,  377026,  544335,  501796, 1194317, 1028873,  963266,  682716,
        1178054,  627012,  385414, 1110053,  113137, 1079703, 1091017, 1114128,
         292570,  305506,  515748,  757808,  398396,  196321,  573869, 1165038,
         995391,  363157,  881617,  343426,  384911,  218151, 1023876,  383923,
        1134540,  810093, 1145973,  580126,  585382, 1015851,   74910,  204199,
         595520,   26898, 1178854,  298082,  494161,  109299,  398589,  914091,
         922561,  431652, 1138728,  1411

In [None]:
import random
def prepare_train(train_claims_text, evidence_text_processed, evidence_idxs=None, top_indices=None, num_negatives=20):
  """Build (claim, evidence, label) triples for point-wise training.

  Every gold evidence of a claim yields a triple labelled 1. Up to
  `num_negatives` retrieved-but-not-gold evidences yield triples labelled -1.

  Args:
    train_claims_text: Iterable of tokenised claims.
    evidence_text_processed: Sequence of tokenised evidences, indexable by
      evidence row number.
    evidence_idxs: Per-claim gold evidence row numbers. Defaults to the
      notebook-level `train_evidence_idxs`.
    top_indices: Per-claim retrieved candidate row numbers (tensor rows or
      lists). Defaults to the notebook-level `train_top_indices`.
    num_negatives: Maximum number of negatives sampled per claim.

  Returns:
    Tuple `(train_claims, train_evidences, train_labels)` of parallel lists.
  """
  if evidence_idxs is None:
    evidence_idxs = train_evidence_idxs
  if top_indices is None:
    top_indices = train_top_indices

  train_claims = []
  train_evidences = []
  train_labels = []

  for claim, positives, candidates in zip(train_claims_text, evidence_idxs, top_indices):
    positive_idxs = [int(idx) for idx in positives]
    positive_set = set(positive_idxs)

    # Convert tensor rows to plain ints before filtering: 0-d tensors hash by
    # object identity, so a set difference between tensors and ints removes
    # nothing and gold evidence could be sampled as a negative.
    candidate_idxs = candidates.tolist() if hasattr(candidates, "tolist") else list(candidates)
    negative_pool = [int(idx) for idx in candidate_idxs if int(idx) not in positive_set]
    # random.sample raises when asked for more items than the pool holds.
    negative_idxs = random.sample(negative_pool, min(num_negatives, len(negative_pool)))

    for idx in positive_idxs:
      train_claims.append(claim)
      train_evidences.append(evidence_text_processed[idx])
      train_labels.append(1)  # relevant evidence

    for idx in negative_idxs:
      train_claims.append(claim)
      train_evidences.append(evidence_text_processed[idx])
      train_labels.append(-1)  # irrelevant evidence

  return train_claims, train_evidences, train_labels

train_claims, train_evidences, train_labels = prepare_train(train_claims_text_processed, evidence_text_processed)


In [None]:
import random

def prepare_evaluation_set(dev_claims_text, evidence_text_processed, evidence_idxs, top_indices):
    """Flatten retrieved candidates into labelled (claim, evidence) pairs.

    Args:
        dev_claims_text: Iterable of claims.
        evidence_text_processed: Sequence of evidences indexable by row number.
        evidence_idxs: Per-claim collection of gold evidence row numbers.
        top_indices: Per-claim iterable of candidate row numbers; each element
            must provide `.item()` (e.g. the rows of a tensor).

    Returns:
        Tuple `(eval_claims, eval_evidences, eval_labels)` of parallel lists,
        where the label is 1 for a gold evidence and 0 otherwise.
    """
    eval_claims, eval_evidences, eval_labels = [], [], []

    for row, claim in enumerate(dev_claims_text):
        candidate_rows = top_indices[row]
        relevant = evidence_idxs[row]

        # Unwrap the tensor scalars once so they can be used as list indices
        # and compared against the gold row numbers.
        candidate_ids = [candidate.item() for candidate in candidate_rows]

        eval_claims.extend([claim] * len(candidate_ids))
        eval_evidences.extend(evidence_text_processed[cid] for cid in candidate_ids)
        eval_labels.extend(1 if cid in relevant else 0 for cid in candidate_ids)

    return eval_claims, eval_evidences, eval_labels

# Pair each development claim with its retrieved candidates. The tokenised
# claims are passed (not the raw strings in `dev_claims_text`) so that claims
# and evidences share the token-list representation used by the rest of the
# pipeline.
dev_claims, dev_evidences, dev_labels = prepare_evaluation_set(dev_claims_text_precessed, evidence_text_processed, dev_evidence_idxs, dev_top_indices)


In [None]:
import random
def prepare_training_pairs(claims, evidences, ground_truth_indices, top_indices, num_negatives=10):
    """Build (claim, positive evidence, negative evidence) ranking triples.

    For every gold evidence of a claim, up to `num_negatives` evidences are
    sampled from the claim's retrieved candidates that are not gold.

    Args:
        claims: Iterable of tokenised claims.
        evidences: Sequence of tokenised evidences indexable by row number.
        ground_truth_indices: Per-claim gold evidence row numbers.
        top_indices: Per-claim retrieved candidate row numbers (tensor rows or
            lists).
        num_negatives: Maximum number of negatives sampled per gold evidence.

    Returns:
        Tuple `(train_claims, x1, x2, y)` of parallel lists, where `x1` is the
        positive evidence, `x2` the negative one and `y` is always 1 (x1 should
        rank above x2).
    """
    train_claims = []
    x1 = []
    x2 = []
    y = []

    # Use the arguments rather than notebook globals, and iterate positionally
    # so pandas Series, lists and tensors all work.
    for claim, positives, candidates in zip(claims, ground_truth_indices, top_indices):
        positive_idxs = [int(idx) for idx in positives]
        positive_set = set(positive_idxs)

        # 0-d tensors hash by identity, so they must be turned into ints before
        # gold evidences can be filtered out of the negative pool.
        candidate_idxs = candidates.tolist() if hasattr(candidates, "tolist") else list(candidates)
        negative_pool = [int(idx) for idx in candidate_idxs if int(idx) not in positive_set]
        sample_size = min(num_negatives, len(negative_pool))

        for pos_idx in positive_idxs:
            for neg_idx in random.sample(negative_pool, sample_size):
                train_claims.append(claim)
                x1.append(evidences[pos_idx])
                x2.append(evidences[neg_idx])
                y.append(1)

    return train_claims, x1, x2, y
train_claims, x1, x2, y = prepare_training_pairs(train_claims_text_processed, evidence_text_processed, train_evidence_idxs, train_top_indices)

In [None]:
import torch
import numpy as np

# Build the token -> row-index vocabulary from the Word2Vec model and extend
# its embedding table with rows for the `<unk>` and `<pad>` special tokens.
embedding_dim = model.vector_size
vocab = {word: idx for idx, word in enumerate(model.wv.index_to_key)}

# Calculate the current maximum index in the vocabulary
max_index = max(vocab.values())


# vocab['<cls>'] = max_index + 1
# vocab['<sep>'] = max_index + 2
# The two special tokens take the next two free indices, in this order; the
# rows stacked onto the embedding matrix below must follow the same order.
vocab['<unk>'] = max_index + 1
vocab['<pad>'] = max_index + 2

#vocab['<unk>'] = len(vocab)

# Random embedding for the single `<unk>` token (unseeded, so it differs
# between runs).
random_embeddings = np.random.randn(1, embedding_dim)

# Extend the embedding matrix with the special-token rows.
# NOTE(review): assumes the rows of model.wv.vectors follow the order of
# model.wv.index_to_key - confirm against the gensim version in use.
padding_embeddings = np.zeros((1, embedding_dim))  # Typically zero vector for padding
extended_embeddings = np.vstack([
    model.wv.vectors,  # Existing embeddings from Word2Vec
    random_embeddings,  # Embeddings for `<unk>`
    padding_embeddings  # Zero embeddings for `<pad>`
])
embedding_matrix = torch.FloatTensor(extended_embeddings)

# Sanity check: the number of rows should be exactly the maximum vocab index + 1.
print(f"New size of embedding matrix: {embedding_matrix.size(0)}")
print(f"Maximum index in vocab: {max(vocab.values())}")


New size of embedding matrix: 144508
Maximum index in vocab: 144507


In [None]:
def text_to_indices(text, vocab):
    """Map every token of `text` to its vocabulary index.

    Tokens missing from `vocab` are mapped to the index stored under '<unk>'.
    """
    indices = []
    for token in text:
        indices.append(vocab.get(token, vocab['<unk>']))
    return indices

def convert_to_indices(train_claims, x1, x2, vocab):
    """Convert the claims and both evidence lists from tokens to vocab indices.

    Args:
        train_claims: List of tokenised claims.
        x1: List of tokenised positive evidences.
        x2: List of tokenised negative evidences.
        vocab: Token -> index mapping containing an '<unk>' entry.

    Returns:
        Tuple `(train_claims_indices, x1_indices, x2_indices)`, each a list of
        index lists parallel to its input.
    """
    def encode_all(texts):
        # Encode one collection of token lists with the shared vocabulary.
        return [text_to_indices(tokens, vocab) for tokens in texts]

    train_claims_indices = encode_all(train_claims)
    x1_indices = encode_all(x1)
    x2_indices = encode_all(x2)
    return train_claims_indices, x1_indices, x2_indices

train_claims_indices, x1_indices, x2_indices = convert_to_indices(train_claims, x1, x2, vocab)

# def convert_to_indicies(claims, evidences, vocab):
#     train_claims_indices = []
#     evidences_indices = []

#     for claim in claims:
#         train_claims_indices.append(text_to_indices(claim, vocab))

#     for evidence in evidences:
#         evidences_indices.append(text_to_indices(evidence, vocab))

#     return train_claims_indices, evidences_indices

# train_claims_indices, evidences_indices = convert_to_indicies(train_claims, train_evidences, vocab)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

class EvidenceRankingDataset(Dataset):
    """Dataset of (claim, evidence1, evidence2, label) ranking examples.

    The four constructor arguments are parallel sequences; item `i` is the
    tuple of their `i`-th elements.
    """

    def __init__(self, claims, evidences1, evidences2, labels):
        self.claims, self.evidences1 = claims, evidences1
        self.evidences2, self.labels = evidences2, labels

    def __len__(self):
        # The labels define the number of examples.
        return len(self.labels)

    def __getitem__(self, idx):
        columns = (self.claims, self.evidences1, self.evidences2, self.labels)
        return tuple(column[idx] for column in columns)

def collate_fn(batch):
    """Collate ranking examples into padded index tensors.

    Args:
        batch: List of `(claim, evidence1, evidence2, label)` tuples whose
            first three members are lists of vocabulary indices.

    Returns:
        Tuple `(claims, evidences1, evidences2, labels)`: three long tensors
        padded to the longest sequence in the batch with the '<pad>' index
        (batch first), and a float tensor of labels.
    """
    claims, evidences1, evidences2, labels = zip(*batch)

    def pad(sequences):
        # Turn every index list into a long tensor, then right-pad the batch.
        tensors = [torch.tensor(sequence, dtype=torch.long) for sequence in sequences]
        return pad_sequence(tensors, batch_first=True, padding_value=vocab['<pad>'])

    claims_padded = pad(claims)
    evidences1_padded = pad(evidences1)
    evidences2_padded = pad(evidences2)
    return claims_padded, evidences1_padded, evidences2_padded, torch.tensor(labels, dtype=torch.float)


train_dataset = EvidenceRankingDataset(train_claims_indices, x1_indices, x2_indices, y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True, collate_fn=collate_fn)


# 2. Model Implementation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SiameseNetwork(nn.Module):
    """Siamese GRU encoder scoring claim/evidence pairs by cosine similarity.

    Claims and evidences share one embedding layer (initialised from
    `embedding_matrix` and fine-tuned) and one single-layer GRU.
    """

    def __init__(self, embedding_matrix, hidden_dim):
        super().__init__()
        embedding_dim = embedding_matrix.size(1)
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)

    def _encode(self, token_ids):
        """Return the GRU's final hidden state for a batch of index sequences."""
        _, final_hidden = self.gru(self.embedding(token_ids))
        # Drop the leading layer axis so the result is [batch_size, hidden_dim].
        # NOTE(review): the final state is taken after any trailing padding
        # tokens in the sequence have been read by the GRU.
        return final_hidden.squeeze(0)

    def forward_one(self, claim, evidence):
        """Cosine similarity between the encodings of `claim` and `evidence`."""
        claim_vector = self._encode(claim)
        evidence_vector = self._encode(evidence)
        return F.cosine_similarity(claim_vector, evidence_vector, dim=1)

    def forward(self, claim, evidence1, evidence2):
        """Score both evidences against the same claim.

        Returns:
            Tuple `(similarity1, similarity2)` for `evidence1` and `evidence2`.
        """
        return self.forward_one(claim, evidence1), self.forward_one(claim, evidence2)



In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn import MarginRankingLoss
import torch.optim as optim

# DataLoader
#dataset = EvidenceRankingDataset(train_claims_indices, evidence_pairs, labels)
#train_loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

# Model, optimiser and loss.
# Train on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# hidden_dim fixes the shape of the GRU weights stored in any checkpoint, so the
# same value must be used when the model is re-created for loading.
gru_model = SiameseNetwork(embedding_matrix, hidden_dim=1024).to(device)
optimizer = optim.Adam(gru_model.parameters(), lr=0.005)
# The training pairs are all labelled 1, i.e. the first evidence should score
# higher than the second; the margin is the gap the loss asks for.
criterion = MarginRankingLoss(margin=0.7)

# Training loop
def train(model, loader, optimizer, criterion, device):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Module called as `model(claims, evidences1, evidences2)` and
            returning two score tensors.
        loader: Sized iterable of `(claims, evidences1, evidences2, labels)`
            tensor batches.
        optimizer: Optimiser over the model's parameters.
        criterion: Loss called as `criterion(scores1, scores2, labels)`.
        device: Device the batches are moved to.

    Returns:
        Sum of the batch losses divided by `len(loader)`.
    """
    model.train()
    running_loss = 0
    for batch in loader:
        claims, evidences1, evidences2, labels = [tensor.to(device) for tensor in batch]

        optimizer.zero_grad()
        first_scores, second_scores = model(claims, evidences1, evidences2)
        batch_loss = criterion(first_scores, second_scores, labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss / len(loader)

for epoch in range(10):
    epoch_loss = train(gru_model, train_loader, optimizer, criterion, device)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")


Epoch 1, Loss: 0.6997
Epoch 2, Loss: 0.5854
Epoch 3, Loss: 0.4496
Epoch 4, Loss: 0.4108
Epoch 5, Loss: 0.3654
Epoch 6, Loss: 0.3287
Epoch 7, Loss: 0.3149
Epoch 8, Loss: 0.2557
Epoch 9, Loss: 0.2396
Epoch 10, Loss: 0.2347


In [None]:
def evaluate_evidence_retrieval(predicted_indices_list, actual_indices_list, k=5):
    """Macro-averaged recall, precision and F-score of the top-k predictions.

    Args:
        predicted_indices_list: Per-claim ranked lists of predicted indices.
        actual_indices_list: Per-claim collections of gold indices.
        k: Number of leading predictions considered per claim; precision is
            computed with `k` as the denominator.

    Returns:
        Dict with the keys "average_recall", "average_precision" and
        "average_fscore".
    """
    assert len(predicted_indices_list) == len(actual_indices_list), "Both inputs must have the same length."

    recall_sum = 0.0
    precision_sum = 0.0
    fscore_sum = 0.0
    num_claims = len(predicted_indices_list)

    for predicted, actual in zip(predicted_indices_list, actual_indices_list):
        gold = set(actual)
        hits = len(set(predicted[:k]) & gold)

        if hits == 0:
            # Nothing retrieved correctly: every metric is zero for this claim.
            recall = precision = fscore = 0.0
        else:
            recall = float(hits) / len(gold)
            precision = float(hits) / k
            denominator = precision + recall
            fscore = 2 * (precision * recall) / denominator if denominator != 0 else 0.0

        recall_sum += recall
        precision_sum += precision
        fscore_sum += fscore

    return {
        "average_recall": recall_sum / num_claims,
        "average_precision": precision_sum / num_claims,
        "average_fscore": fscore_sum / num_claims
    }

# Example usage:
predicted_indices_list = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
actual_indices_list = [[1, 2, 3], [7, 11, 8, 10]]
results = evaluate_evidence_retrieval(predicted_indices_list, actual_indices_list)
print(results)


{'average_recall': 0.75, 'average_precision': 0.5, 'average_fscore': 0.5972222222222222}


# 3.Testing and Evaluation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
import random

# def prepare_evaluation_set(dev_claims_text, evidence_text_processed, top_indices):
#     eval_claims = []
#     eval_evidences = []
#     eval_evidences_idxs = []

#     for i, claim in enumerate(dev_claims_text):
#       claim_top_indices = top_indices[i]

#       for idx in claim_top_indices:
#         #idx = idx.item()
#         eval_claims.append(claim)
#         eval_evidences.append(evidence_text_processed[idx])
#         eval_evidences_idxs.append(idx)


#     return eval_claims, eval_evidences, eval_evidences_idxs

# dev_claims, dev_evidences, dev_evidences_idxs = prepare_evaluation_set(dev_claims_text_precessed, evidence_text_processed, dev_top_indices)

def convert_to_indices(claims, all_evidences, vocab):
    """Encode claims and their candidate evidences as long tensors of indices.

    Args:
        claims: List of tokenised claims.
        all_evidences: For every claim, a list of tokenised candidate
            evidences.
        vocab: Token -> index mapping containing an '<unk>' entry used for
            unknown tokens.

    Returns:
        Tuple `(claims_indices, all_evidences_indices)`: a list with one tensor
        per claim, and a list (per claim) of lists with one tensor per
        candidate evidence.
    """
    def encode(tokens):
        # One 1-D long tensor per token list; unknown tokens map to '<unk>'.
        return torch.tensor([vocab.get(token, vocab['<unk>']) for token in tokens], dtype=torch.long)

    claims_indices = [encode(claim) for claim in claims]
    all_evidences_indices = []
    for evidences in all_evidences:
        all_evidences_indices.append([encode(evidence) for evidence in evidences])
    return claims_indices, all_evidences_indices

# dev_claims_indices, dev_evidence_indices, _ = convert_to_indices(dev_claims, dev_evidences, dev_evidences, vocab)

def prepare_evaluation_set(dev_claims_text, evidence_text_processed, top_indices):
    """Group the retrieved candidate evidences of every claim.

    Args:
        dev_claims_text: Iterable of claims.
        evidence_text_processed: Sequence of evidences indexable by row number.
        top_indices: Per-claim iterable of candidate row numbers.

    Returns:
        Tuple `(eval_claims, eval_all_evidences, eval_all_evidence_idxs)`: the
        claims, and for each claim the list of candidate evidences and the
        parallel list of their row numbers (elements are kept exactly as they
        come out of `top_indices`).
    """
    eval_claims = []
    eval_all_evidences = []
    eval_all_evidence_idxs = []

    for position, claim in enumerate(dev_claims_text):
        # Resolve every candidate once, keeping text and row number together.
        candidates = [(evidence_text_processed[idx], idx) for idx in top_indices[position]]

        eval_claims.append(claim)
        eval_all_evidences.append([text for text, _ in candidates])
        eval_all_evidence_idxs.append([idx for _, idx in candidates])

    return eval_claims, eval_all_evidences, eval_all_evidence_idxs

# Example usage
dev_claims, dev_all_evidences, dev_all_evidence_idxs = prepare_evaluation_set(dev_claims_text_precessed, evidence_text_processed, dev_top_indices)
dev_claims_indices, dev_all_evidence_indices = convert_to_indices(dev_claims, dev_all_evidences, vocab)


In [None]:
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
class EvaluateDataset(Dataset):
    """Dataset yielding one claim together with all of its candidate evidences.

    Item `i` is `(claims[i], all_evidences[i], all_evidence_idxs[i])`.
    """

    def __init__(self, claims, all_evidences, all_evidence_idxs):
        self.claims, self.all_evidences = claims, all_evidences
        self.all_evidence_idxs = all_evidence_idxs

    def __len__(self):
        # One example per claim.
        return len(self.claims)

    def __getitem__(self, idx):
        columns = (self.claims, self.all_evidences, self.all_evidence_idxs)
        return tuple(column[idx] for column in columns)

def collate_fn(batch):
    """Collate evaluation examples, padding claims and each evidence set.

    Args:
        batch: List of `(claim, evidences, evidence_idxs)` tuples where `claim`
            is an index tensor and `evidences` a list of index tensors.

    Returns:
        Tuple `(claims_padded, all_evidences_padded, all_evidence_idxs)`: the
        claims padded into one tensor, a list with one padded tensor per claim
        holding that claim's evidences, and the untouched evidence row numbers.
        Padding uses the '<pad>' vocabulary index.
    """
    claims, all_evidences, all_evidence_idxs = zip(*batch)
    claims_padded = pad_sequence(list(claims), batch_first=True, padding_value=vocab['<pad>'])

    # Each claim may have a different number of evidences, so every evidence
    # set is padded on its own rather than stacked into a single tensor.
    all_evidences_padded = []
    for evidences in all_evidences:
        all_evidences_padded.append(
            pad_sequence(list(evidences), batch_first=True, padding_value=vocab['<pad>'])
        )

    return claims_padded, all_evidences_padded, all_evidence_idxs

# DataLoader creation
eval_dataset = EvaluateDataset(dev_claims_indices, dev_all_evidence_indices, dev_all_evidence_idxs)
eval_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

In [None]:
def evaluate(model, data_loader, device, k=5):
    """Re-rank every claim's candidate evidences and keep the top `k`.

    Args:
        model: Module exposing `forward_one(claim, evidence)` that returns one
            similarity score per row for batched 2-D index tensors.
        data_loader: Iterable of `(claims, all_evidences, all_evidence_idxs)`
            batches: padded claims (batch x claim_len), a list with one padded
            evidence tensor (num_candidates x evidence_len) per claim, and the
            per-claim evidence row numbers (ints or 0-d tensors).
        device: Device on which the scoring runs.
        k: Number of best-scoring evidences returned per claim.

    Returns:
        One entry per batch; each entry is a list with, for every claim of the
        batch, the row numbers (Python ints) of its top-`k` evidences ordered
        from best to worst.
    """
    model.eval()
    results = []
    with torch.no_grad():
        for claims, all_evidences, all_evidence_idxs in data_loader:
            claims = claims.to(device)
            batch_top_k = []

            for row, (evidences, evidence_idxs) in enumerate(zip(all_evidences, all_evidence_idxs)):
                num_candidates = evidences.size(0)
                if num_candidates == 0:
                    batch_top_k.append([])
                    continue

                evidences = evidences.to(device)
                # Score all candidates of this claim in a single forward pass:
                # repeat the claim's own row (not the whole batch) once per
                # candidate instead of calling the model candidate by candidate.
                claim = claims[row:row + 1].expand(num_candidates, -1)
                scores = model.forward_one(claim, evidences).reshape(-1)

                ranking = torch.argsort(scores, descending=True)[:k].tolist()
                # int() accepts both plain ints and 0-d tensors.
                batch_top_k.append([int(evidence_idxs[position]) for position in ranking])

            results.append(batch_top_k)

    return results


# Assume eval_loader is properly defined, as well as gru_model and device
sorted_indices_per_claim = evaluate(gru_model, eval_loader, device, k=5)

In [None]:
sorted_indices_per_claim[1]

[[208053, 142506, 36224, 213568, 1099333]]

In [None]:
def calculate_metrics(predicted_indices, ground_truth_indices):
    """Macro-averaged precision, recall and F1 of the predicted evidences.

    Args:
        predicted_indices: Per claim, a nested list of predicted evidence
            indices (list of lists); the inner lists are flattened into one set.
        ground_truth_indices: Per claim, the collection of gold indices.

    Returns:
        Tuple `(average_precision, average_recall, average_f1)`, each averaged
        over `len(predicted_indices)` claims.
    """
    def ratio(numerator, denominator):
        # An empty prediction or gold set counts as a score of zero.
        return 0 if denominator == 0 else numerator / denominator

    num_claims = len(predicted_indices)
    precision_sum = 0
    recall_sum = 0
    f1_sum = 0

    for nested_prediction, truth in zip(predicted_indices, ground_truth_indices):
        relevant = set(truth)
        retrieved = {item for group in nested_prediction for item in group}
        hits = len(retrieved & relevant)

        precision = ratio(hits, len(retrieved))
        recall = ratio(hits, len(relevant))
        if precision + recall == 0:
            f1 = 0
        else:
            f1 = 2 * (precision * recall) / (precision + recall)

        precision_sum += precision
        recall_sum += recall
        f1_sum += f1

    return precision_sum / num_claims, recall_sum / num_claims, f1_sum / num_claims

# Usage
average_precision, average_recall, average_f1 = calculate_metrics(sorted_indices_per_claim, dev_evidence_idxs)
print(f"Average Precision: {average_precision:.4f}")
print(f"Average Recall: {average_recall:.4f}")
print(f"Average F1 Score: {average_f1:.4f}")

Average Precision: 0.0455
Average Recall: 0.0742
Average F1 Score: 0.0526


In [None]:
dev_evidence_idxs[1]

[996421, 1080858, 208053, 699212, 832334]

In [None]:
# Define the path
save_path = '/content/drive/MyDrive/nlp/data/gru_model.bin'

# Saving the model's state dictionary
torch.save(gru_model.state_dict(), save_path)

#model.save(gru_model.state_dict(), '/content/drive/MyDrive/nlp/data/gru_model.bin')

In [None]:
# Define the path
load_path = '/content/drive/MyDrive/nlp/data/gru_model.bin'

# Re-create the model structure with the same hidden size it was trained and
# saved with (1024); any other value makes load_state_dict fail with a GRU
# weight size mismatch. The model is placed on `device`, and map_location lets a
# checkpoint written on a GPU machine be loaded on a CPU-only runtime.
gru_model = SiameseNetwork(embedding_matrix, hidden_dim=1024).to(device)
gru_model.load_state_dict(torch.load(load_path, map_location=device))

<All keys matched successfully>

## Object Oriented Programming codes here

*You can use multiple code snippets. Just add more if needed*