# 2024 COMP90042 Project
*Make sure you change the file name with your group id.*

# Readme
*If there is something to be noted for the marker, please mention here.*

*If you are planning to implement a program in an Object-Oriented Programming style, please put those definitions at the bottom of this ipynb file.*

# 1.DataSet Processing
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
# Mount Google Drive so the data files under /content/drive/MyDrive can be read.
# This import only exists inside a Google Colab runtime.

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import torchdata.datapipes as dp
import torchtext.transforms as T
import spacy
from torchtext.vocab import build_vocab_from_iterator
# Load spaCy's small English pipeline.
# NOTE(review): `eng` is not referenced anywhere else in the visible part of this notebook — confirm it is still needed.
eng = spacy.load("en_core_web_sm")

In [None]:
import pandas as pd

# Read the evidence corpus and the train/dev claims from Google Drive.
# With orient='index' every top-level JSON key becomes a row label.
evidences = pd.read_json('/content/drive/MyDrive/nlp/data/evidence.json', orient='index')
train_claims = pd.read_json('/content/drive/MyDrive/nlp/data/train-claims.json', orient='index')
dev_claims = pd.read_json('/content/drive/MyDrive/nlp/data/dev-claims.json', orient='index')

# Turn the row labels into a regular column and give the columns descriptive names.
# NOTE: these frames are modified in place, so this cell is not idempotent on its own —
# re-run it from the pd.read_json calls above.
evidences.reset_index(inplace=True)
evidences.columns = ['evidence_id', 'evidence_text']

train_claims.reset_index(inplace=True)
train_claims.rename(columns={'index': 'claim_id'}, inplace=True)

dev_claims.reset_index(inplace=True)
dev_claims.rename(columns={'index': 'claim_id'}, inplace=True)

evidence_id = evidences['evidence_id']
evidence_text = evidences['evidence_text']
evidence_idx = evidences.index.tolist()

# Lookup table: evidence_id -> positional row index in `evidences`
evidence_id_dict = dict(zip(evidence_id, evidence_idx))

train_claims_text = train_claims['claim_text']
train_evidence_ids = train_claims['evidences']
# Map each evidence_id to its corresponding row index for faster processing.
# A claim that references an id missing from evidence_id_dict raises KeyError here.
train_evidence_idxs = train_evidence_ids.apply(lambda x: [evidence_id_dict[evidence_id] for evidence_id in x])

dev_claims_text = dev_claims['claim_text']
dev_evidence_ids = dev_claims['evidences']
# Same id -> row-index mapping for the dev claims
dev_evidence_idxs = dev_evidence_ids.apply(lambda x: [evidence_id_dict[evidence_id] for evidence_id in x])

In [None]:
#text preprocessing
import nltk
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK resources requested by this notebook
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

# Shared preprocessing objects read as globals by preprocess_data
tt = TweetTokenizer()
# NOTE: this rebinds the imported `stopwords` corpus module to a plain set of English
# stop words; the `from nltk.corpus import stopwords` line must run again before this
# statement can be re-executed.
stopwords = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_data(text):
    """Tokenise ``text`` and return its cleaned, lemmatised tokens.

    The text is split with the module-level ``TweetTokenizer`` (``tt``), each
    token is lower-cased, tokens that are stop words or not purely alphabetic
    are dropped, and the survivors are lemmatised with ``lemmatizer``.

    Args:
        text: Raw string to process.

    Returns:
        list[str]: Lemmatised tokens in their original order.
    """
    lowered_tokens = (raw_token.lower() for raw_token in tt.tokenize(text))
    return [
        lemmatizer.lemmatize(candidate)
        for candidate in lowered_tokens
        if candidate not in stopwords and candidate.isalpha()
    ]

# Run the preprocessing over every claim and every evidence passage.
# (`dev_claims_text_precessed` is spelled this way throughout the notebook, so the name is kept.)
train_claims_text_processed = train_claims_text.apply(preprocess_data)
dev_claims_text_precessed = dev_claims_text.apply(preprocess_data)
evidence_text_processed = evidence_text.apply(preprocess_data)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
from gensim.models import Word2Vec

# Train a Word2Vec model on the preprocessed evidence passages only (claims are not included):
# 300-dimensional vectors, window of 10, words seen fewer than 3 times dropped,
# sg=1 selects skip-gram and hs=0 disables hierarchical softmax.
model = Word2Vec(sentences=evidence_text_processed, vector_size=300, window=10, min_count=3, workers=8, sg=1, hs=0)

In [None]:
# Save the trained Word2Vec model to Drive so later sessions can reload it instead of retraining
model.save('/content/drive/MyDrive/nlp/data/word2vec_model.bin')


In [None]:
import json
# Cache the preprocessed token lists on Drive. Each file is written inside a
# `with` block so the handle is flushed and closed as soon as the dump finishes.
with open("/content/drive/MyDrive/nlp/data/train_claims_text_processed.json", "w") as fh:
    json.dump(train_claims_text_processed.tolist(), fh)
with open("/content/drive/MyDrive/nlp/data/dev_claims_text_precessed.json", "w") as fh:
    json.dump(dev_claims_text_precessed.tolist(), fh)
with open("/content/drive/MyDrive/nlp/data/evidence_text_processed.json", "w") as fh:
    json.dump(evidence_text_processed.tolist(), fh)

In [None]:
import json

# Reload the cached token lists (plain Python lists of token lists from here on).
# Each file is read inside a `with` block so the handle is closed right away.
with open("/content/drive/MyDrive/nlp/data/train_claims_text_processed.json", "r") as fh:
    train_claims_text_processed = json.load(fh)
with open("/content/drive/MyDrive/nlp/data/dev_claims_text_precessed.json", "r") as fh:
    dev_claims_text_precessed = json.load(fh)
with open("/content/drive/MyDrive/nlp/data/evidence_text_processed.json", "r") as fh:
    evidence_text_processed = json.load(fh)

In [None]:
from gensim.models import Word2Vec

# Reload the Word2Vec model saved earlier; `model` is read as a global by the cells below
model = Word2Vec.load('/content/drive/MyDrive/nlp/data/word2vec_model.bin')

In [None]:
import numpy as np
def generate_embedding(text, model):
    """Embed a tokenised text as the mean of its in-vocabulary word vectors.

    Args:
        text: Iterable of tokens.
        model: Object exposing ``wv.key_to_index``, ``wv[token]`` and
            ``vector_size`` (a gensim Word2Vec model in this notebook).

    Returns:
        numpy.ndarray: Element-wise mean of the vectors of the known tokens, or
        a zero vector of length ``model.vector_size`` when no token is known.
    """
    known_vectors = [model.wv[token] for token in text if token in model.wv.key_to_index]
    if len(known_vectors) == 0:
        # Nothing to average: fall back to the all-zero embedding
        return np.zeros(model.vector_size)
    return np.mean(known_vectors, axis=0)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Averaged Word2Vec embedding for every training claim
train_claim_text_embeddings = [generate_embedding(text, model) for text in train_claims_text_processed]

# Averaged Word2Vec embedding for every dev claim
dev_claim_text_embeddings = [generate_embedding(text, model) for text in dev_claims_text_precessed]

# Averaged Word2Vec embedding for every evidence passage (one entry per row of the evidence corpus)
evidence_embeddings = [generate_embedding(text, model) for text in evidence_text_processed]




In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between claim embeddings and evidence embeddings
def compute_similarity_scores(claim_embeddings, evidence_embeddings):
    """Return the cosine-similarity matrix between claims (rows) and evidences (columns).

    Thin wrapper around ``sklearn.metrics.pairwise.cosine_similarity``.
    """
    return cosine_similarity(claim_embeddings, evidence_embeddings)


# Cosine similarity of every training claim against every evidence embedding
# (one row per claim, one column per evidence)
train_similarity_scores = compute_similarity_scores(train_claim_text_embeddings, evidence_embeddings)

# Cosine similarity of every dev claim against every evidence embedding
dev_similarity_scores = compute_similarity_scores(dev_claim_text_embeddings, evidence_embeddings)

In [None]:
import torch
# Function to compute recall at k
def compute_recall_at_k(similarity_scores, true_indices, k):
    """Average per-claim recall of the gold evidence within the top-``k`` columns.

    Args:
        similarity_scores: 2-D scores, one row per claim and one column per
            evidence (anything ``torch.as_tensor`` accepts).
        true_indices: Iterable giving, for each claim in row order, the
            collection of gold evidence column indices.
        k: Number of highest-scoring columns kept per claim. Values larger than
            the number of columns are clamped to it.

    Returns:
        float: Mean over claims of ``hits / number_of_gold_indices``. Claims with
        no gold evidence are skipped, and ``0.0`` is returned when no claim can
        be scored (instead of raising ``ZeroDivisionError``).
    """
    scores = torch.as_tensor(similarity_scores, dtype=torch.float32)
    if scores.numel() == 0:
        return 0.0

    # torch.topk raises when k exceeds the size of the dimension
    k = max(0, min(int(k), scores.size(-1)))
    top_k_rows = torch.topk(scores, k, dim=-1).indices.tolist()

    recall_values = []
    for retrieved, gold in zip(top_k_rows, true_indices):
        gold = list(gold)
        if not gold:
            # Recall is undefined without gold evidence
            continue
        retrieved = set(retrieved)  # O(1) membership instead of scanning a list
        hits = sum(1 for idx in gold if idx in retrieved)
        recall_values.append(hits / len(gold))

    if not recall_values:
        return 0.0
    return sum(recall_values) / len(recall_values)

# Recall of the Word2Vec retrieval baseline.
# NOTE(review): the two sets are evaluated at different cut-offs (k=15 for train, k=10 for dev),
# so the two printed numbers are not directly comparable — confirm this is intended.
train_recall_at_k = compute_recall_at_k(train_similarity_scores, train_evidence_idxs, 15)
print("Training Recall at K:", train_recall_at_k)

# Recall at k for the dev set
dev_recall_at_k = compute_recall_at_k(dev_similarity_scores, dev_evidence_idxs, 10)
print("Dev Recall at K:", dev_recall_at_k)

Training Recall at K: 0.1888707926167208
Dev Recall at K: 0.17651515151515146


In [None]:
import torch
def get_top_k_indices(similarity_scores, k=1000):
    """Return the indices and scores of the ``k`` best-scoring evidences per claim.

    Args:
        similarity_scores: 2-D scores of shape (num_claims, num_evidences),
            given as a PyTorch tensor, a NumPy array or a nested list.
        k: Number of columns to keep per row. Clamped to the number of
            columns, because ``torch.topk`` raises when ``k`` is larger.

    Returns:
        tuple: ``(top_k_indices, top_k_values)``, both of shape (num_claims, k)
        and ordered by descending score within each row.
    """
    if isinstance(similarity_scores, torch.Tensor):
        similarity_scores_tensor = similarity_scores
    else:
        # Covers NumPy arrays and plain nested lists alike
        similarity_scores_tensor = torch.as_tensor(similarity_scores, dtype=torch.float32)

    k = min(int(k), similarity_scores_tensor.size(1))
    top_k_values, top_k_indices = torch.topk(similarity_scores_tensor, k, dim=1, largest=True, sorted=True)

    return top_k_indices, top_k_values

# Keep the 100 best-scoring evidence candidates per claim for training and development sets
# (the similarity values are only kept for the dev set, as dev_orig_scores)
train_top_indices, _ = get_top_k_indices(train_similarity_scores, k=100)
dev_top_indices, dev_orig_scores = get_top_k_indices(dev_similarity_scores, k=100)

In [None]:
# Sanity check: candidate evidence indices retrieved for the first training claim
train_top_indices[0]

In [None]:
import torch
import numpy as np

embedding_dim = model.vector_size
# token -> row index, in the same order as the rows of model.wv.vectors
vocab = {word: idx for idx, word in enumerate(model.wv.index_to_key)}

# Calculate the current maximum index in the vocabulary
max_index = max(vocab.values())


# vocab['<cls>'] = max_index + 1
# vocab['<sep>'] = max_index + 2
# Two special tokens are appended after the Word2Vec vocabulary
vocab['<unk>'] = max_index + 1
vocab['<pad>'] = max_index + 2

#vocab['<unk>'] = len(vocab)

# One randomly initialised row for `<unk>`.
# NOTE(review): no random seed is set in this cell, so this row differs between runs.
random_embeddings = np.random.randn(1, embedding_dim)

# Extend the embedding matrix; the row order must match the indices assigned above
padding_embeddings = np.zeros((1, embedding_dim))  # Zero vector for `<pad>`
extended_embeddings = np.vstack([
    model.wv.vectors,  # Existing embeddings from Word2Vec
    random_embeddings,  # Embeddings for `<unk>`
    padding_embeddings  # Zero embeddings for `<pad>`
])
embedding_matrix = torch.FloatTensor(extended_embeddings)

# Check new size of the embedding matrix and the maximum index in the vocabulary
# (the maximum index should equal the matrix size minus one)
print(f"New size of embedding matrix: {embedding_matrix.size(0)}")
print(f"Maximum index in vocab: {max(vocab.values())}")


New size of embedding matrix: 144508
Maximum index in vocab: 144507


In [None]:
def text_to_indices(text, vocab):
    """Map every token of ``text`` to its id in ``vocab``; unknown tokens get ``vocab['<unk>']``."""
    indices = []
    for token in text:
        indices.append(vocab.get(token, vocab['<unk>']))
    return indices

def convert_to_indices(train_claims, x1, x2, vocab):
    """Convert three collections of tokenised texts into lists of vocabulary ids.

    Args:
        train_claims: Iterable of tokenised claims.
        x1: Iterable of tokenised evidences.
        x2: Second iterable of tokenised evidences.
        vocab: Token -> id mapping containing an ``'<unk>'`` entry.

    Returns:
        tuple: ``(train_claims_indices, x1_indices, x2_indices)``, each a list of id lists.
    """
    train_claims_indices = [text_to_indices(claim, vocab) for claim in train_claims]
    x1_indices = [text_to_indices(evidence, vocab) for evidence in x1]
    x2_indices = [text_to_indices(evidence, vocab) for evidence in x2]
    return train_claims_indices, x1_indices, x2_indices

# NOTE(review): `x1` and `x2` are not defined anywhere in the visible part of this notebook, so this
# line raises NameError on a fresh kernel. `train_claims` is also the claims DataFrame here, not the
# tokenised claim texts. This looks like a leftover from an earlier experiment — confirm and remove or fix.
train_claims_indices, x1_indices, x2_indices = convert_to_indices(train_claims, x1, x2, vocab)

# def convert_to_indicies(claims, evidences, vocab):
#     train_claims_indices = []
#     evidences_indices = []

#     for claim in claims:
#         train_claims_indices.append(text_to_indices(claim, vocab))

#     for evidence in evidences:
#         evidences_indices.append(text_to_indices(evidence, vocab))

#     return train_claims_indices, evidences_indices

# train_claims_indices, evidences_indices = convert_to_indicies(train_claims, train_evidences, vocab)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import random
def text_to_indices(text, vocab):
    """Translate a token sequence into vocabulary ids, falling back to ``vocab['<unk>']``."""
    return list(map(lambda token: vocab.get(token, vocab['<unk>']), text))

class ListwiseRankingDataset(Dataset):
    """Training samples of (claim, one gold evidence, hard negatives from the retrieved candidates).

    NOTE: a later cell of this notebook defines another class with the same name and a
    different constructor; whichever cell ran last decides which one is in effect.

    Args:
        claims: Sequence of tokenised claims.
        evidences: Sequence of tokenised evidences, indexable by evidence index.
        true_indices: Per claim, the collection of gold evidence indices.
        top_k_indices: Per claim, the retrieved candidate evidence indices
            (rows of a tensor or plain sequences).
        k: Only the first ``k`` candidates of each row are considered.
        neg_samples: Maximum number of negatives returned per sample.
    """

    def __init__(self, claims, evidences, true_indices, top_k_indices, k=3000, neg_samples = 32):
        self.claims = claims
        self.evidences = evidences
        self.true_indices = true_indices
        self.top_k_indices = top_k_indices
        self.k = k
        self.neg_samples = neg_samples

    def __len__(self):
        return len(self.claims)

    def __getitem__(self, idx):
        """Return ``(claim, pos_evidence, neg_evidences)``, or ``None`` when no gold
        evidence appears among the first ``k`` candidates (the collate function drops those)."""
        claim = self.claims[idx]

        # Convert the candidate row to plain ints once, so the membership tests below are
        # O(1) set lookups instead of element-wise tensor comparisons.
        candidates = self.top_k_indices[idx][:self.k]
        if hasattr(candidates, 'tolist'):
            candidates = candidates.tolist()
        else:
            candidates = [int(c) for c in candidates]
        candidate_set = set(candidates)

        valid_indices = [i for i in self.true_indices[idx] if i in candidate_set]
        if not valid_indices:
            return None

        pos_idx = random.choice(valid_indices)
        pos_evidence = self.evidences[pos_idx]

        positives = set(valid_indices)
        neg_indices = [i for i in candidates if i not in positives]
        # Sample indices first and look up only the chosen evidences; never ask for more than exist
        chosen = random.sample(neg_indices, min(self.neg_samples, len(neg_indices)))
        neg_evidences = [self.evidences[neg_idx] for neg_idx in chosen]

        return claim, pos_evidence, neg_evidences


from torch.nn.utils.rnn import pad_sequence
import torch

def collate_fn(batch):
    """Collate (claim, positive, negatives) samples into padded id tensors.

    Samples that are ``None`` are dropped. Token lists are converted with
    ``text_to_indices`` and the module-level ``vocab`` and padded with
    ``vocab['<pad>']``.

    Returns:
        ``None`` when every sample was dropped (the training loop has to skip
        it), otherwise ``(claims_padded, pos_padded, neg_padded)``.
        ``neg_padded`` is reshaped to ``(len(batch), -1, seq_len)`` when it
        has any elements.
        NOTE(review): that reshape assumes every sample carries the same
        number of negatives — confirm against the dataset.
    """
    kept = [sample for sample in batch if sample is not None]
    if len(kept) == 0:
        return None

    claims, pos_evidences, neg_evidences_lists = zip(*kept)

    def _encode_and_pad(token_lists):
        # Token lists -> LongTensors -> one right-padded batch-first tensor
        id_tensors = [torch.tensor(text_to_indices(tokens, vocab), dtype=torch.long) for tokens in token_lists]
        return pad_sequence(id_tensors, batch_first=True, padding_value=vocab['<pad>'])

    claims_padded = _encode_and_pad(claims)
    pos_padded = _encode_and_pad(pos_evidences)

    # Negatives of all samples are flattened, padded together, then regrouped per sample
    flat_negatives = [neg for negs in neg_evidences_lists for neg in negs]
    neg_padded = _encode_and_pad(flat_negatives)
    if neg_padded.numel() > 0:
        neg_padded = neg_padded.view(len(kept), -1, neg_padded.size(1))

    return claims_padded, pos_padded, neg_padded

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import random

def text_to_indices(text, vocab):
    """Look up the vocabulary id of each token; tokens missing from ``vocab`` map to ``vocab['<unk>']``."""
    token_ids = []
    for tok in text:
        token_ids.append(vocab.get(tok, vocab['<unk>']))
    return token_ids



class ListwiseRankingDataset(Dataset):
    """One sample per (claim, gold evidence) pair, for training with in-batch negatives.

    Args:
        claims: Sequence of tokenised claims.
        evidences: Sequence of tokenised evidences, indexable by evidence index.
        true_indices: Per claim, the iterable of its gold evidence indices.
    """

    def __init__(self, claims, evidences, true_indices):
        self.claims = claims
        self.evidences = evidences
        self.pairs = self._create_pairs(true_indices)

    def _create_pairs(self, true_indices):
        """Enumerate every (claim_index, pos_evidence_index) combination in claim order."""
        return [
            (claim_no, evidence_no)
            for claim_no, gold in enumerate(true_indices)
            for evidence_no in gold
        ]

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        claim_no, evidence_no = self.pairs[idx]
        return self.claims[claim_no], self.evidences[evidence_no]


from torch.nn.utils.rnn import pad_sequence
import torch

def collate_fn(batch):
    """Collate (claim, positive evidence) pairs and build in-batch negatives.

    Token lists are converted with ``text_to_indices`` and the module-level
    ``vocab`` and right-padded with ``vocab['<pad>']``. For sample ``i`` the
    negatives are the positive evidences of every other sample in the batch.

    The previous implementation passed ``max_length=`` to ``pad_sequence``,
    which that function does not accept (TypeError). All positives are already
    padded to one common length in ``pos_padded``, so the negatives are taken
    straight from its rows.

    NOTE(review): two pairs of the same claim can land in one batch, in which
    case a gold evidence is used as a negative — confirm whether that matters.

    Returns:
        tuple: ``(claims_padded, pos_padded, neg_padded_stack)`` with shapes
        ``(B, claim_len)``, ``(B, ev_len)`` and ``(B, B - 1, ev_len)``.
    """
    claims, pos_evidences = zip(*batch)

    # Prepare claims and pos_evidences
    claims_indices = [text_to_indices(claim, vocab) for claim in claims]
    pos_indices = [text_to_indices(evidence, vocab) for evidence in pos_evidences]

    # Pad sequences for claims and positive evidences
    pad_id = vocab['<pad>']
    claims_padded = pad_sequence([torch.tensor(ci, dtype=torch.long) for ci in claims_indices], batch_first=True, padding_value=pad_id)
    pos_padded = pad_sequence([torch.tensor(pi, dtype=torch.long) for pi in pos_indices], batch_first=True, padding_value=pad_id)

    batch_size, seq_len = pos_padded.shape
    if batch_size > 1:
        # Entry [i, j] of the expanded tensor is positive j; the mask drops the diagonal (j == i)
        off_diagonal = ~torch.eye(batch_size, dtype=torch.bool)
        all_pairs = pos_padded.unsqueeze(0).expand(batch_size, batch_size, seq_len)
        neg_padded_stack = all_pairs[off_diagonal].reshape(batch_size, batch_size - 1, seq_len)
    else:
        # A single-sample batch has no in-batch negatives
        neg_padded_stack = pos_padded.new_empty((batch_size, 0, seq_len))

    return claims_padded, pos_padded, neg_padded_stack




# 2. Model Implementation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# class SiameseNetwork(nn.Module):
#     def __init__(self, embedding_matrix, hidden_dim):
#         super().__init__()
#         self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
#         self.gru = nn.GRU(embedding_matrix.size(1), hidden_dim, batch_first=True)

#     def forward_one(self, claim, evidence):
#         # Embed and process claim
#         claim_emb = self.embedding(claim)
#         _, claim_hidden = self.gru(claim_emb)
#         claim_hidden = claim_hidden.squeeze(0)  # Ensure shape is [batch_size, hidden_dim]

#         # Embed and process evidence
#         evidence_emb = self.embedding(evidence)
#         _, evidence_hidden = self.gru(evidence_emb)
#         evidence_hidden = evidence_hidden.squeeze(0)  # Ensure shape is [batch_size, hidden_dim]

#         # Calculate cosine similarity
#         similarity = F.cosine_similarity(claim_hidden, evidence_hidden, dim=1)
#         return similarity

#     def forward(self, claim, evidence1, evidence2):
#         # Compute cosine similarity for each evidence compared to the same claim
#         similarity1 = self.forward_one(claim, evidence1)
#         similarity2 = self.forward_one(claim, evidence2)
#         return similarity1, similarity2

import torch
import torch.nn as nn
import torch.nn.functional as F
class Attention(nn.Module):
    """Attention pooling: a learned, softmax-weighted sum of hidden states along dim 1."""

    def __init__(self, hidden_dim):
        super().__init__()
        # One scalar score per position; input width matches the unidirectional GRU output
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, outputs):
        """Pool ``outputs`` over dim 1.

        Assumes ``outputs`` is (batch, seq_len, hidden_dim), as produced by a
        batch_first GRU — TODO confirm for other callers. Every position,
        including padded ones, takes part in the softmax.
        """
        position_scores = self.attn(outputs).tanh()
        weights = torch.softmax(position_scores, dim=1)
        return torch.sum(weights * outputs, dim=1)

class SiameseNetwork(nn.Module):
    """Shared-weight encoder (embedding -> GRU -> dropout -> attention) scoring claims against evidences.

    Args:
        embedding_matrix: Float tensor of pretrained embeddings, one row per
            vocabulary id; it is fine-tuned (``freeze=False``).
        hidden_dim: Size of the GRU hidden state and of the text encodings.
        dropout_rate: Dropout probability applied to the GRU outputs.

    NOTE(review): padded positions are not masked — they pass through the GRU
    and the attention softmax like real tokens.
    """

    def __init__(self, embedding_matrix, hidden_dim, dropout_rate=0.5):
        super().__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.hidden_dim = hidden_dim
        # Single layer unidirectional GRU
        self.gru = nn.GRU(embedding_matrix.size(1), hidden_dim, num_layers=1, batch_first=True, dropout=0)
        self.dropout = nn.Dropout(dropout_rate)
        self.attention = Attention(hidden_dim)  # Adjusted for unidirectional output

    def forward_one(self, text):
        """Encode a batch of id sequences (batch, seq_len) into vectors (batch, hidden_dim)."""
        text_emb = self.embedding(text)
        text_out, _ = self.gru(text_emb)
        text_out = self.dropout(text_out)
        text_context = self.attention(text_out)
        return text_context

    def forward(self, claims, evidences):
        """Cosine similarity between each claim and its evidences.

        Args:
            claims: Id tensor of shape (batch, claim_len).
            evidences: Id tensor of shape (batch, ev_len) for one evidence per
                claim, or (batch, num_evidences, ev_len).

        Returns:
            Tensor of shape (batch, num_evidences); (batch, 1) for 2-D evidences.
        """
        claim_contexts = self.forward_one(claims)
        # reshape (not view) so that non-contiguous inputs, e.g. transposed or
        # sliced evidence tensors, are accepted as well
        flattened_evidences = evidences.reshape(-1, evidences.size(-1))
        evidence_contexts = self.forward_one(flattened_evidences)
        evidence_contexts = evidence_contexts.reshape(claims.size(0), -1, evidence_contexts.size(-1))
        similarities = F.cosine_similarity(claim_contexts.unsqueeze(1), evidence_contexts, dim=2)
        return similarities

In [None]:
import torch.nn.functional as F

def listwise_loss(model, claims_emb, pos_evidences_emb, neg_evidences_emb, ignore_index=vocab['<pad>']):
    """Softmax cross-entropy over one positive and N negative evidences per claim.

    Args:
        model: Callable ``model(claims, evidences)`` that returns one similarity
            per evidence: shape (batch, 1) for 2-D evidences and (batch, N) for
            3-D evidences, as ``SiameseNetwork`` does.
        claims_emb: Claim id tensor of shape (batch, claim_len).
        pos_evidences_emb: Positive evidence ids, shape (batch, ev_len).
        neg_evidences_emb: Negative evidence ids, shape (batch, N, ev_len).
        ignore_index: Forwarded to ``F.nll_loss``. The target built here is
            always class 0 (the positive), so it only has an effect if 0 is passed.

    Returns:
        Scalar tensor: mean negative log-likelihood of the positive evidence.
    """
    batch_size = claims_emb.size(0)

    # Positive score, kept as column 0: [batch_size, 1]
    pos_scores = model(claims_emb, pos_evidences_emb).reshape(batch_size, -1)

    if neg_evidences_emb.size(1) > 0:
        # Score all negatives in a single forward pass instead of one pass per negative,
        # which re-encoded the claims every time: [batch_size, num_negatives]
        neg_scores = model(claims_emb, neg_evidences_emb.contiguous()).reshape(batch_size, -1)
        scores = torch.cat((pos_scores, neg_scores), dim=1)  # [batch_size, 1 + num_negatives]
    else:
        # No negatives in this batch (e.g. a batch of one sample): only the positive column remains
        scores = pos_scores

    scores = F.log_softmax(scores, dim=1)

    # The positive evidence always sits at index 0
    target = torch.zeros(scores.size(0), dtype=torch.long, device=scores.device)

    return F.nll_loss(scores, target, ignore_index=ignore_index)

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim

# Margin-ranking training of the GRU re-ranker on retrieved hard negatives.
# Assumes vocab and embedding_matrix are predefined, and that the ListwiseRankingDataset /
# collate_fn definitions that take top-k candidate indices are the ones currently in effect.

# Device configuration: defined here so this cell does not rely on a later cell having run first
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = ListwiseRankingDataset(train_claims_text_processed, evidence_text_processed, train_evidence_idxs, train_top_indices)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)

# Initialize model
gru_model = SiameseNetwork(embedding_matrix, hidden_dim=128).to(device)
gru_model.train()

# Optimizer
optimizer = optim.Adam(gru_model.parameters(), lr=0.001)

# Loss Function
margin = 1.0  # Define the margin for the margin ranking loss
loss_fn = nn.MarginRankingLoss(margin=margin)

# Training loop
import torch

num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:
        # collate_fn returns None when every sample of the batch was dropped
        if batch is None:
            continue

        claims_padded, pos_padded, neg_padded = batch

        # Without a [batch_size, num_negatives, seq_length] block of negatives there is
        # nothing to rank against (and batch_loss would stay a plain int)
        if neg_padded.dim() != 3 or neg_padded.size(1) == 0:
            continue

        # Move tensors to GPU if available
        claims_padded = claims_padded.to(device)
        pos_padded = pos_padded.to(device)
        neg_padded = neg_padded.to(device)

        # Compute the positive score
        pos_score = gru_model(claims_padded, pos_padded)  # [batch_size, 1]

        # Initialize loss for the batch
        batch_loss = 0

        # Iterate over each negative in the padded negatives
        for i in range(neg_padded.size(1)):  # num_negatives dimension
            neg_single_padded = neg_padded[:, i, :]  # Select the ith negative for each element in the batch
            neg_score = gru_model(claims_padded, neg_single_padded)  # [batch_size, 1]

            # target = 1 asks for pos_score to exceed neg_score by at least the margin
            target = torch.ones_like(pos_score)  # [batch_size, 1]
            loss = loss_fn(pos_score, neg_score, target)
            batch_loss += loss

        # Aggregate losses from all negatives before the backward pass
        optimizer.zero_grad()
        batch_loss.backward()  # Only call backward once all losses have been accumulated
        optimizer.step()
        total_loss += batch_loss.item()

    # Reported loss is the per-batch sum over negatives, averaged over all batches of the loader
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(dataloader)}')





Epoch 1/10, Loss: 13.388742923915673
Epoch 2/10, Loss: 8.429342570535551
Epoch 3/10, Loss: 6.302742189620263
Epoch 4/10, Loss: 5.817699965985062
Epoch 5/10, Loss: 8.977933188954081
Epoch 6/10, Loss: 5.404826521980935
Epoch 7/10, Loss: 3.286890445671116
Epoch 8/10, Loss: 3.8300782826084356
Epoch 9/10, Loss: 4.4999058139868655
Epoch 10/10, Loss: 4.582532592726728


In [None]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
# Listwise training with in-batch negatives.
# This cell needs the three-argument ListwiseRankingDataset and the in-batch-negative collate_fn
# to be the definitions currently in effect.
dataset = ListwiseRankingDataset(train_claims_text_processed, evidence_text_processed, train_evidence_idxs)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)


# Model and optimizer
# gru_model = SiameseNetwork(embedding_matrix, hidden_dim=128, dropout_rate=0.5)
# optimizer = optim.Adam(gru_model.parameters(), lr=0.001)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): gru_model is re-created here, discarding any previously trained weights. If the loop
# below raises on its first batch, later cells will evaluate this freshly initialised model.
gru_model = SiameseNetwork(embedding_matrix, hidden_dim=128, dropout_rate=0.5).to(device)
optimizer = optim.Adam(gru_model.parameters(), lr=0.001)
# NOTE(review): with step_size=10 and num_epochs=10 the first learning-rate decay only happens after
# the last epoch, so the scheduler has no effect on this run — confirm the intended schedule.
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
# Training loop
num_epochs = 10
gru_model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_loader:
        if batch is None:  # Skip batches that are None
            continue
        claims, pos_evidences, neg_evidences = batch
        claims = claims.to(device)
        pos_evidences = pos_evidences.to(device)
        neg_evidences = neg_evidences.to(device)

        optimizer.zero_grad()
        #pos_scores = gru_model(claims, pos_evidences)
        #neg_scores = torch.stack([gru_model(claims, neg) for neg in neg_evidences.transpose(0, 1)], dim=1)
        # listwise_loss runs the forward passes itself and returns the scalar loss
        loss = listwise_loss(gru_model, claims, pos_evidences, neg_evidences)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # One scheduler step per epoch
    scheduler.step()
    print(f'Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}')

# Note: You may want to add more sophisticated error handling, validation, and model saving mechanisms.


TypeError: pad_sequence() got an unexpected keyword argument 'max_length'

In [None]:
from torch.nn.utils.rnn import pad_sequence
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def score_query(model, query, evidences, vocab, pad_idx):
    """Score one tokenised query against a list of tokenised evidences.

    Side effect: switches ``model`` to evaluation mode. Tensors are moved to
    the module-level ``device``.

    Args:
        model: Callable ``model(claims, evidences)`` returning one similarity
            per evidence, as ``SiameseNetwork`` does for 3-D evidence input.
        query: Token list of the claim.
        evidences: List of token lists, one per candidate evidence.
        vocab: Token -> id mapping containing an ``'<unk>'`` entry.
        pad_idx: Id used to right-pad sequences to a common length.

    Returns:
        list[float]: One score per evidence, in input order (empty for no evidences).
    """
    model.eval()
    if len(evidences) == 0:
        return []

    # An empty token list would produce a zero-length sequence (which, as far as I know, nn.GRU
    # rejects), so a single pad token stands in for it.
    query_indices = text_to_indices(query, vocab) or [pad_idx]
    evidence_indices = [text_to_indices(evidence, vocab) or [pad_idx] for evidence in evidences]

    # dtype is given explicitly, matching the training collate functions
    query_tensor = torch.tensor([query_indices], dtype=torch.long).to(device)
    evidence_tensors = pad_sequence(
        [torch.tensor(ei, dtype=torch.long) for ei in evidence_indices],
        batch_first=True,
        padding_value=pad_idx,
    ).to(device)

    with torch.no_grad():
        # All evidences are scored in one forward pass: [1, num_evidences, seq_len] -> [1, num_evidences]
        scores = model(query_tensor, evidence_tensors.unsqueeze(0))

    return scores.reshape(-1).tolist()

# Re-score the retrieved candidates of every dev claim with the GRU model.
# dev_scores[i][j] is the model score of the j-th retrieved candidate of dev claim i.
dev_scores = []
for idx in range(len(dev_claims_text_precessed)):
    top_k_evidence_idxs = dev_top_indices[idx]
    top_k_evidences = [evidence_text_processed[i] for i in top_k_evidence_idxs]
    scores = score_query(gru_model, dev_claims_text_precessed[idx], top_k_evidences, vocab, vocab['<pad>'])
    dev_scores.append(scores)


In [None]:
# Spot check: model score of the sixth retrieved candidate of the first dev claim
dev_scores[0][5]

0.973311185836792

In [None]:
def aggregate_scores(original_scores, new_scores, lam=0.1):
    """
    Blend two sets of scores with a convex combination.

    Parameters:
    - original_scores: Iterable of per-claim score sequences from the original model.
    - new_scores: Iterable of per-claim score sequences from the new model.
    - lam: Weight given to new_scores; original_scores are weighted by 1 - lam.

    Returns:
    - List of lists with (1 - lam) * original + lam * new for every paired score.
      Pairing uses zip, so extra rows or entries on the longer side are ignored.
    """
    return [
        [(1 - lam) * old + lam * fresh for old, fresh in zip(old_row, fresh_row)]
        for old_row, fresh_row in zip(original_scores, new_scores)
    ]

# Blend the Word2Vec similarity scores with the GRU scores (default lam=0.1).
# NOTE(review): the re-ranking cell in this notebook sorts by dev_scores and reuses the name `scores`
# as its loop variable, so this blended result does not appear to be used — confirm which scores
# the re-ranking is meant to use.
scores = aggregate_scores(dev_orig_scores, dev_scores)

In [None]:
# Re-order the retrieved candidates of every dev claim by descending model score.
# NOTE(review): this ranks by dev_scores (the GRU scores alone), and the loop variable `scores`
# overwrites any module-level variable of the same name.
reranked_indices = []

for indices, scores in zip(dev_top_indices, dev_scores):
    # Combine indices and scores into a list of tuples
    indexed_scores = list(zip(indices, scores))

    # Sort the list of tuples by the score in descending order
    sorted_by_score = sorted(indexed_scores, key=lambda x: x[1], reverse=True)

    # Extract the sorted indices (.item() turns each tensor element into a Python int)
    sorted_indices = [idx.item() for idx, _ in sorted_by_score]

    # Add to the final list
    reranked_indices.append(sorted_indices)

In [None]:
def evaluate_evidence_retrieval(predicted_indices_list, actual_indices_list, k=5):
    """Macro-averaged recall, precision and F-score of the top-``k`` predictions.

    Args:
        predicted_indices_list: Per claim, the ranked predicted evidence
            indices (ints or 0-d tensors).
        actual_indices_list: Per claim, the gold evidence indices (ints or
            0-d tensors).
        k: Number of top predictions evaluated per claim; precision is
            ``hits / k``.

    Returns:
        dict: ``average_recall``, ``average_precision`` and ``average_fscore``,
        each averaged over all claims. A claim without any hit contributes 0
        to all three; empty input yields all zeros.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(predicted_indices_list) == len(actual_indices_list), "Both inputs must have the same length."

    num_claims = len(predicted_indices_list)
    if num_claims == 0:
        # Nothing to evaluate; avoid dividing by zero below
        return {"average_recall": 0.0, "average_precision": 0.0, "average_fscore": 0.0}

    def _as_ints(indices):
        # Tensor elements hash by identity, so convert them before building sets
        return [index.item() if isinstance(index, torch.Tensor) else index for index in indices]

    total_recall = 0.0
    total_precision = 0.0
    total_fscore = 0.0

    for predicted_indices, actual_indices in zip(predicted_indices_list, actual_indices_list):
        top_k_predicted = set(_as_ints(predicted_indices)[:k])
        actual_indices_set = set(_as_ints(actual_indices))

        correct_predictions = len(top_k_predicted & actual_indices_set)
        if correct_predictions == 0:
            # No hit: recall, precision and F-score are all 0 for this claim
            continue

        recall = float(correct_predictions) / len(actual_indices_set)
        precision = float(correct_predictions) / k
        # Both terms are positive here, so the harmonic mean is well defined
        fscore = 2 * (precision * recall) / (precision + recall)

        total_recall += recall
        total_precision += precision
        total_fscore += fscore

    return {
        "average_recall": total_recall / num_claims,
        "average_precision": total_precision / num_claims,
        "average_fscore": total_fscore / num_claims
    }

# Evaluate the re-ranked dev candidates against the gold evidence indices (default k=5)
results = evaluate_evidence_retrieval(reranked_indices, dev_evidence_idxs)
print(results)

{'average_recall': 0.016666666666666666, 'average_precision': 0.012987012987012986, 'average_fscore': 0.014192949907235623}


# 3.Testing and Evaluation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
import random

# def prepare_evaluation_set(dev_claims_text, evidence_text_processed, top_indices):
#     eval_claims = []
#     eval_evidences = []
#     eval_evidences_idxs = []

#     for i, claim in enumerate(dev_claims_text):
#       claim_top_indices = top_indices[i]

#       for idx in claim_top_indices:
#         #idx = idx.item()
#         eval_claims.append(claim)
#         eval_evidences.append(evidence_text_processed[idx])
#         eval_evidences_idxs.append(idx)


#     return eval_claims, eval_evidences, eval_evidences_idxs

# dev_claims, dev_evidences, dev_evidences_idxs = prepare_evaluation_set(dev_claims_text_precessed, evidence_text_processed, dev_top_indices)

def convert_to_indices(claims, all_evidences, vocab):
    """Encode tokenised claims and their candidate evidences as LongTensors of vocabulary ids.

    Args:
        claims: Iterable of token lists.
        all_evidences: Iterable with, per claim, an iterable of token lists.
        vocab: Token -> id mapping containing an ``'<unk>'`` entry for unseen tokens.

    Returns:
        tuple: ``(claims_indices, all_evidences_indices)`` — a list of 1-D
        tensors and a list of lists of 1-D tensors.
    """
    def _encode(tokens):
        ids = [vocab.get(token, vocab['<unk>']) for token in tokens]
        return torch.tensor(ids, dtype=torch.long)

    claims_indices = [_encode(claim) for claim in claims]
    all_evidences_indices = []
    for evidences in all_evidences:
        all_evidences_indices.append([_encode(evidence) for evidence in evidences])
    return claims_indices, all_evidences_indices

# dev_claims_indices, dev_evidence_indices, _ = convert_to_indices(dev_claims, dev_evidences, dev_evidences, vocab)

def prepare_evaluation_set(dev_claims_text, evidence_text_processed, top_indices):
    """Group every claim with the evidences selected for it.

    Args:
        dev_claims_text: iterable of claims; they are passed through unchanged.
        evidence_text_processed: container indexable by the values stored in
            ``top_indices``.
        top_indices: container indexable by claim position; entry ``i`` holds
            the evidence indices chosen for the ``i``-th claim.

    Returns:
        Three parallel lists: the claims, the per-claim lists of looked-up
        evidences, and the per-claim lists of the indices that were used.
    """
    eval_claims, eval_all_evidences, eval_all_evidence_idxs = [], [], []

    for position, claim in enumerate(dev_claims_text):
        # Materialise this claim's indices once; both outputs are built from them.
        chosen = list(top_indices[position])

        eval_claims.append(claim)
        eval_all_evidences.append([evidence_text_processed[idx] for idx in chosen])
        eval_all_evidence_idxs.append(chosen)

    return eval_claims, eval_all_evidences, eval_all_evidence_idxs

# Example usage
# NOTE(review): this rebinds `dev_claims`, which earlier in the notebook names the
# dev-claims DataFrame; any cell that needs the DataFrame must run before this one.
dev_claims, dev_all_evidences, dev_all_evidence_idxs = prepare_evaluation_set(dev_claims_text_precessed, evidence_text_processed, dev_top_indices)
dev_claims_indices, dev_all_evidence_indices = convert_to_indices(dev_claims, dev_all_evidences, vocab)


In [None]:
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
class EvaluateDataset(Dataset):
    """Dataset yielding one (claim, evidences, evidence indices) triple per item."""

    def __init__(self, claims, all_evidences, all_evidence_idxs):
        # Three parallel containers, stored exactly as given.
        self.claims, self.all_evidences, self.all_evidence_idxs = (
            claims,
            all_evidences,
            all_evidence_idxs,
        )

    def __len__(self):
        # The claims container defines the dataset size.
        return len(self.claims)

    def __getitem__(self, idx):
        # Entry `idx` of each container, returned as a 3-tuple.
        columns = (self.claims, self.all_evidences, self.all_evidence_idxs)
        return tuple(column[idx] for column in columns)

def collate_fn(batch):
    """Collate (claim, evidences, evidence_idxs) samples into one batch.

    Returns a 3-tuple: the claims padded into a single batch-first tensor, a
    list with one batch-first padded tensor per claim (that claim's
    evidences), and the tuple of per-claim evidence index lists unchanged.
    Padding uses ``vocab['<pad>']`` from the notebook-global ``vocab``.
    """
    claims, all_evidences, all_evidence_idxs = zip(*batch)
    pad_id = vocab['<pad>']

    claims_padded = pad_sequence(list(claims), batch_first=True, padding_value=pad_id)

    # The number of evidences differs per claim, so each claim gets its own
    # padded tensor instead of one tensor for the whole batch.
    all_evidences_padded = []
    for evidences in all_evidences:
        all_evidences_padded.append(
            pad_sequence(list(evidences), batch_first=True, padding_value=pad_id)
        )

    return claims_padded, all_evidences_padded, all_evidence_idxs

# DataLoader creation
# NOTE(review): batch_size=32 puts several claims into each batch, so whatever
# consumes eval_loader has to pair every claim with its own evidence list.
# shuffle=False keeps the batches in dataset order.
eval_dataset = EvaluateDataset(dev_claims_indices, dev_all_evidence_indices, dev_all_evidence_idxs)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

In [None]:
import torch.nn.functional as F

def evaluate(model, data_loader, device, k=5):
    """Re-rank each claim's candidate evidences with ``model`` and keep the top ``k``.

    Every claim is scored only against its own candidate evidences, whatever
    the batch size of ``data_loader`` is.

    Args:
        model: object exposing ``eval()`` and ``forward_one(claim, evidence)``.
            ``forward_one`` must return a tensor; a result with more than one
            element is averaged into a single score.
        data_loader: iterable of ``(claims, all_evidences, all_evidence_idxs)``
            batches. ``claims`` is a tensor whose first dimension indexes the
            claims of the batch; entry ``i`` of ``all_evidences`` /
            ``all_evidence_idxs`` holds the candidate evidences of claim ``i``
            and their corpus indices.
        device: device the tensors are moved to before scoring.
        k: maximum number of evidences kept per claim.

    Returns:
        A list with one entry per claim, in loader order. Each entry is a
        single-element list wrapping that claim's top-``k`` corpus indices,
        best first (``[[idx, ...]]``). This is the nesting the function
        produced for single-claim batches and the one ``calculate_metrics``
        flattens.
    """
    model.eval()
    results = []
    with torch.no_grad():
        for claims, all_evidences, all_evidence_idxs in data_loader:
            claims = claims.to(device)

            for row, (evidences, evidence_idxs) in enumerate(zip(all_evidences, all_evidence_idxs)):
                # Slice (not index) so the claim keeps its leading batch
                # dimension, matching what the model saw with one claim per batch.
                # NOTE(review): in multi-claim batches this row still carries the
                # padding added when the batch was collated.
                claim = claims[row:row + 1]

                evidence_scores = []
                for evidence in evidences:
                    score = model.forward_one(claim, evidence.to(device))
                    # Collapse whatever shape the model returns to a 0-d tensor;
                    # numel() also copes with scores that are already 0-d.
                    score = score.mean() if score.numel() > 1 else score.reshape(())
                    evidence_scores.append(score)

                if not evidence_scores:
                    # No candidates for this claim: nothing to rank.
                    results.append([[]])
                    continue

                # Stacking 0-d scores always gives a 1-D tensor, so a claim with
                # a single candidate is ranked like any other.
                ranking = torch.argsort(torch.stack(evidence_scores), descending=True)[:k]

                # int() accepts plain ints as well as tensor / numpy scalars.
                top_k_evidence_indices = [int(evidence_idxs[position]) for position in ranking.tolist()]
                results.append([top_k_evidence_indices])

    return results





# Assume eval_loader is properly defined, as well as gru_model and device
# Rank the candidates served by eval_loader with the GRU model, keeping k=5 per ranking.
sorted_indices_per_claim = evaluate(gru_model, eval_loader, device, k=5)

In [None]:
def calculate_metrics(predicted_indices, ground_truth_indices):
    """Macro-averaged precision, recall and F1 of predicted evidence indices.

    Args:
        predicted_indices: sized sequence with one entry per claim. An entry
            may be a flat collection of evidence indices (``[3, 7, 9]``) or a
            collection of index lists (``[[3, 7, 9]]``); nested collections
            are flattened one level.
        ground_truth_indices: iterable holding the gold evidence indices of
            each claim, aligned with ``predicted_indices``.

    Returns:
        ``(average_precision, average_recall, average_f1)`` as floats, each
        averaged over ``len(predicted_indices)``; all zeros when there are no
        predictions.
    """
    from collections.abc import Iterable, Sized
    import warnings

    num_claims = len(predicted_indices)
    if num_claims == 0:
        # Nothing to average; avoids a ZeroDivisionError below.
        return 0.0, 0.0, 0.0

    if isinstance(ground_truth_indices, Sized) and len(ground_truth_indices) != num_claims:
        # zip() stops at the shorter input, which would otherwise hide a
        # misaligned prediction list behind plausible-looking numbers.
        warnings.warn(
            f"calculate_metrics: {num_claims} predictions vs "
            f"{len(ground_truth_indices)} ground-truth entries; "
            "only the overlapping prefix is scored.",
            stacklevel=2,
        )

    total_precision = 0.0
    total_recall = 0.0
    total_f1 = 0.0

    for predicted, true in zip(predicted_indices, ground_truth_indices):
        true_set = set(true)

        # Accept both flat and one-level-nested predictions.
        predicted_set = set()
        for entry in predicted:
            if isinstance(entry, Iterable) and not isinstance(entry, (str, bytes)):
                predicted_set.update(entry)
            else:
                predicted_set.add(entry)

        true_positives = len(predicted_set & true_set)

        # Empty sets score 0 instead of dividing by zero.
        precision = true_positives / len(predicted_set) if predicted_set else 0.0
        recall = true_positives / len(true_set) if true_set else 0.0

        if precision + recall == 0:
            f1 = 0.0
        else:
            f1 = 2 * (precision * recall) / (precision + recall)

        total_precision += precision
        total_recall += recall
        total_f1 += f1

    average_precision = total_precision / num_claims
    average_recall = total_recall / num_claims
    average_f1 = total_f1 / num_claims

    return average_precision, average_recall, average_f1

# Usage
# Compare the model's ranked predictions with the gold dev evidence indices and
# print each averaged metric with four decimal places.
average_precision, average_recall, average_f1 = calculate_metrics(sorted_indices_per_claim, dev_evidence_idxs)
print(f"Average Precision: {average_precision:.4f}")
print(f"Average Recall: {average_recall:.4f}")
print(f"Average F1 Score: {average_f1:.4f}")

Average Precision: 0.0026
Average Recall: 0.2000
Average F1 Score: 0.0051


In [None]:
# Inspect the gold evidence indices stored under label 1 of dev_evidence_idxs.
dev_evidence_idxs[1]

[996421, 1080858, 208053, 699212, 832334]

In [None]:
# Define the path
save_path = '/content/drive/MyDrive/nlp/data/gru_model.bin'

# Saving the model's state dictionary
# Only the state_dict is written, not the model object itself, so loading it
# back requires building the model first and then calling load_state_dict.
torch.save(gru_model.state_dict(), save_path)

#model.save(gru_model.state_dict(), '/content/drive/MyDrive/nlp/data/gru_model.bin')

In [None]:
# Define the path
load_path = '/content/drive/MyDrive/nlp/data/gru_model.bin'

# Loading the model's state dictionary
gru_model = SiameseNetwork(embedding_matrix, hidden_dim=128)  # Re-create the model structure
# map_location='cpu' lets a checkpoint saved during a GPU session be read on a
# CPU-only runtime; load_state_dict then copies the values into the model's
# existing parameters.
gru_model.load_state_dict(torch.load(load_path, map_location='cpu'))

<All keys matched successfully>

## Object Oriented Programming codes here

*You can use multiple code snippets. Just add more if needed*