# Load data

In [1]:
import json
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW

def load_data(filename):
    """Read a JSON file and return the decoded Python object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Pin the encoding so decoding does not depend on the platform's locale
    # default (JSON interchange text is UTF-8).
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)

# Read the four JSON inputs (same order as before: train, dev, evidence, test)
train_data, dev_data, evidence_data, test_data = (
    load_data(path)
    for path in (
        "train-claims.json",
        "dev-claims.json",
        "evidence.json",
        "test-claims-unlabelled.json",
    )
)

# Pretrained uncased BERT tokenizer, shared by the cells below
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Claim label string -> integer class id
label_map = {
    label: index
    for index, label in enumerate(("SUPPORTS", "REFUTES", "NOT_ENOUGH_INFO", "DISPUTED"))
}



# Preprocessing data

In [4]:
def encode_claim_evidence(claims, evidence_data, max_length=512):
    """Tokenize each claim together with the text of its evidence passages.

    For every claim, the claim text and the texts of the evidence ids listed
    under its ``'evidences'`` key (if any) are joined with single spaces into
    one string, which is then tokenized with the notebook-level ``tokenizer``
    and padded/truncated to ``max_length`` tokens.

    Args:
        claims: Dict whose values are claim dicts with a ``'claim_text'`` key
            and an optional ``'evidences'`` list of evidence ids.
        evidence_data: Dict mapping evidence id -> evidence text.
        max_length: Fixed sequence length of the returned tensors
            (default 512, the value previously hard-coded).

    Returns:
        Tuple ``(input_ids, attention_masks)`` of tensors with shape
        ``(len(claims), max_length)``, rows in the iteration order of
        ``claims``.

    Raises:
        KeyError: If a claim lacks ``'claim_text'`` or references an evidence
            id that is missing from ``evidence_data``.
    """
    texts = []
    for claim in claims.values():
        evidence_texts = [evidence_data[evidence_id] for evidence_id in claim.get('evidences', [])]
        texts.append(claim['claim_text'] + " " + " ".join(evidence_texts))

    # Nothing to tokenize: return correctly shaped empty tensors instead of
    # failing (the previous torch.cat over an empty list raised).
    if not texts:
        return (
            torch.empty((0, max_length), dtype=torch.long),
            torch.empty((0, max_length), dtype=torch.long),
        )

    # One batched call replaces the per-claim encode_plus + torch.cat; this is
    # the same tokenizer(...) call style already used in get_embedding.
    encoded = tokenizer(
        texts,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    return encoded['input_ids'], encoded['attention_mask']

# --- Training split: token ids, masks, integer labels, dataset, loader ---
train_input_ids, train_attention_masks = encode_claim_evidence(train_data, evidence_data)
train_labels = torch.tensor(
    [label_map[entry['claim_label']] for entry in train_data.values()]
)
train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
# Shuffled, so batches differ between epochs
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=2)

# --- Development split: same pipeline, but kept in file order ---
dev_input_ids, dev_attention_masks = encode_claim_evidence(dev_data, evidence_data)
dev_labels = torch.tensor(
    [label_map[entry['claim_label']] for entry in dev_data.values()]
)
dev_dataset = TensorDataset(dev_input_ids, dev_attention_masks, dev_labels)
dev_dataloader = DataLoader(dev_dataset, shuffle=False, batch_size=2)

# Train model

In [6]:
from tqdm import tqdm

# Load the pretrained BERT model with a classification head that has one
# output per entry in label_map (4 labels).
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_map))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Train the model.
# transformers.AdamW is deprecated in favour of torch.optim.AdamW; eps and
# weight_decay are set explicitly to the old transformers defaults (1e-6, 0.0)
# so the optimisation behaviour is unchanged (torch defaults are 1e-8 / 0.01).
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)

for epoch in range(3):
    model.train()
    epoch_iterator = tqdm(train_dataloader, desc=f"Epoch {epoch+1}", unit="batch")
    for batch in epoch_iterator:
        optimizer.zero_grad()
        # Each batch is (input_ids, attention_mask, labels) from train_dataset
        input_ids, attention_mask, labels = tuple(t.to(device) for t in batch)
        # Passing labels makes the model return the loss alongside the logits
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

KeyboardInterrupt: 

In [None]:
import numpy as np
def calculate_similarity(claim_text, evidence_texts, model, tokenizer, device):
    """Score a claim against each evidence text by embedding cosine similarity.

    Args:
        claim_text: Text of the claim.
        evidence_texts: Iterable of evidence texts to compare against.
        model, tokenizer, device: Forwarded unchanged to ``get_embedding``.

    Returns:
        List with one ``cosine_similarity`` value per evidence text, in the
        iteration order of ``evidence_texts``.
    """
    claim_vector = get_embedding(claim_text, model, tokenizer, device)
    scores = []
    for passage in evidence_texts:
        passage_vector = get_embedding(passage, model, tokenizer, device)
        scores.append(cosine_similarity(claim_vector, passage_vector))
    return scores

def get_embedding(text, model, tokenizer, device):
    """Embed ``text`` by averaging the model's last hidden-state entry.

    Args:
        text: Input passed to ``tokenizer`` (truncated to 512 tokens).
        model: Callable accepting the tokenizer output as keyword arguments
            plus ``output_hidden_states=True``; its result must expose
            ``hidden_states``.
        tokenizer: Callable returning a mapping that supports ``.to(device)``.
        device: Device the encoded inputs are moved to.

    Returns:
        NumPy array: the last ``hidden_states`` entry with a leading size-1
        dimension squeezed away, averaged over dimension 0
        (presumably the token axis for a single input text).
    """
    encoded = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    encoded = encoded.to(device)

    # Inference only: no autograd graph is needed for the forward pass
    with torch.no_grad():
        model_output = model(**encoded, output_hidden_states=True)

    final_states = model_output.hidden_states[-1]
    pooled = final_states.squeeze(0).mean(dim=0)
    return pooled.cpu().numpy()

def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        a, b: Array-likes of equal length.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``, or ``0.0`` when either vector has
        zero norm (the quotient would otherwise be 0/0 = NaN, which would
        corrupt any ranking built from these scores).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator


# Evaluation

In [None]:
# Integer class id -> label string (the original referenced an undefined
# `label_mapping`; the dict defined in the first cell is `label_map`).
reverse_label_map = {index: label for label, index in label_map.items()}

# Number of evidence passages to retrieve per claim.
# NOTE(review): `k` was never defined in the notebook; 5 is a placeholder
# value — tune it for the task.
k = 5

# Retrieve evidence and predict a label for every test claim
model.eval()

# Embed every evidence passage once, up front. The embeddings do not depend on
# the claim, so recomputing them inside the claim loop repeated the same work
# for every claim.
evidence_ids = list(evidence_data.keys())
evidence_embeddings = [
    get_embedding(evidence_data[evidence_id], model, tokenizer, device)
    for evidence_id in evidence_ids
]

test_predictions_and_evidence = []
with torch.no_grad():
    for claim_id, claim in test_data.items():
        claim_text = claim["claim_text"]

        # Similarity between the claim and every evidence passage
        claim_embedding = get_embedding(claim_text, model, tokenizer, device)
        similarities = [
            cosine_similarity(claim_embedding, evidence_embedding)
            for evidence_embedding in evidence_embeddings
        ]

        # Indices of the k most similar passages, most similar first
        top_k_indices = np.argsort(similarities)[-k:][::-1]

        # Retrieve the corresponding evidence IDs
        top_k_evidence_ids = [evidence_ids[idx] for idx in top_k_indices]

        # Classify the claim together with the retrieved evidence, mirroring
        # the claim + evidence text the model was trained on.
        claim_with_evidence = {**claim, "evidences": top_k_evidence_ids}
        input_ids, attention_mask = encode_claim_evidence({claim_id: claim_with_evidence}, evidence_data)
        input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)
        # encode_claim_evidence already returns one row per claim (1 x seq_len);
        # the extra unsqueeze(0) used before produced a 3-D tensor that no
        # longer matched the attention mask.
        outputs = model(input_ids, attention_mask=attention_mask)
        pred_label = torch.argmax(outputs.logits, dim=1).item()

        test_predictions_and_evidence.append({
            "claim_id": claim_id,
            "claim_label": reverse_label_map[pred_label],
            "evidences": top_k_evidence_ids
        })

# Calculate the accuracy on the development set.
# `predictions` was never computed anywhere in the notebook, so produce it
# here by running the model over dev_dataloader (unshuffled, so the order
# matches dev_labels).
model.eval()
predictions = []
with torch.no_grad():
    for batch in dev_dataloader:
        # Batches are (input_ids, attention_mask, labels); labels are not
        # needed for the forward pass.
        input_ids, attention_mask = (t.to(device) for t in batch[:2])
        outputs = model(input_ids, attention_mask=attention_mask)
        predictions.extend(torch.argmax(outputs.logits, dim=1).tolist())

dev_labels_list = dev_labels.tolist()
accuracy = sum(1 for p, g in zip(predictions, dev_labels_list) if p == g) / len(dev_labels_list)
print(f"Accuracy: {accuracy}")

# Encode the test claims and batch them for inference (inputs only)
test_input_ids, test_attention_masks = encode_claim_evidence(test_data, evidence_data)
test_dataset = TensorDataset(test_input_ids, test_attention_masks)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=8)

# Integer class id -> label string
reverse_label_map = dict(zip(label_map.values(), label_map.keys()))

# Collect the arg-max class id for every test claim, in dataset order
test_predictions = []
with torch.no_grad():
    for batch in test_dataloader:
        input_ids, attention_mask = (t.to(device) for t in batch)
        outputs = model(input_ids, attention_mask=attention_mask)
        preds = outputs.logits.argmax(dim=1)
        test_predictions += preds.tolist()

# Save output

In [None]:
# Build the submission mapping: claim id -> claim text, predicted label, evidence ids
output_data = {
    entry["claim_id"]: {
        "claim_text": test_data[entry["claim_id"]]["claim_text"],
        "claim_label": entry["claim_label"],
        "evidences": entry["evidences"],
    }
    for entry in test_predictions_and_evidence
}

# Serialize the mapping as JSON into output.json
with open("output.json", "w") as outfile:
    outfile.write(json.dumps(output_data))

# You can now use the `output.json` file with the provided evaluation script (eval.py)