In [4]:
import torch
from transformers import pipeline
import os
import numpy
print(os.getcwd())

# Choose the compute device: CUDA when PyTorch reports it usable, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)
print()

# On a CUDA device, report the name of GPU 0 and its current memory figures in GB.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    for label, query_bytes in (
        ('Allocated:', torch.cuda.memory_allocated),
        ('Cached:   ', torch.cuda.memory_reserved),
    ):
        # Each query returns bytes for device 0; 1024**3 converts to GB.
        print(label, round(query_bytes(0)/1024**3, 1), 'GB')

# import transformers
# print("Torch version:", torch.__version__)
# print("Transformers version:", transformers.__version__)
# print("Transformers detects framework:", transformers.file_utils.is_torch_available())

# # Now the pipeline should work
# classifier = pipeline(
#     "zero-shot-classification"
# )

# result = classifier(
#     "According to all known laws of aviation, there is no way that a bee should be able to fly. Its wings are too small to get its fat little body off the ground.",
#     candidate_labels=["SUPPORTS", "REFUTES", "DISPUTED", "NOT_ENOUGH_INFO"]
# )

# print(result)

import json
import pandas as pd
from transformers import pipeline

# === Load data ===
# NOTE: the code below calls ``.items()`` and does ``in`` key lookups on these
# objects, so each file must hold a JSON object keyed by ID (not a list).
with open("processed/retrieved_train_evidence.json", encoding="utf-8") as f:
    claim_to_evid = json.load(f)  # claim_id -> collection of evidence IDs

with open("data/train-claims.json", encoding="utf-8") as f:
    claims_list = json.load(f)  # claim_id -> record containing "claim_text"

with open("data/evidence.json", encoding="utf-8") as f:
    evidence_list = json.load(f)  # evidence_id -> evidence text (joined as str below)


# Convert to dicts for easy lookup
claim_dict = {cid: cdata["claim_text"] for cid, cdata in claims_list.items()}
evidence_dict = evidence_list


# === Set up pipeline ===
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Labels the classifier chooses between; defined once instead of per iteration.
CANDIDATE_LABELS = ["SUPPORTS", "REFUTES", "DISPUTED", "NOT_ENOUGH_INFO"]

# === Process ===
# For every claim that has at least one resolvable evidence passage, classify
# the claim *together with* its evidence and record the top-scoring label.
output = []

for claim_id, evid_ids in claim_to_evid.items():
    # Skip retrieval entries that have no matching claim record.
    if claim_id not in claim_dict:
        continue

    claim = claim_dict[claim_id]
    # Keep only evidence IDs that actually exist in the evidence store.
    used_evidences = [evidence_dict[eid] for eid in evid_ids if eid in evidence_dict]

    if not used_evidences:
        continue

    combined_evidence = " ".join(used_evidences)

    # Fix: the evidence used to be joined but never given to the model, so the
    # prediction depended on the claim text alone. The claim is placed first so
    # it is presumably the part retained if the pipeline truncates an over-long
    # input -- TODO confirm against the installed transformers version.
    sequence = f"Claim: {claim} Evidence: {combined_evidence}"

    result = classifier(
        sequence,
        candidate_labels=CANDIDATE_LABELS,
        # "{}" makes the bare label text the NLI hypothesis.
        hypothesis_template="{}"
    )

    output.append({
        "claim_id": claim_id,
        "claim": claim,
        # The pipeline returns labels sorted by score; index 0 is the best one.
        "predicted_label": result["labels"][0],
        "evidences_used": used_evidences
    })


# === Output ===
# One row per classified claim, written to the current working directory.
df_out = pd.DataFrame(output)
df_out.to_csv("predicted_claims_with_evidence.csv", index=False)


/Users/billzhu/Documents/GitHub/COMP90042_2025/Bill-Agent
Using device: cpu



Device set to use mps:0


RuntimeError: Numpy is not available