In [1]:
import os

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
class FactCheckDataset(Dataset):
    """
    Torch dataset that tokenizes fact-checking rows of a DataFrame.

    Every row must provide "Statement", "Evidence_List" and "labels". A
    "Context" column is optional; when it holds a non-empty value it is
    appended to the statement text. Cells that are None/NaN (what pandas
    stores for empty CSV cells) are treated as absent rather than being
    rendered as the literal text "nan".

    Args:
        data: DataFrame indexed positionally via ``.iloc``.
        tokenizer: Callable accepting ``text``/``text_pair`` and returning a
            mapping with "input_ids" and "attention_mask" tensors.
        max_len: Length every example is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _is_missing(value):
        """Return True for None and NaN-like scalars; non-scalars are never missing."""
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    def __getitem__(self, index):
        """
        Tokenize row ``index``.

        Returns:
            dict with "input_ids" and "attention_mask" (batch dimension
            squeezed away) and "label" as a ``torch.long`` tensor.
        """
        # Fetch the row once instead of re-indexing the frame per field
        row = self.data.iloc[index]
        statement = row["Statement"]
        evidence = row["Evidence_List"]  # Use multi-evidence as a single string
        label = row["labels"]

        # Optionally include context if available
        context = row.get("Context", None)

        # NaN is truthy, so a plain `if context:` would emit "Context: nan"
        if not self._is_missing(context) and context:
            input_text = f"Statement: {statement} Context: {context}"
        else:
            input_text = f"Statement: {statement}"

        # A missing evidence cell cannot be tokenized as a text pair;
        # fall back to encoding the statement on its own.
        if self._is_missing(evidence):
            evidence = None

        # Tokenize statement, context, and evidence
        inputs = self.tokenizer(
            text=input_text,
            text_pair=evidence,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # Return tokenized data and label
        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }

In [3]:
# Load model and tokenizer
MODEL_NAME = "ura-hcmut/ura-llama-7b-r64"

# Never commit credentials to a notebook: read the Hugging Face token from the
# environment. The token that was previously hardcoded here has been exposed
# and should be revoked. `None` falls back to anonymous / cached-login access.
access_token = os.environ.get("HF_TOKEN")

# Decide on the target device first and let `from_pretrained` place the
# weights there directly, instead of pinning everything to CPU and moving the
# whole model afterwards. The "" key addresses the entire model, so no
# per-submodule entries are needed.
device = "cuda" if torch.cuda.is_available() else "cpu"
device_map = {"": device}

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    token=access_token,
)
# FactCheckDataset pads to max_length, which requires a pad token; reuse EOS
# when the checkpoint does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=device_map,
    token=access_token,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

: 

In [12]:
# Load data
train = pd.read_csv('data/train_data.csv')
dev = pd.read_csv('data/dev_data.csv')
test = pd.read_csv('data/test_data.csv')

# Create datasets
max_len = 512
train_dataset = FactCheckDataset(train, tokenizer, max_len=max_len)
dev_dataset = FactCheckDataset(dev, tokenizer, max_len=max_len)
test_dataset = FactCheckDataset(test, tokenizer, max_len=max_len)

# DataLoaders
batch_size = 16
# Only the training split is shuffled. Evaluation splits keep their row order
# so batch outputs can be aligned with the `dev` / `test` DataFrames and runs
# are comparable.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Sample formatting function
def format_prompt(row):
    """
    Build the fact-checking prompt for a single record.

    `row` is indexed by the keys 'Statement', 'Context', 'Evidence_List' and
    'evidence_top5'; each value is interpolated on its own labelled line,
    followed by the question and a trailing "Answer: " cue.
    """
    statement = row['Statement']
    context = row['Context']
    evidence_list = row['Evidence_List']
    top5 = row['evidence_top5']

    fragments = (
        f"Statement: {statement}\n",
        f"Context: {context}\n",
        f"Evidence List: {evidence_list}\n",
        f"Top-5 Evidence: {top5}\n\n",
        "Question: Based on the evidence and context, is the statement factual?\n",
        "Answer: ",
    )
    return "".join(fragments)

# Create a new column for prompts
# NOTE(review): `data` is not assigned in any cell above, so this line raised
# NameError on a fresh kernel. It is bound here to a copy of the test split on
# the assumption that prompting is meant for held-out rows -- confirm the
# intended split. The copy keeps the new columns out of the frame that backs
# `test_dataset`.
data = test.copy()
data["prompt"] = data.apply(format_prompt, axis=1)


In [None]:
def fact_check(prompt, model, tokenizer, max_length=512, temperature=0.7):
    """
    Generates a fact-checking response using the LLaMA model.

    Args:
        prompt: Text prompt to complete.
        model: Causal LM exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching ``model``.
        max_length: Maximum number of prompt tokens kept (longer prompts are
            truncated); generation is capped at ``max_length + 50`` total tokens.
        temperature: Sampling temperature.

    Returns:
        The generated continuation with surrounding whitespace removed. If the
        continuation itself contains "Answer:", only the text after its last
        occurrence is returned.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)

    # Send the inputs to wherever the model lives instead of assuming CUDA
    target_device = model.device
    input_ids = inputs["input_ids"].to(target_device)
    generation_inputs = {"input_ids": input_ids}
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        generation_inputs["attention_mask"] = attention_mask.to(target_device)

    outputs = model.generate(
        **generation_inputs,
        max_length=max_length + 50,
        temperature=temperature,
        do_sample=True
    )

    # A causal LM returns prompt + continuation. Decode only the new tokens so
    # the result is correct even when truncation removed the prompt's trailing
    # "Answer:" marker.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    completion = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return completion.split("Answer:")[-1].strip()


In [6]:
def primary_claim_prompt(statement, context, evidence_list, top_evidence):
    """
    Build the step-1 prompt: the four inputs on labelled lines, followed by
    the analysis task, its two numbered sub-steps and a trailing "Answer:" cue.
    """
    template = (
        "Statement: {statement}\n"
        "Context: {context}\n"
        "Evidence List: {evidence_list}\n"
        "Evidence: {top_evidence}\n\n"
        "Task: Analyze the statement and determine its truthfulness. "
        "Consider the provided evidence and context.\n\n"
        "1. Restate the main claim.\n"
        "2. Identify any assumptions or gaps in the evidence.\n\n"
        "Answer:"
    )
    return template.format(
        statement=statement,
        context=context,
        evidence_list=evidence_list,
        top_evidence=top_evidence,
    )


def generate_counterfactuals_prompt(statement, context):
    """
    Build the step-2 prompt asking for at least three counterfactual
    scenarios for the given statement and context, ending with "Answer:".
    """
    pieces = [
        f"Statement: {statement}\n",
        f"Context: {context}\n\n",
        "Task: Generate counterfactual scenarios where the statement would not hold true. ",
        "Provide at least 3 plausible counterfactuals.\n\n",
        "Answer:",
    ]
    return "".join(pieces)


def evaluate_counterfactuals_prompt(counterfactuals, evidence_list, top_evidence):
    """
    Build the step-3 prompt: the counterfactuals and both evidence fields on
    labelled lines, then the evaluation task and a trailing "Answer:" cue.
    """
    inputs_section = (
        f"Counterfactuals: {counterfactuals}\n"
        f"Evidence List: {evidence_list}\n"
        f"Top-5 Evidence: {top_evidence}\n\n"
    )
    task_section = (
        "Task: Evaluate each counterfactual against the provided evidence. "
        "State whether each counterfactual is supported, contradicted, or unaddressed by the evidence.\n\n"
    )
    return inputs_section + task_section + "Answer:"


In [None]:
# Load LLaMA model and tokenizer
# Note: this rebinds the notebook-level `tokenizer` and `model`, this time
# letting `device_map="auto"` decide weight placement.
model_name = "ura-hcmut/ura-llama-7b-r64"

# Read the Hugging Face token from the environment rather than hardcoding it.
# The token previously embedded here has been exposed and should be revoked.
# `None` falls back to anonymous / cached-login access.
access_token = os.environ.get("HF_TOKEN")

# `token=` replaces the deprecated `use_auth_token=` keyword.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
# Reuse EOS for padding when the checkpoint does not define a pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token, device_map="auto")

In [13]:

def generate_response(prompt, max_length=512, temperature=0.7):
    """
    Generate response from the model based on the given prompt.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text prompt to complete.
        max_length: Maximum number of prompt tokens kept (longer prompts are
            truncated); generation is capped at ``max_length + 50`` total tokens.
        temperature: Sampling temperature.

    Returns:
        The decoded first output sequence with special tokens removed. For a
        causal LM this is the prompt followed by the sampled continuation.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)

    # Send the inputs to wherever the model lives instead of assuming CUDA
    target_device = model.device
    generation_inputs = {"input_ids": inputs["input_ids"].to(target_device)}
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        generation_inputs["attention_mask"] = attention_mask.to(target_device)

    outputs = model.generate(
        **generation_inputs,
        max_length=max_length + 50,
        temperature=temperature,
        do_sample=True
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Example: Apply ToCP on a sample
statement = "The population of City X is growing at a rate of 5% annually."
context = "City X is a major metropolitan area with a history of rapid urbanization."
evidence_list = "Census reports, surveys, and research articles."
top_evidence = "Census report from 2022: Population growth rate is 4.5%."

# Step 1: Analyze the primary claim
primary_prompt = primary_claim_prompt(statement, context, evidence_list, top_evidence)
primary_response = generate_response(primary_prompt)
print("Primary Analysis:\n", primary_response)

# Step 2: Generate counterfactuals
counterfactuals_prompt = generate_counterfactuals_prompt(statement, context)
counterfactuals_response = generate_response(counterfactuals_prompt)
print("\nGenerated Counterfactuals:\n", counterfactuals_response)

# Step 3: Evaluate counterfactuals
# generate_response decodes the whole output sequence, i.e. the step-2 prompt
# followed by the model's continuation. Keep only the text after the prompt's
# "Answer:" cue so the evaluation prompt contains the counterfactuals
# themselves and not the step-2 instructions. If the cue is absent (e.g. the
# prompt was truncated), fall back to the full text.
_, answer_cue, generated_counterfactuals = counterfactuals_response.partition("Answer:")
if not answer_cue:
    generated_counterfactuals = counterfactuals_response
generated_counterfactuals = generated_counterfactuals.strip()

evaluate_prompt = evaluate_counterfactuals_prompt(generated_counterfactuals, evidence_list, top_evidence)
evaluation_response = generate_response(evaluate_prompt)
print("\nEvaluation of Counterfactuals:\n", evaluation_response)


NameError: name 'model' is not defined

In [None]:
# Apply ToCP to each row in the dataset
def apply_tocp(row):
    """
    Run the three ToCP steps (primary analysis, counterfactual generation,
    counterfactual evaluation) for one record.

    Args:
        row: Mapping/Series with the keys 'Statement', 'Context',
            'Evidence_List' and 'evidence_top5'.

    Returns:
        Tuple ``(primary_analysis, counterfactuals_generated, evaluation_result)``
        holding the text returned by ``generate_response`` for each step.
    """
    statement = row['Statement']
    context = row['Context']
    evidence_list = row['Evidence_List']
    top_evidence = row['evidence_top5']

    # Step 1: Analyze primary claim
    primary = primary_claim_prompt(statement, context, evidence_list, top_evidence)
    primary_analysis = generate_response(primary)

    # Step 2: Generate counterfactuals
    counterfactuals = generate_counterfactuals_prompt(statement, context)
    counterfactuals_generated = generate_response(counterfactuals)

    # The step-2 output may still carry the step-2 prompt in front of the
    # generated text. Feed only what follows the prompt's "Answer:" cue into
    # step 3 so the evaluation prompt is not polluted with the earlier
    # instructions; fall back to the full text when the cue is absent.
    _, answer_cue, generated_only = counterfactuals_generated.partition("Answer:")
    counterfactual_text = (generated_only if answer_cue else counterfactuals_generated).strip()

    # Step 3: Evaluate counterfactuals
    evaluation = evaluate_counterfactuals_prompt(counterfactual_text, evidence_list, top_evidence)
    evaluation_result = generate_response(evaluation)

    return primary_analysis, counterfactuals_generated, evaluation_result

# Run ToCP over every row of `data` and attach the three outputs as columns.
# `data` is expected to be a DataFrame bound by an earlier cell and to carry
# the columns that apply_tocp reads.
tocp_columns = ['primary_analysis', 'counterfactuals', 'evaluation']
tocp_outputs = data.apply(
    lambda row: pd.Series(apply_tocp(row)), axis=1
)
data[tocp_columns] = tocp_outputs

# Persist the augmented frame (without the index column)
data.to_csv("fact_checking_tocp_results.csv", index=False)
