# In [None]:
import os
import json
import torch
import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoConfig
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_recall_fscore_support


# Configuration
# Each MODELS entry gives a display name, the Hugging Face checkpoint used for
# the tokenizer/config, and the path of the fine-tuned state dict to load.
# Comment out the entries for models that should not be run.

MODELS = [
    {
        "name": "BERT",
        "hf_model": "bert-base-cased",
        "pt_path": "/kaggle/input/bert-hedgepeer-newww/fine_tuned_hedgepeer_30_03.2/bert/bert_model.pt"
    },
    # {
    #     "name": "SciBERT",
    #     "hf_model": "allenai/scibert_scivocab_cased",
    #     "pt_path": "/kaggle/input/scibert-hedgepeer/fine_tuned_hedgepeer_30_03.2/scibert/scibert_model.pt"
    # },
    # {
    #     "name": "XLNet",
    #     "hf_model": "xlnet-base-cased",
    #     "pt_path": "/kaggle/input/xlnet-hedgepeer/fine_tuned_hedgepeer_30_03.2/xlnet/xlnet_model.pt"
    # }
]

DATA_PATH = "/kaggle/input/fini-bioscope/merged_bioscope.jsonl"  # Change this path to the JSON-lines dataset to evaluate.
OUTPUT_DIR = "/kaggle/working/inference_results_all_models"  # Change this path to the directory that should receive the CSV results.
MAX_LEN = 128
BATCH_SIZE = 8  # Batch size of the inference DataLoader.
# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create the output directory up front so the later CSV writes do not fail.
os.makedirs(OUTPUT_DIR, exist_ok=True)


class HedgePeerDataset(Dataset):
    """Token-classification dataset that encodes pre-split sentences on access.

    Every item is tokenized with ``is_split_into_words=True`` and padded or
    truncated to ``max_len``. Only the first sub-token of each input word gets
    a real label (1 for ``"HEDGE"``, 0 for anything else); special tokens,
    padding and continuation sub-tokens are marked with ``IGNORE_INDEX``.

    Args:
        tokens: Sequence of sentences, each a list of word strings.
        labels: Sequence of per-word label lists aligned with ``tokens``.
        tokenizer: Callable tokenizer whose result exposes ``word_ids()``
            (Hugging Face fast tokenizers do).
        max_len: Length every encoded sequence is padded/truncated to.
    """

    # Label value for positions that must be excluded from scoring.
    IGNORE_INDEX = -100

    def __init__(self, tokens, labels, tokenizer, max_len=128):
        self.tokens = tokens
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of sentences."""
        return len(self.tokens)

    def __getitem__(self, idx):
        """Encode sentence ``idx``.

        Returns:
            Dict with 1-D tensors ``input_ids``, ``attention_mask`` and
            ``labels``, each of length ``max_len``.
        """
        words = self.tokens[idx]
        word_labels = self.labels[idx]

        # Offsets are not requested: nothing downstream reads them.
        encoding = self.tokenizer(
            words,
            is_split_into_words=True,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )

        label_ids = []
        previous_word_idx = None
        for word_idx in encoding.word_ids():
            if word_idx is None or word_idx == previous_word_idx:
                # Special/padding token, or a continuation piece of a word.
                label_ids.append(self.IGNORE_INDEX)
            else:
                label_ids.append(1 if word_labels[word_idx] == "HEDGE" else 0)
            previous_word_idx = word_idx

        # squeeze(0) removes only the batch axis added by return_tensors="pt";
        # a bare squeeze() would also collapse the sequence axis when
        # max_len == 1.
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label_ids, dtype=torch.long),
        }


def preprocess_data(data_path, tokenizer):
    """Load a JSON-lines corpus and derive per-token hedge labels.

    Each record is read for a ``"Review_id"`` and a ``"Sentences"`` list; each
    sentence dict supplies ``"Sentence"`` (text) and optionally ``"Hedges"``,
    a list of dicts carrying a ``"Hedge"`` string. The sentence and every
    hedge are split with ``tokenizer.tokenize``; every occurrence of a hedge's
    token sequence inside the sentence is labelled ``"HEDGE"``, all other
    tokens ``"O"``.

    NOTE(review): the returned tokens are the pieces produced by
    ``tokenizer.tokenize``; HedgePeerDataset later encodes them again with
    ``is_split_into_words=True``. Confirm this matches how the model was
    fine-tuned.

    Args:
        data_path: Path of the JSON-lines file.
        tokenizer: Object providing ``tokenize(text) -> list[str]``.

    Returns:
        ``(tokens, labels, metadata)``: three parallel lists with one entry
        per sentence. ``metadata`` entries are dicts with ``"Review_id"``,
        ``"Sentence"`` and ``"Gold_Hedges"`` (hedges joined by ``"; "``).
    """
    df = pd.read_json(data_path, lines=True)
    tokens, labels, metadata = [], [], []

    for _, row in df.iterrows():
        for sent in row["Sentences"]:
            text = sent["Sentence"]
            # "Hedges" can be absent or an explicit null; both mean no hedges.
            hedges = [h["Hedge"] for h in (sent.get("Hedges") or [])]
            tokenized = tokenizer.tokenize(text)
            token_labels = ["O"] * len(tokenized)

            for hedge in hedges:
                hedge_tokens = tokenizer.tokenize(hedge)
                span = len(hedge_tokens)
                if span == 0:
                    # A hedge that tokenizes to nothing cannot match anything.
                    continue
                for start in range(len(tokenized) - span + 1):
                    if tokenized[start:start + span] == hedge_tokens:
                        token_labels[start:start + span] = ["HEDGE"] * span

            tokens.append(tokenized)
            labels.append(token_labels)
            metadata.append({
                "Review_id": row["Review_id"],
                "Sentence": text,
                "Gold_Hedges": "; ".join(hedges)
            })

    return tokens, labels, metadata


def run_inference(model_cfg):
    """Evaluate one fine-tuned model on DATA_PATH at sentence level.

    A sentence counts as hedged when at least one scored token (label other
    than -100) is class 1. Gold and predicted sentence flags are compared with
    binary precision/recall/F1.

    Args:
        model_cfg: Mapping with ``"name"`` (used for logs and file names),
            ``"hf_model"`` (Hugging Face checkpoint for tokenizer/config) and
            ``"pt_path"`` (path of the fine-tuned state dict).

    Side effects:
        Writes ``<name>_predictions.csv`` and ``<name>_metrics.csv`` (name
        lower-cased) into OUTPUT_DIR and prints progress and metrics.
    """
    name = model_cfg["name"]
    hf_model = model_cfg["hf_model"]
    pt_path = model_cfg["pt_path"]

    print(f"\n Loading {name}...")

    tokenizer = AutoTokenizer.from_pretrained(hf_model)
    # Enabled only for non-BERT checkpoints whose tokenizer has the attribute;
    # presumably needed for pre-split input on such tokenizers. TODO confirm.
    if hasattr(tokenizer, "add_prefix_space") and "bert" not in hf_model.lower():
        tokenizer.add_prefix_space = True

    config = AutoConfig.from_pretrained(hf_model, num_labels=2)
    model = AutoModelForTokenClassification.from_pretrained(hf_model, config=config)
    # NOTE: torch.load unpickles the file, so only load checkpoints from a
    # trusted source.
    model.load_state_dict(torch.load(pt_path, map_location=DEVICE))
    model.to(DEVICE)
    model.eval()

    print(f" Model loaded: {name}")

    tokens, labels, metadata = preprocess_data(DATA_PATH, tokenizer)
    # Pass the configured length explicitly so changing MAX_LEN takes effect.
    dataset = HedgePeerDataset(tokens, labels, tokenizer, max_len=MAX_LEN)
    # No shuffling: predictions must stay aligned with `metadata`.
    loader = DataLoader(dataset, batch_size=BATCH_SIZE)

    pred_binary, gold_binary = [], []

    with torch.no_grad():
        for batch in tqdm(loader, desc=f"Inference {name}"):
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            gold_labels = batch["labels"]

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=-1).cpu()

            # Reduce each sequence to one flag, looking only at scored
            # positions (gold label != -100).
            scored = gold_labels != -100
            pred_flags = ((preds == 1) & scored).any(dim=1)
            gold_flags = ((gold_labels == 1) & scored).any(dim=1)

            pred_binary.extend(pred_flags.int().tolist())
            gold_binary.extend(gold_flags.int().tolist())

    precision, recall, f1, _ = precision_recall_fscore_support(gold_binary, pred_binary, average="binary")

    df = pd.DataFrame(metadata)
    df["Predicted_Hedge"] = ["HEDGE" if p else "NO HEDGE" for p in pred_binary]
    df.to_csv(os.path.join(OUTPUT_DIR, f"{name.lower()}_predictions.csv"), index=False)

    metrics = pd.DataFrame([{
        "Model": name,
        "Precision": precision,
        "Recall": recall,
        "F1": f1
    }])
    metrics.to_csv(os.path.join(OUTPUT_DIR, f"{name.lower()}_metrics.csv"), index=False)

    print(f" {name} — Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
    print(f" Saved to: {OUTPUT_DIR}")


# Run every configured model in sequence.
# After each run, ask PyTorch to release its cached, unused GPU memory before
# the next model is loaded.

for model_cfg in MODELS:
    run_inference(model_cfg)
    torch.cuda.empty_cache()
