In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from collections import deque
import base64
import os
import logging

import openai
import json
import time
from pathlib import Path
from html import escape

In [2]:
def load_bio_dataset(filepath):
    """
    Load sentences and labels from a BIO-formatted file.

    Each data line is split on whitespace; the second field is taken as the
    token and the last field as its BIO tag. Blank lines separate sentences.
    Lines starting with ``#`` (comment/metadata lines in CoNLL-U files) and
    lines with fewer than three fields are skipped.

    Args:
        filepath: Path of the UTF-8 encoded file to read.

    Returns:
        A tuple ``(sentences, labels)`` of parallel lists: ``sentences[i]`` is
        the list of tokens of the i-th sentence and ``labels[i]`` is the list
        of tags for those tokens.
    """
    sentences, labels = [], []
    tokens, tags = [], []

    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:  # Sentence boundary
                if tokens:
                    sentences.append(tokens)
                    labels.append(tags)
                    tokens, tags = [], []
                continue
            # Comment lines such as "# sent_id = 1" have >= 3 fields and would
            # otherwise be mistaken for a token/tag pair.
            if line.startswith("#"):
                continue
            parts = line.split()
            if len(parts) < 3:
                continue
            tokens.append(parts[1])
            tags.append(parts[-1])

    # Add last sentence if file doesn't end with a blank line
    if tokens:
        sentences.append(tokens)
        labels.append(tags)

    return sentences, labels



filepath = "test.conllu"  # path to the BIO-tagged dataset file; change to match your setup
# Parse the file into parallel per-sentence lists of tokens and of tags.
sentences, labels = load_bio_dataset(filepath)




In [3]:
import json
from sklearn.metrics import classification_report
import re

def clean_llm_output(output: str) -> str:
    """
    Extracts the first valid JSON array from an LLM response.

    Markdown code fences are stripped first. The text is then scanned for the
    first ``[`` from which a complete JSON array can be decoded, and exactly
    that substring is returned, so trailing prose or later brackets do not
    leak into the result. If no position decodes to a JSON array, the widest
    ``[...]`` span is returned as a best effort (the caller's JSON parsing
    will then report the problem).

    Args:
        output: Raw text returned by the model.

    Returns:
        The substring of ``output`` holding the JSON array.

    Raises:
        ValueError: If the text contains no bracketed span at all.
    """
    # Remove code fences if present
    output = re.sub(r"```(json)?", "", output).strip()

    # Try to decode a real JSON array starting at each '[' in turn; a greedy
    # regex alone would run to the LAST ']' and swallow unrelated text.
    decoder = json.JSONDecoder()
    start = output.find("[")
    while start != -1:
        try:
            value, end = decoder.raw_decode(output, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(value, list):
                return output[start:end]
        start = output.find("[", start + 1)

    # Fallback: nothing decoded cleanly, return the widest bracketed span.
    match = re.search(r"\[.*\]", output, re.DOTALL)
    if match:
        return match.group(0)
    raise ValueError("No valid JSON array found in output.")



def parse_llm_output(llm_json_str):
    """
    Turn the model's JSON answer into a flat list of BIO labels.

    The string is decoded as JSON and the ``"label"`` entry of every element
    is collected in order. Any failure (invalid JSON, missing key, unexpected
    structure) is printed and an empty list is returned instead.
    """
    collected = []
    try:
        for entry in json.loads(llm_json_str):
            collected.append(entry["label"])
    except Exception as err:
        # Best effort: report the problem and hand back no labels.
        print("Error parsing JSON:", err)
        return []
    return collected

def evaluate_predictions(gold_labels, pred_labels, default_label="O"):
    """
    Evaluate BIO predictions with precision, recall, F1.

    Predictions are aligned to the gold data sentence by sentence before
    flattening: a predicted sequence that is longer than its gold sequence is
    truncated, and one that is shorter (or missing entirely) is padded with
    ``default_label``. Without this, a single malformed prediction would shift
    every following tag or make the report fail on unequal lengths.

    Args:
        gold_labels: List of gold tag sequences, one list per sentence.
        pred_labels: List of predicted tag sequences, parallel to
            ``gold_labels``.
        default_label: Tag used to fill in missing predictions.

    Returns:
        None. The token-level classification report is printed.
    """
    gold_flat, pred_flat = [], []
    adjusted = 0

    for idx, gold_seq in enumerate(gold_labels):
        pred_seq = list(pred_labels[idx]) if idx < len(pred_labels) else []
        expected = len(gold_seq)
        if len(pred_seq) != expected:
            adjusted += 1
            # Truncate extras, then pad whatever is still missing.
            pred_seq = pred_seq[:expected]
            pred_seq += [default_label] * (expected - len(pred_seq))
        gold_flat.extend(gold_seq)
        pred_flat.extend(pred_seq)

    if adjusted:
        print(
            f"Warning: {adjusted} predicted sequence(s) did not match the gold "
            f"length and were truncated/padded with {default_label!r}."
        )

    print(classification_report(gold_flat, pred_flat, digits=4))





In [4]:

# --- Mock LLM outputs ---
# Dry run of the parse/evaluate pipeline without calling a model: every token
# is tagged "O". In practice, replace this with actual calls to your LLM.
llm_outputs = []
for sent in sentences[:5]:  # only first 5 sentences for testing
    fake_json = json.dumps([{"token": tok, "label": "O"} for tok in sent])
    llm_outputs.append(fake_json)

# Parse predictions
pred_labels = [parse_llm_output(out) for out in llm_outputs]

# Evaluate against the gold tags loaded above. They are bound to `labels`;
# `gold_labels` is only created in a later cell, so using it here fails on a
# fresh kernel.
evaluate_predictions(labels[:5], pred_labels)

In [None]:
filepath = "test.conllu"
sentences, gold_labels = load_bio_dataset(filepath)

# NOTE(review): MODEL_URL and HEADERS are not defined anywhere in the visible
# notebook. They must already exist in the kernel before this cell runs;
# define them in a configuration cell (reading any secret from the
# environment rather than hardcoding it).

# Upper bound, in seconds, for a single model request. Without a timeout
# `requests` waits indefinitely if the server never answers.
REQUEST_TIMEOUT_S = 120

# Worked example shown to the model. It does not depend on the sentence being
# tagged, so it is built once instead of on every loop iteration.
example = [
    {"token": "I", "label": "O"},
    {"token": "need", "label": "O"},
    {"token": "that", "label": "O"},
    {"token": "movie", "label": "O"},
    {"token": "which", "label": "O"},
    {"token": "involves", "label": "O"},
    {"token": "aliens", "label": "O"},
    {"token": "invading", "label": "O"},
    {"token": "earth", "label": "O"},
    {"token": "in", "label": "O"},
    {"token": "a", "label": "O"},
    {"token": "particular", "label": "O"},
    {"token": "united", "label": "B-LOC"},
    {"token": "states", "label": "I-LOC"},
    {"token": "place", "label": "O"},
    {"token": "in", "label": "O"},
    {"token": "california", "label": "B-LOC"},
]
# Interpolating the list directly would print Python's repr (single quotes),
# which is not JSON even though the prompt asks the model for valid JSON.
# ensure_ascii=False keeps non-ASCII tokens readable instead of \u-escaped.
example_json = json.dumps(example, ensure_ascii=False)

llm_outputs = []

for sent in sentences[:5]:  # only first 5 sentences for testing
    list_tokens = list(sent)

    # Same reasoning as for the example: send the tokens as a JSON array.
    input_text = json.dumps(list_tokens, ensure_ascii=False)

    prompt = f"""
    ### Instructions:
    1. Each sentence is alrealdy tokenized.
    2. Assign each token a label in **BIO format**:
       - **B-<ENTITY>** → the first token of a named entity
       - **I-<ENTITY>** → a subsequent token inside the same entity
       - **O** → a token not part of any entity
    3. Use the following entity types: **PER (for person), ORG (for organization), LOC (for location)**.
    4. Output the result in **JSON format** as an array of objects, where each object has:
       - `"token"` → the token text
       - `"label"` → the BIO label

    ### Example Input:

    ```json
    {example_json}
    ```

    Perform BIO tagging for the following text:  
    **[{input_text}]**

    Important: Return **only valid JSON** in the format shown above.  
    Do not include explanations, comments, or any text outside of the JSON array.



    """

    payload = {
        "messages": [
            {"role": "system", "content": "You are an expert in LLMs, prompt engineering, and AI. Your task is to perform **Named Entity Recognition (NER)** using the **BIO tagging scheme**."},
            {"role": "user", "content": prompt}
        ],
        # Temperature 0 to make the tagging as repeatable as the model allows.
        "temperature": 0.0
    }

    response = requests.post(
        MODEL_URL, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_S
    )
    # Fail loudly on HTTP errors rather than parsing an error body as output.
    response.raise_for_status()

    llm_outputs.append(response.json()["choices"][0]["message"]["content"])

# Parse predictions: isolate the JSON array in each reply, then pull out the
# label of every token.
pred_labels = [parse_llm_output(clean_llm_output(out)) for out in llm_outputs]



In [None]:
# Score the model's predictions against the gold tags of the same 5 sentences.
# evaluate_predictions prints its report and returns nothing, so there is no
# result worth binding to a variable.
evaluate_predictions(gold_labels[:5], pred_labels)