In [1]:
import sys
import os

# Make the parent of the working directory importable so that the `src` and
# `llm_stuff` imports in the cells below resolve.
# Guarded so that re-running this cell does not add the same entry again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
from tqdm import tqdm
from pathlib import Path
from src.utils.config_loader import load_config

# Parent of the current working directory; every data, prompt and result
# path in this notebook is built relative to it.
base_dir = Path.cwd().parent

# The settings used below (API version, endpoint, key, deployment name)
# are read from secrets.yaml.
secrets_path = base_dir / 'secrets.yaml'
config = load_config(secrets_path)

In [3]:
from src.data.preprocessing import create_df

# Validation data comes from regplans-dev.conllu; later cells read the
# 'full_text', 'words' and 'labels' columns of the resulting frame.
dev_split_path = base_dir / 'data/my_data/regplans-dev.conllu'
val_df = create_df(dev_split_path)

In [4]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import (SystemMessage, HumanMessage)

# The connection settings are not passed to AzureChatOpenAI explicitly below,
# so they are exported as environment variables first.
# Each pair is (environment variable, key in the loaded config).
azure_env_from_config = (
    ('OPENAI_API_VERSION', 'OPENAI_API_VERSION'),
    ('AZURE_OPENAI_ENDPOINT', 'OPENAI_API_BASE'),
    ('AZURE_OPENAI_API_KEY', 'OPENAI_API_KEY'),
)
for env_name, config_key in azure_env_from_config:
    os.environ[env_name] = config[config_key]

# Chat model used for every prediction in this notebook; temperature 0.0
# asks for the least random sampling.
llm = AzureChatOpenAI(
    temperature=0.0,
    deployment_name=config['OPENAI_DEPLOYMENT_NAME'],
)

In [5]:
import json
import random

def format_examples(example_subset):
    """Render few-shot examples as one text block for the prompt.

    Each item of ``example_subset`` must provide a ``'sentence'`` and an
    ``'entities'`` list whose items have ``'word'`` and ``'label'``.

    Every example becomes an "Example N:" header (N starts at 1), the
    sentence in double quotes, and one "<word> <label>" line per entity.
    Consecutive examples are separated by a blank line.
    """
    blocks = []
    for number, example in enumerate(example_subset, start=1):
        entity_rows = [f"{entity['word']} {entity['label']}" for entity in example["entities"]]
        blocks.append(
            f"Example {number}:\n"
            f"Sentence: \"{example['sentence']}\"\n"
            "Entities:\n"
            + "\n".join(entity_rows)
            + "\n"
        )

    return "\n".join(blocks)

# Load the bank of few-shot examples. The encoding is pinned because the
# sentences contain non-ASCII characters and the platform default may not be UTF-8.
with open(base_dir / 'llm_stuff/prompts/examples.json', 'r', encoding='utf-8') as f:
    example_bank = json.load(f)

# Ids of the examples placed in the prompt, in prompt order.
ids = [19, 16, 3]

# Alternative: draw three random examples instead of the fixed ids above.
#ids = random.sample(range(1, 26), 3)

# Look up each requested id (first match wins) and fail with a clear message
# instead of a bare StopIteration when an id is not in the bank.
examples = []
for example_id in ids:
    found = next((ex for ex in example_bank if ex["id"] == example_id), None)
    if found is None:
        raise KeyError(f"No example with id {example_id} in examples.json")
    examples.append(found)

formatted_examples = format_examples(examples)

print(formatted_examples)

Example 1:
Sentence: "Parkeringsplassar ( SPP ) Grøntstruktur , jf . PBL § 12-5 , 2 . ledd nr . 3 - Turveg ( GT )"
Entities:
SPP B-FELT
GT B-FELT

Example 2:
Sentence: "Før det vert gjeve mellombels bruksløyve / ferdigattest for ny bueining innanfor felt BKS1 og BFS14 og 15"
Entities:
BKS1 B-FELT
BFS14 B-FELT
og I-FELT
15 I-FELT

Example 3:
Sentence: "Bebyggelsestype Innenfor BKS1-BKS6 og BFS2 skal det oppføres flermannsboliger , kjedeboliger og / eller rekkehus ."
Entities:
BKS1-BKS6 B-FELT
BFS2 B-FELT



In [None]:
from src.utils.label_mapping_regplans import label_to_id
from collections import defaultdict

VAL_FRACTION = 0.5  # share of the validation rows sent to the model (kept small for testing)


def parse_token_labels(response_text):
    """Collect the labels the model gave to each word, in output order.

    Only lines that consist of exactly two whitespace-separated fields
    ("<word> <label>") are used; every other line is ignored.

    Returns a mapping word -> list of labels, one entry per occurrence
    of that word in the response.
    """
    labels_by_word = defaultdict(list)
    for line in response_text.splitlines():
        parts = line.split()
        if len(parts) == 2:
            word, label = parts
            labels_by_word[word].append(label)
    return labels_by_word


def align_labels(tokens, labels_by_word):
    """Assign one predicted label to every token of the sentence.

    The k-th occurrence of a word in ``tokens`` receives the k-th label the
    model emitted for that word; tokens without a remaining label get "O".

    NOTE(review): matching is done on the word string only, so if the model
    omits one occurrence of a repeated word, the labels of the later
    occurrences move forward by one.
    """
    used = defaultdict(int)  # how many labels of each word were consumed so far
    aligned = []
    for token in tokens:
        candidates = labels_by_word.get(token, [])
        position = used[token]
        if position < len(candidates):
            aligned.append(candidates[position])
            used[token] = position + 1
        else:
            aligned.append("O")
    return aligned


all_pred_ids = []
all_true_ids = []
all_results = []

# Slice into a new name instead of rebinding `val_df`: re-running this cell
# then evaluates the same rows again rather than halving the data once more.
val_subset = val_df.iloc[:int(len(val_df) * VAL_FRACTION)]

# Predicted labels missing from `label_to_id` are scored as "O"
# (or -1 if "O" itself is not mapped).
fallback_id = label_to_id.get("O", -1)

for _, row in tqdm(val_subset.iterrows(), total=len(val_subset)):

    sentence = row['full_text']
    tokens = row['words']
    true_labels = row['labels']

    # `msg` is also read after the loop (the results cell stores str(msg)),
    # so the name must stay. The prompt text is kept exactly as it was,
    # including the indentation inside the triple-quoted strings.
    msg = [
    SystemMessage(
        """
        You are an expert in Named Entity Recognition (NER). Your task is to identify named entities that represent field zone names in the given text.
        """
    ),
    HumanMessage(
        f""" 
        The possible named entities are exclusively B-FELT (beginning of a field zone name) and I-FELT (continuation of the same field zone name).

        Examples: 
        {formatted_examples}

        Return one line per token, where each line contains the token followed by its corresponding label, separated by a space.
                 
        Identify the named entities in the following sentence: '{sentence}'
        """
    )]

    response = llm.invoke(msg)

    # Turn the free-text answer into one label per gold token.
    pred_labels = align_labels(tokens, parse_token_labels(response.content))

    # Convert labels to IDs
    pred_ids = [label_to_id.get(label, fallback_id) for label in pred_labels]
    true_ids = [label_to_id[label] for label in true_labels]

    all_pred_ids.extend(pred_ids)
    all_true_ids.extend(true_ids)

    all_results.append({
        'sentence': sentence,
        'tokens': tokens,
        'true_labels': true_labels,
        'predicted_labels': pred_labels,
        'generated_text': response.content
    })

  0%|          | 0/176 [00:00<?, ?it/s]

100%|██████████| 176/176 [15:18<00:00,  5.22s/it]


In [7]:
from llm_stuff.evaluation import evaluate 

# Score the flattened label ids collected over all evaluated sentences.
metrics = evaluate(all_true_ids, all_pred_ids)

print("Evaluation Metrics on Val Set:")
print(metrics)

# `msg` is the message list left over from the last loop iteration, so the
# stored prompt contains the last evaluated sentence.
final_output = {
    'prompt': str(msg),
    'evaluation_metrics': metrics,
    'results': all_results
}

# Create the results directory first so that a missing folder cannot make
# the write fail after the long prediction run.
results_path = base_dir / f"llm_stuff/results/{config['OPENAI_DEPLOYMENT_NAME']}_V1_WO_LABEL_INFO.json"
results_path.parent.mkdir(parents=True, exist_ok=True)

with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(final_output, f, indent=4, ensure_ascii=False)

Evaluation Metrics on Val Set:
{'precision': 0.6771328449249268, 'recall': 0.6116820573806763, 'f1': 0.6396599411964417, 'span_acc': 0.7007299065589905, 'classification_report': {'B-FELT': {'precision': 0.8771929824561403, 'recall': 0.7299270072992701, 'f1-score': 0.796812749003984, 'support': 137.0}, 'I-FELT': {'precision': 0.1875, 'recall': 0.12, 'f1-score': 0.14634146341463414, 'support': 25.0}, 'O': {'precision': 0.9667056074766355, 'recall': 0.9851190476190477, 'f1-score': 0.9758254716981132, 'support': 1680.0}, 'accuracy': 0.9543973941368078, 'macro avg': {'precision': 0.6771328633109253, 'recall': 0.6116820183061059, 'f1-score': 0.6396598947055772, 'support': 1842.0}, 'weighted avg': {'precision': 0.9494725076857973, 'recall': 0.9543973941368078, 'f1-score': 0.951253352688242, 'support': 1842.0}}}
