In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [2]:
from tqdm import tqdm
from pathlib import Path
from src.utils.config_loader import load_config
from src.utils.seed import seed_everything

base_dir = Path(os.getcwd()).parent

config = load_config(base_dir / 'secrets.yaml')

seed_everything(42)

In [3]:
from src.data.preprocessing import create_df

val_df = create_df(base_dir / 'data/my_data/regplans-dev.conllu')

In [4]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import (SystemMessage, HumanMessage)

os.environ['OPENAI_API_VERSION'] = config['OPENAI_API_VERSION']
os.environ['AZURE_OPENAI_ENDPOINT'] = config['OPENAI_API_BASE']
os.environ['AZURE_OPENAI_API_KEY'] = config['OPENAI_API_KEY']

llm = AzureChatOpenAI(
    deployment_name='o1-mini',
    #temperature=0.0
)

In [5]:
import json

def format_examples(example_subset): 
    # Formats the examples into a string for later prompt
    formatted = []
    for i, ex in enumerate(example_subset):
        entity_lines = "\n".join([f"{e['word']} {e['label']}" for e in ex["entities"]])
        formatted.append(f"Example {i+1}:\Text: \"{ex['sentence']}\"\nEntities:\n{entity_lines}\n##\n")
    
    return "\n".join(formatted)

with open(base_dir / 'llm_stuff/prompts/examples.json', 'r') as f:
    example_bank = json.load(f)

ids = [1, 19, 16, 3, 21]

examples = [next(ex for ex in example_bank if ex["id"] == id) for id in ids]

formatted_examples = format_examples(examples)

print(formatted_examples)

Example 1:\Text: "Adkomst til BFS1 og BFS2 skal være fra Solfjellveien ."
Entities:
BFS1 B-FELT
BFS2 B-FELT
##

Example 2:\Text: "Parkeringsplassar ( SPP ) Grøntstruktur , jf . PBL § 12-5 , 2 . ledd nr . 3 - Turveg ( GT )"
Entities:
SPP B-FELT
GT B-FELT
##

Example 3:\Text: "Før det vert gjeve mellombels bruksløyve / ferdigattest for ny bueining innanfor felt BKS1 og BFS14 og 15"
Entities:
BKS1 B-FELT
BFS14 B-FELT
og I-FELT
15 I-FELT
##

Example 4:\Text: "Bebyggelsestype Innenfor BKS1-BKS6 og BFS2 skal det oppføres flermannsboliger , kjedeboliger og / eller rekkehus ."
Entities:
BKS1-BKS6 B-FELT
BFS2 B-FELT
##

Example 5:\Text: "Areal brattere enn 1 : 3 , arealer i gul eller rød sone for henholdsvis støy ( T-1442 ) og luftkvalitet ( T-1520 ) ."
Entities:

##



  formatted.append(f"Example {i+1}:\Text: \"{ex['sentence']}\"\nEntities:\n{entity_lines}\n##\n")


In [6]:
from src.utils.label_mapping_regplans import label_to_id
from collections import defaultdict

all_pred_ids = []
all_true_ids = []
all_results = []

#val_df = val_df.iloc[:int(len(val_df) * 0.5)]

for idx, row in tqdm(val_df.iterrows(), total=len(val_df)):
    sentence = row['full_text']
    tokens = row['words']
    true_labels = row['labels']

    prompt = f"""
    You are an expert in Named Entity Recognition (NER). Your task is to identify named entities that represent field zone names in the given text.
    
    The possible named entities are exclusively B-FELT (beginning of a field zone name) and I-FELT (continuation of the same field zone name).

    {formatted_examples}

    Return one line per token, including only tokens that are part of field zone names, each followed by its corresponding label, separated by a space.
                 
    Text: '{sentence}'

    Entities:
    """
    
    msg = [HumanMessage(content=prompt)]

    try:
        response = llm.invoke(msg)
        entities = defaultdict(list)

        for line in response.content.splitlines():
            parts = line.strip().split()
            if len(parts) == 2:
                word, label = parts[0], parts[1]
                entities[word].append(label)

        pred_labels = []
        word_counts = defaultdict(int)

        for token in tokens:
            if token in entities and word_counts[token] < len(entities[token]):
                pred_labels.append(entities[token][word_counts[token]])
                word_counts[token] += 1
            else:
                pred_labels.append("O")

        pred_ids = [label_to_id.get(label, label_to_id.get("O", -1)) for label in pred_labels]
        true_ids = [label_to_id[label] for label in true_labels]

        all_pred_ids.extend(pred_ids)
        all_true_ids.extend(true_ids)

        all_results.append({
            'sentence': sentence,
            'tokens': tokens,
            'true_labels': true_labels,
            'predicted_labels': pred_labels,
            'generated_text': response.content
        })

    except Exception as e:
        print(f"Skipping row {idx} due to error: {e}")
        continue


 16%|█▋        | 58/353 [06:22<42:47,  8.70s/it]  

Skipping row 57 due to error: Azure has not provided the response due to a content filter being triggered


 17%|█▋        | 60/353 [06:33<34:42,  7.11s/it]

Skipping row 59 due to error: Error code: 400 - {'error': {'message': 'Invalid prompt: your prompt was flagged as potentially violating our usage policy. Please try again with a different prompt: https://platform.openai.com/docs/guides/reasoning#advice-on-prompting', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_prompt'}}


 20%|██        | 72/353 [07:46<31:00,  6.62s/it]

Skipping row 71 due to error: Error code: 400 - {'error': {'message': 'Invalid prompt: your prompt was flagged as potentially violating our usage policy. Please try again with a different prompt: https://platform.openai.com/docs/guides/reasoning#advice-on-prompting', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_prompt'}}


 53%|█████▎    | 187/353 [17:37<20:08,  7.28s/it]

Skipping row 186 due to error: Error code: 400 - {'error': {'message': 'Invalid prompt: your prompt was flagged as potentially violating our usage policy. Please try again with a different prompt: https://platform.openai.com/docs/guides/reasoning#advice-on-prompting', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_prompt'}}


 86%|████████▋ | 305/353 [27:54<05:52,  7.34s/it]

Skipping row 304 due to error: Error code: 400 - {'error': {'message': 'Invalid prompt: your prompt was flagged as potentially violating our usage policy. Please try again with a different prompt: https://platform.openai.com/docs/guides/reasoning#advice-on-prompting', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_prompt'}}


 88%|████████▊ | 310/353 [28:29<04:32,  6.34s/it]

Skipping row 309 due to error: Error code: 400 - {'error': {'message': 'Invalid prompt: your prompt was flagged as potentially violating our usage policy. Please try again with a different prompt: https://platform.openai.com/docs/guides/reasoning#advice-on-prompting', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_prompt'}}


100%|██████████| 353/353 [31:56<00:00,  5.43s/it]


In [7]:
from llm_stuff.evaluation import evaluate 

metrics = evaluate(all_true_ids, all_pred_ids)

print("Evaluation Metrics on Dev Set:")
print(metrics)

final_output = {
    'prompt': str(msg),
    'evaluation_metrics': metrics,
    'results': all_results
}

with open(base_dir / f"llm_stuff/results/o1-mini_FEWSHOT_5_FULLVALSET.json", 'w', encoding='utf-8') as f:
    json.dump(final_output, f, indent=4, ensure_ascii=False)

Evaluation Metrics on Dev Set:
{'precision': 0.803317666053772, 'recall': 0.6209480166435242, 'f1': 0.6653620004653931, 'span_acc': 0.5873016119003296, 'classification_report': {'B-FELT': {'precision': 0.8090909090909091, 'recall': 0.7063492063492064, 'f1-score': 0.7542372881355932, 'support': 126.0}, 'I-FELT': {'precision': 0.625, 'recall': 0.16129032258064516, 'f1-score': 0.2564102564102564, 'support': 62.0}, 'O': {'precision': 0.9758620689655172, 'recall': 0.9952046035805626, 'f1-score': 0.9854384298828743, 'support': 3128.0}, 'accuracy': 0.9686369119420989, 'macro avg': {'precision': 0.803317659352142, 'recall': 0.620948044170138, 'f1-score': 0.6653619914762413, 'support': 3316.0}, 'weighted avg': {'precision': 0.9629650199847986, 'recall': 0.9686369119420989, 'f1-score': 0.9630225400712158, 'support': 3316.0}}}
