In [45]:
import sys
import os

# Make the parent of the notebook's working directory importable; the later
# `src.*` and `llm_stuff.*` imports are resolved relative to it.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Guard against duplicates: without the check, every re-run of this cell
# appends the same directory to sys.path again.
if project_root not in sys.path:
    sys.path.append(project_root)

In [46]:
from tqdm import tqdm
from pathlib import Path
from src.utils.config_loader import load_config

# Project root: one level above the directory the notebook is executed from.
base_dir = Path.cwd().parent

# Settings (including the API credentials read further down) are stored in a
# YAML file at the project root.
secrets_path = base_dir / 'secrets.yaml'
config = load_config(secrets_path)

In [47]:
from src.data.preprocessing import create_df

# Build the validation frame from the dev CoNLL-U file. The inference loop
# later reads its 'full_text', 'words' and 'labels' columns.
dev_conllu_path = base_dir / 'data/my_data/regplans-dev.conllu'
val_df = create_df(dev_conllu_path)

In [48]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import (SystemMessage, HumanMessage)

# Export the Azure OpenAI connection settings from the loaded config as
# environment variables (presumably where the client looks them up — confirm
# against the langchain_openai documentation).
# Each pair is (environment variable name, key in the config mapping).
for env_name, config_key in (
    ('OPENAI_API_VERSION', 'OPENAI_API_VERSION'),
    ('AZURE_OPENAI_ENDPOINT', 'OPENAI_API_BASE'),
    ('AZURE_OPENAI_API_KEY', 'OPENAI_API_KEY'),
):
    os.environ[env_name] = config[config_key]

# No temperature is passed (the original had `temperature=0.0` commented out),
# so the deployment's default applies.
# NOTE(review): outputs may therefore differ between runs — confirm this is intended.
llm = AzureChatOpenAI(deployment_name=config['OPENAI_DEPLOYMENT_NAME'])

In [49]:
import json

def format_examples(example_subset):
    """Render few-shot examples as one text block for inclusion in a prompt.

    Args:
        example_subset: Iterable of dicts, each holding a ``sentence`` string
            and an ``entities`` list of dicts with ``word`` and ``label`` keys.

    Returns:
        A string with one section per example, numbered from 1. Each section
        shows the sentence in double quotes followed by one ``word label``
        line per entity. Sections are joined by a newline; an empty input
        gives an empty string.
    """
    sections = []
    for number, example in enumerate(example_subset, start=1):
        tagged_words = [f"{entity['word']} {entity['label']}" for entity in example["entities"]]
        section = (
            f"Example {number}:\n"
            f"Sentence: \"{example['sentence']}\"\n"
            "Entities:\n"
            + "\n".join(tagged_words)
            + "\n"
        )
        sections.append(section)

    return "\n".join(sections)

# Load the bank of few-shot examples. The encoding is explicit because the
# sentences contain non-ASCII characters (e.g. æ, ø, å) and the platform
# default encoding is not guaranteed to be UTF-8.
with open(base_dir / 'llm_stuff/prompts/examples.json', 'r', encoding='utf-8') as f:
    example_bank = json.load(f)

# Ids of the examples to put in the prompt, in presentation order.
ids = [1, 2, 4, 21]

# Pick the first bank entry for each requested id. A missing id is reported
# by name instead of surfacing as a bare StopIteration from `next`.
examples = []
for example_id in ids:
    match = next((ex for ex in example_bank if ex["id"] == example_id), None)
    if match is None:
        raise KeyError(f"No example with id {example_id} found in examples.json")
    examples.append(match)

formatted_examples = format_examples(examples)

print(formatted_examples)

Example 1:
Sentence: "Adkomst til BFS1 og BFS2 skal være fra Solfjellveien ."
Entities:
BFS1 B-FELT
BFS2 B-FELT

Example 2:
Sentence: "På friområdene GF1 - GF3 tillates vanlig skjøtsel av trær og vegetasjon ."
Entities:
GF1 B-FELT
- I-FELT
GF3 I-FELT

Example 3:
Sentence: "Sonene med nemningane # 1 , # 2 og # 3 gjeld automatisk freda kulturminne , dyrkingsspor med id ."
Entities:
# B-FELT
1 I-FELT
# B-FELT
2 I-FELT
# B-FELT
3 I-FELT

Example 4:
Sentence: "Areal brattere enn 1 : 3 , arealer i gul eller rød sone for henholdsvis støy ( T-1442 ) og luftkvalitet ( T-1520 ) ."
Entities:




In [None]:
from src.utils.label_mapping_regplans import label_to_id
from collections import defaultdict

# Accumulators read by the evaluation cell: flat label-id sequences over all
# successfully processed sentences, plus one detailed record per sentence.
all_pred_ids = []
all_true_ids = []
all_results = []
# Indices of rows that raised during the model call or parsing. These rows
# contribute nothing to the id lists above, so they are excluded from metrics.
skipped_rows = []

# Fraction of the validation frame (taken from the top) sent to the model.
VAL_FRACTION = 0.25

# Slice into a new name instead of overwriting `val_df`: reassigning the
# frame in place made every re-run of this cell shrink it again.
val_subset = val_df.iloc[:int(len(val_df) * VAL_FRACTION)]

for idx, row in tqdm(val_subset.iterrows(), total=len(val_subset)):
    sentence = row['full_text']
    tokens = row['words']
    true_labels = row['labels']

    # The whitespace inside this literal is part of the text sent to the model.
    prompt = f"""You are an expert in Natural Language Processing. Your task is to identify Named Entities (NER) in a given text.
            The possible Named Entities are exclusively 'B-FELT' and 'I-FELT'. The entities are defined as follows:

            - B-FELT: The beginning of a field zone name.
            - I-FELT: The continuation of a field zone name.   
                        
            ### Examples:

            {formatted_examples}

            Your task is to identify the Named Entities in the following sentence: '{sentence}'
            """

    # `msg` is also read after the loop (the evaluation cell stores str(msg)).
    msg = [HumanMessage(content=prompt)]

    try:
        response = llm.invoke(msg)

        # Collect the labels the model emitted for each word, in reply order.
        # Only lines with exactly two whitespace-separated fields
        # ("<word> <label>") are used; any other line is ignored.
        entities = defaultdict(list)
        for line in response.content.splitlines():
            parts = line.strip().split()
            if len(parts) == 2:
                word, label = parts
                entities[word].append(label)

        # Align with the gold tokens: the k-th occurrence of a token in the
        # sentence takes the k-th label emitted for that word, and tokens the
        # model did not mention (or mentioned fewer times) become "O".
        pred_labels = []
        word_counts = defaultdict(int)
        for token in tokens:
            if token in entities and word_counts[token] < len(entities[token]):
                pred_labels.append(entities[token][word_counts[token]])
                word_counts[token] += 1
            else:
                pred_labels.append("O")

        # Labels the mapping does not know fall back to the id of "O"
        # (or -1 if "O" itself is missing from the mapping).
        pred_ids = [label_to_id.get(label, label_to_id.get("O", -1)) for label in pred_labels]
        true_ids = [label_to_id[label] for label in true_labels]

        all_pred_ids.extend(pred_ids)
        all_true_ids.extend(true_ids)

        all_results.append({
            'sentence': sentence,
            'tokens': tokens,
            'true_labels': true_labels,
            'predicted_labels': pred_labels,
            'generated_text': response.content
        })

    except Exception as e:
        # Best-effort run: one failing request (e.g. a prompt rejected by the
        # service) must not abort the whole evaluation. Record the row so the
        # omission can be inspected afterwards.
        skipped_rows.append(idx)
        print(f"Skipping row {idx} due to error: {e}")


 88%|████████▊ | 77/88 [11:25<01:09,  6.36s/it]

Skipping row 76 due to error: Error code: 400 - {'error': {'message': 'Invalid prompt: your prompt was flagged as potentially violating our usage policy. Please try again with a different prompt: https://platform.openai.com/docs/guides/reasoning#advice-on-prompting', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_prompt'}}


100%|██████████| 88/88 [12:51<00:00,  8.77s/it]


In [51]:
from llm_stuff.evaluation import evaluate 

# Score the flattened gold / predicted label ids collected by the inference loop.
metrics = evaluate(all_true_ids, all_pred_ids)

print("Evaluation Metrics on Dev Set:")
print(metrics)

# `msg` is whatever the last loop iteration left behind, so the stored prompt
# text contains the final sentence that was sent to the model.
final_output = {
    'prompt': str(msg),
    'evaluation_metrics': metrics,
    'results': all_results
}

# NOTE(review): the file name says "WO_EXAMPLES", but the prompt built in the
# inference loop interpolates the formatted examples — confirm the label
# matches the run being saved.
output_path = base_dir / f"llm_stuff/results/prompt_structure_testing/{config['OPENAI_DEPLOYMENT_NAME']}_PROMPT_V4_WO_EXAMPLES.json"

# Create the results directory if it is missing, so that a finished run is
# not lost to a FileNotFoundError at write time.
output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(final_output, f, indent=4, ensure_ascii=False)

Evaluation Metrics on Dev Set:
{'precision': 0.6049709916114807, 'recall': 0.48562800884246826, 'f1': 0.4880664050579071, 'span_acc': 0.39047619700431824, 'classification_report': {'B-FELT': {'precision': 0.40350877192982454, 'recall': 0.4380952380952381, 'f1-score': 0.4200913242009132, 'support': 105.0}, 'I-FELT': {'precision': 0.5625, 'recall': 0.09183673469387756, 'f1-score': 0.15789473684210525, 'support': 98.0}, 'O': {'precision': 0.8489042675893888, 'recall': 0.9269521410579346, 'f1-score': 0.8862131246237207, 'support': 794.0}, 'accuracy': 0.7933801404212638, 'macro avg': {'precision': 0.6049710131730711, 'recall': 0.4856280379490167, 'f1-score': 0.4880663952222464, 'support': 997.0}, 'weighted avg': {'precision': 0.7738449443516612, 'recall': 0.7933801404212638, 'f1-score': 0.7655330934833064, 'support': 997.0}}}
