In [1]:
import sys
import os

# Make the parent of the current working directory importable so the
# `src` / `llm_stuff` imports used further down resolve.
# Guarded so that re-running this cell does not stack duplicate entries.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
from pathlib import Path
from src.utils.config_loader import load_config
from src.utils.seed import seed_everything

# Parent of the current working directory; every project path below is
# resolved relative to it.
base_dir = Path.cwd().parent

# Experiment settings read from the YAML file next to the project packages.
config = load_config(base_dir / 'model_params.yaml')

# Seed using the value stored under general.seed in the config
# (presumably seeds all RNGs in use; see src.utils.seed to confirm).
seed_everything(config['general']['seed'])

In [3]:
import json
import random

def format_examples(example_subset):
    """Render few-shot examples as a single string for the prompt.

    Each example becomes a numbered section ("Example N:") holding the quoted
    sentence, an "Entities:" header, one "word label" line per entity and a
    closing "##" line. Sections are joined with a newline.

    Args:
        example_subset: Iterable of dicts with a 'sentence' string and an
            'entities' list of dicts carrying 'word' and 'label'.

    Returns:
        The concatenated sections; an empty string for an empty input.
    """
    def render(position, example):
        # One "word label" pair per line; an example without entities
        # yields an empty line under the "Entities:" header.
        tagged = "\n".join(
            f"{entity['word']} {entity['label']}" for entity in example["entities"]
        )
        return f"Example {position}:\nText: \"{example['sentence']}\"\nEntities:\n{tagged}\n##\n"

    return "\n".join(
        render(number, example)
        for number, example in enumerate(example_subset, start=1)
    )

# The example bank contains Norwegian text, so decode it explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open(base_dir / 'llm_stuff/prompts/examples.json', 'r', encoding='utf-8') as f:
    example_bank = json.load(f)

# Ids of the examples shown to the model, in prompt order.
ids = [1, 19, 16, 3, 21]

# Alternative: draw a random subset instead of the fixed selection above.
#ids = random.sample(range(1, 26), 3)

# Index the bank by id. Iterating in reverse keeps the FIRST entry when an id
# is duplicated, matching a front-to-back search.
examples_by_id = {example["id"]: example for example in reversed(example_bank)}

# Fail with a readable message rather than an opaque StopIteration when a
# requested id is absent from the bank.
missing_ids = [example_id for example_id in ids if example_id not in examples_by_id]
if missing_ids:
    raise KeyError(f"Example id(s) {missing_ids} not found in examples.json")

examples = [examples_by_id[example_id] for example_id in ids]

formatted_examples = format_examples(examples)

print(formatted_examples)

Example 1:
Text: "Adkomst til BFS1 og BFS2 skal være fra Solfjellveien ."
Entities:
BFS1 B-FELT
BFS2 B-FELT
##

Example 2:
Text: "Parkeringsplassar ( SPP ) Grøntstruktur , jf . PBL § 12-5 , 2 . ledd nr . 3 - Turveg ( GT )"
Entities:
SPP B-FELT
GT B-FELT
##

Example 3:
Text: "Før det vert gjeve mellombels bruksløyve / ferdigattest for ny bueining innanfor felt BKS1 og BFS14 og 15"
Entities:
BKS1 B-FELT
BFS14 B-FELT
og I-FELT
15 I-FELT
##

Example 4:
Text: "Bebyggelsestype Innenfor BKS1-BKS6 og BFS2 skal det oppføres flermannsboliger , kjedeboliger og / eller rekkehus ."
Entities:
BKS1-BKS6 B-FELT
BFS2 B-FELT
##

Example 5:
Text: "Areal brattere enn 1 : 3 , arealer i gul eller rød sone for henholdsvis støy ( T-1442 ) og luftkvalitet ( T-1520 ) ."
Entities:

##



In [None]:
import torch
import transformers
from collections import defaultdict
from src.utils.label_mapping_regplans import id_to_label, label_to_id

class LlamaForNER:
    """Prompt-based NER tagger built on a Hugging Face text-generation pipeline.

    The model is asked to list "token label" pairs for a sentence; those pairs
    are then aligned back onto the caller's token sequence, with every token
    the model did not mention labelled "O".
    """

    def __init__(self, model_path: str):
        """Create the generation pipeline.

        Args:
            model_path: Model identifier or path passed to
                ``transformers.pipeline`` as ``model``.
        """
        self.model_id = model_path
        self.pipeline = transformers.pipeline(
            'text-generation',
            model=self.model_id,
            model_kwargs={'torch_dtype': torch.float16} # FP16 for faster inference
        )
        tokenizer = self.pipeline.tokenizer
        # Reuse the EOS token for padding.
        tokenizer.pad_token_id = tokenizer.eos_token_id

        # Used as both eos_token_id and pad_token_id at generation time.
        self.terminators = tokenizer.eos_token_id

    def format_prompt(self, sentence):
        """Build the prompt for one sentence.

        Reads the notebook-level ``formatted_examples`` string, so the cell
        that defines it must have been executed first.

        Args:
            sentence: Text to tag; inserted verbatim into the prompt.

        Returns:
            The complete prompt string.
        """
        prompt = f"""
            You are an expert in Named Entity Recognition (NER). Your task is to identify named entities that represent field zone names in the given text.
    
            The possible named entities are exclusively B-FELT (beginning of a field zone name) and I-FELT (continuation of the same field zone name).

            {formatted_examples}

            Each token must be written only once.  
            Each token should be written on a separate line.  
            Each line must consist of the token followed by its label.  
            Separate the token and its label with a single space.  
            Do not include any explanations, comments, or Python code.  
                     
            Text: '{sentence}'
    
            Entities:
        """
        return prompt

    def predict_ner(self, sentence, tokens, max_tokens=50, temperature=0.0, top_p=0.9):
        """Generate a tagging for ``sentence`` and align it with ``tokens``.

        Args:
            sentence: Raw text placed in the prompt.
            tokens: Token sequence to produce one label per element for.
            max_tokens: Upper bound on newly generated tokens.
            temperature: Sampling temperature. A value of 0 (the default)
                selects greedy decoding; a positive value enables sampling.
            top_p: Nucleus-sampling threshold; only used when sampling.

        Returns:
            Dict with 'tokens', 'pred_labels' (same length as ``tokens``) and
            the stripped 'generated_text'.
        """
        prompt = self.format_prompt(sentence)

        generation_kwargs = {
            'max_new_tokens': max_tokens,
            'eos_token_id': self.terminators,
            'pad_token_id': self.terminators,
            # Let the pipeline drop the echoed prompt. Slicing the full text
            # by len(prompt) is fragile because the decoded prompt is not
            # guaranteed to match the input string character for character.
            'return_full_text': False,
        }
        if temperature and temperature > 0:
            # Sampling parameters only take effect when sampling is enabled.
            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
        else:
            # Greedy decoding; temperature/top_p are meaningless here, so
            # they are not forwarded.
            generation_kwargs['do_sample'] = False

        outputs = self.pipeline(prompt, **generation_kwargs)

        generated_text = outputs[0]['generated_text'].strip()
        pred_labels = self.extract_entities(generated_text, tokens)

        return {
            'tokens': tokens,
            'pred_labels': pred_labels,
            'generated_text': generated_text
        }

    def extract_entities(self, output_text, tokens):
        """Map "word label" lines from the model output onto ``tokens``.

        Only lines that split into exactly two whitespace-separated parts are
        used. When a word is listed several times, its labels are handed out
        in order to successive occurrences of that word in ``tokens``.

        Args:
            output_text: Raw text produced by the model.
            tokens: Token sequence to label.

        Returns:
            List of labels, one per token; "O" where no label is available.
        """
        entities = defaultdict(list) # Word-label pairs

        for line in output_text.splitlines():
            parts = line.strip().split()
            if len(parts) == 2:
                word, label = parts
                entities[word].append(label)

        pred_labels = []
        word_counts = defaultdict(int)  # Track occurrences of each word

        for token in tokens:
            if token in entities and word_counts[token] < len(entities[token]):
                pred_labels.append(entities[token][word_counts[token]])  # Get the label in order
                word_counts[token] += 1  # Increment occurrence counter
            else:
                pred_labels.append("O")  # Default to "O" if missing

        return pred_labels

In [5]:
from src.data.preprocessing import create_df
from llm_stuff.evaluation import evaluate 
from tqdm import tqdm
import json

# Fraction of the dev set to evaluate on (kept small to shorten the run).
VAL_FRACTION = 0.5

val_df = create_df(base_dir / 'data/my_data/regplans-dev.conllu')
val_df = val_df.iloc[:int(len(val_df) * VAL_FRACTION)] # Use only half the data for testing

model_path = 'meta-llama/Meta-Llama-3-8B-Instruct'
ner_model = LlamaForNER(model_path)

all_pred_ids = []
all_true_ids = []

all_results = []

for _, row in tqdm(val_df.iterrows(), total=len(val_df)):
    sentence = row['full_text']
    tokens = row['words']
    true_labels = row['labels']

    output = ner_model.predict_ner(sentence, tokens)
    pred_labels = output['pred_labels']

    pred_ids = []
    for label in pred_labels:
        if label in label_to_id:
            pred_ids.append(label_to_id[label])
        else:
            # tqdm.write keeps the progress bar intact, unlike a bare print.
            tqdm.write(f"Warning: Unexpected label '{label}' found. Assigning default label 'O'.")
            pred_ids.append(label_to_id.get("O", -1))

    true_ids = [label_to_id[label] for label in true_labels]

    all_pred_ids.extend(pred_ids)
    all_true_ids.extend(true_ids)

    all_results.append({
        'sentence': sentence,
        'tokens': tokens,
        'true_labels': true_labels,
        'predicted_labels': pred_labels,
        'generated_text': output['generated_text']
    })

# Calculate evaluation metrics across all tokens in the dev set.
metrics = evaluate(all_true_ids, all_pred_ids)

print("Evaluation Metrics on Dev Set:")
print(metrics)

final_output = {
    'prompt': ner_model.format_prompt(''),
    'evaluation_metrics': metrics,
    'results': all_results
}

# Create the results directory up front so a multi-minute run is not lost
# to a missing folder at the very last step.
results_dir = base_dir / 'llm_stuff/results'
results_dir.mkdir(parents=True, exist_ok=True)

# Derive the file name from model_path so the two cannot drift apart.
# NOTE(review): the prompt embeds few-shot examples, so the 'ZEROSHOT' suffix
# looks stale. Confirm before renaming, since other tooling may expect this path.
model_name = model_path.rsplit('/', 1)[-1]
results_path = results_dir / f'{model_name}_ZEROSHOT.json'

with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(final_output, f, indent=4, ensure_ascii=False)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  6%|▌         | 10/176 [00:15<04:10,  1.51s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
  7%|▋         | 12/176 [00:18<04:06,  1.50s/it]



  9%|▊         | 15/176 [00:22<04:01,  1.50s/it]



 11%|█▏        | 20/176 [00:30<03:54,  1.50s/it]



 12%|█▏        | 21/176 [00:31<03:52,  1.50s/it]



 15%|█▌        | 27/176 [00:40<03:43,  1.50s/it]



 20%|█▉        | 35/176 [00:53<03:31,  1.50s/it]



 20%|██        | 36/176 [00:54<03:30,  1.50s/it]



 23%|██▎       | 41/176 [01:02<03:22,  1.50s/it]



 24%|██▍       | 43/176 [01:05<03:19,  1.50s/it]



 25%|██▌       | 44/176 [01:06<03:18,  1.50s/it]



 27%|██▋       | 48/176 [01:12<03:12,  1.51s/it]



 30%|██▉       | 52/176 [01:18<03:06,  1.50s/it]



 31%|███       | 54/176 [01:21<02:55,  1.44s/it]



 35%|███▍      | 61/176 [01:31<02:52,  1.50s/it]



 36%|███▋      | 64/176 [01:36<02:47,  1.50s/it]



 42%|████▏     | 74/176 [01:51<02:33,  1.50s/it]



 44%|████▍     | 78/176 [01:57<02:27,  1.51s/it]



 52%|█████▏    | 92/176 [02:17<01:56,  1.39s/it]



 59%|█████▊    | 103/176 [02:34<01:50,  1.51s/it]



 60%|█████▉    | 105/176 [02:37<01:47,  1.51s/it]



 63%|██████▎   | 111/176 [02:46<01:38,  1.51s/it]



 64%|██████▎   | 112/176 [02:47<01:36,  1.51s/it]



 65%|██████▍   | 114/176 [02:50<01:33,  1.51s/it]



 66%|██████▋   | 117/176 [02:55<01:29,  1.51s/it]



 73%|███████▎  | 128/176 [03:12<01:12,  1.51s/it]



 74%|███████▍  | 131/176 [03:16<01:08,  1.51s/it]



 77%|███████▋  | 135/176 [03:22<01:01,  1.51s/it]



 81%|████████  | 142/176 [03:33<00:51,  1.52s/it]



 86%|████████▋ | 152/176 [03:48<00:36,  1.50s/it]



 89%|████████▊ | 156/176 [03:54<00:30,  1.50s/it]



 93%|█████████▎| 163/176 [04:04<00:19,  1.50s/it]



 93%|█████████▎| 164/176 [04:06<00:18,  1.50s/it]



 95%|█████████▍| 167/176 [04:10<00:13,  1.51s/it]



 96%|█████████▌| 169/176 [04:13<00:10,  1.50s/it]



100%|██████████| 176/176 [04:24<00:00,  1.50s/it]

Evaluation Metrics on Dev Set:
{'precision': 0.47055530548095703, 'recall': 0.4161546230316162, 'f1': 0.4141826629638672, 'span_acc': 0.20121951401233673, 'classification_report': {'B-FELT': {'precision': 0.38596491228070173, 'recall': 0.2682926829268293, 'f1-score': 0.3165467625899281, 'support': 164.0}, 'I-FELT': {'precision': 0.1875, 'recall': 0.015789473684210527, 'f1-score': 0.02912621359223301, 'support': 190.0}, 'O': {'precision': 0.8382009345794392, 'recall': 0.9643817204301075, 'f1-score': 0.896875, 'support': 1488.0}, 'accuracy': 0.8045602605863192, 'macro avg': {'precision': 0.4705552822867136, 'recall': 0.4161546256803825, 'f1-score': 0.41418265872738697, 'support': 1842.0}, 'weighted avg': {'precision': 0.7308177178437789, 'recall': 0.8045602605863192, 'f1-score': 0.7556990497542196, 'support': 1842.0}}}



