In [None]:
import sys
import os

# Make the parent of the working directory importable (the notebook imports
# `src.*` and `llm_stuff.*` from there). Guarded so that re-running this cell
# does not keep appending the same entry to sys.path.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [None]:
from pathlib import Path
from src.utils.config_loader import load_config
from src.utils.seed import seed_everything

# Parent of the current working directory; the other paths used in this
# notebook are resolved against it.
base_dir = Path.cwd().parent

# Settings (including the Azure OpenAI credentials used further down) come from secrets.yaml.
config = load_config(base_dir.joinpath('secrets.yaml'))

# Fixed seed for the run; presumably seeds the RNGs used below — see src.utils.seed.
seed_everything(42)

In [None]:
from src.data.preprocessing import create_df

# Validation data used to score candidate example subsets. A module-level
# name is already global, so no `global` declaration is needed here.
val_df = create_df(base_dir / 'data/my_data/regplans-dev.conllu')

In [None]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import (SystemMessage, HumanMessage)

# Copy the Azure OpenAI settings from the loaded config into environment
# variables (presumably where AzureChatOpenAI looks them up — confirm against
# the langchain_openai documentation).
for env_name, config_key in (
    ('OPENAI_API_VERSION', 'OPENAI_API_VERSION'),
    ('AZURE_OPENAI_ENDPOINT', 'OPENAI_API_BASE'),
    ('AZURE_OPENAI_API_KEY', 'OPENAI_API_KEY'),
):
    os.environ[env_name] = config[config_key]

# Chat model used for every prompt in this notebook; temperature is pinned to 0.0.
llm = AzureChatOpenAI(
    temperature=0.0,
    deployment_name=config['OPENAI_DEPLOYMENT_NAME'],
)

In [None]:
import random
import json

# Load the bank of candidate few-shot examples. The file is decoded as UTF-8
# explicitly so that non-ASCII text does not depend on the platform's default
# encoding.
with open(base_dir / 'llm_stuff/prompts/examples.json', 'r', encoding='utf-8') as f:
    example_bank = json.load(f)

def format_examples(example_subset):
    """Render few-shot examples as one text block for the prompt.

    Args:
        example_subset: Iterable of dicts, each with a ``'sentence'`` string
            and an ``'entities'`` list of dicts holding ``'word'`` and ``'label'``.

    Returns:
        A string with one numbered section per example (numbering starts at 1),
        each listing the sentence followed by one ``word label`` line per
        entity. Sections are separated by a blank line; an empty input gives ``""``.
    """
    sections = []
    for number, example in enumerate(example_subset, start=1):
        entity_lines = "\n".join(
            f"{entity['word']} {entity['label']}" for entity in example["entities"]
        )
        sections.append(
            f"Example {number}:\nSentence: \"{example['sentence']}\"\nEntities:\n{entity_lines}\n"
        )

    return "\n".join(sections)

In [None]:
from src.utils.label_mapping_regplans import label_to_id
from llm_stuff.evaluation import evaluate 
from collections import defaultdict

def evaluate_example_subset(examples, sentence, tokens, true_labels):
    """Prompt the LLM with a set of few-shot examples and score its tags for one sentence.

    The examples are rendered with ``format_examples`` and embedded in the
    system prompt. Reply lines made of exactly two whitespace-separated fields
    are read as ``word label`` pairs; every other line is ignored. Each token
    then receives the labels returned for that word in order of occurrence,
    and ``"O"`` once no (further) label is available for it.

    Args:
        examples: Example records accepted by ``format_examples``.
        sentence: Sentence text inserted into the human message.
        tokens: Words of the sentence; one predicted label is produced per token.
        true_labels: Gold labels; each must be a key of ``label_to_id``.

    Returns:
        The ``'f1'`` entry of the metrics returned by ``evaluate``.
    """
    formatted_examples = format_examples(examples)

    system_message = SystemMessage(
        # TODO: Use the best PROMPT
        f"""You are an expert in Natural Language Processing. Your task is to identify Named Entities (NER) in a given text.
            The possible Named Entities are exclusively 'B-FELT' and 'I-FELT'. The entities are defined as follows:

            - B-FELT: The beginning of a field zone name.
            - I-FELT: The continuation of a field zone name.   
                        
            ### Examples:

            {formatted_examples}
        """
    )
    human_message = HumanMessage(f"Your task is to identify the Named Entities in the following sentence: '{sentence}'")

    reply = llm.invoke([system_message, human_message])

    # Collect the labels returned for each word, in the order they appear in the reply.
    labels_by_word = {}
    for raw_line in reply.content.splitlines():
        fields = raw_line.strip().split()
        if len(fields) != 2:
            continue
        word, label = fields
        labels_by_word.setdefault(word, []).append(label)

    # One iterator per word hands out its labels occurrence by occurrence;
    # exhausted or unknown words default to "O".
    remaining = {word: iter(labels) for word, labels in labels_by_word.items()}
    pred_labels = [
        next(remaining[token], "O") if token in remaining else "O"
        for token in tokens
    ]

    # Convert labels to IDs; labels missing from the mapping fall back to the
    # id of "O" (or -1 if "O" itself is not mapped).
    fallback_id = label_to_id.get("O", -1)
    pred_ids = [
        label_to_id[label] if label in label_to_id else fallback_id
        for label in pred_labels
    ]
    true_ids = [label_to_id[gold] for gold in true_labels]

    return evaluate(true_ids, pred_ids)['f1']  # Return f1 score

In [None]:
from deap import base, creator, tools, algorithms
import numpy as np

SUBSET_SIZE = 5                   # number of examples carried by one individual
NUM_EXAMPLES = len(example_bank)  # size of the pool the indices are drawn from

# Single-objective fitness with a positive weight: the GA maximizes the f1.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# An individual is a list of indices into example_bank.
creator.create("Individual", list, fitness=creator.FitnessMax)


def _sample_example_indices():
    """Draw SUBSET_SIZE distinct indices from range(NUM_EXAMPLES)."""
    return random.sample(range(NUM_EXAMPLES), SUBSET_SIZE)


toolbox = base.Toolbox()
toolbox.register("attr_sample", _sample_example_indices)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_sample)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def mutate(individual):
    """Replace one randomly chosen slot of the subset with an unused example index.

    The individual is modified in place. If every index in
    ``range(NUM_EXAMPLES)`` is already present, it is left unchanged.

    Args:
        individual: List of example indices; a position in
            ``0 .. SUBSET_SIZE - 1`` is picked for replacement.

    Returns:
        A one-element tuple containing the (mutated) individual.
    """
    slot = random.randint(0, SUBSET_SIZE - 1)
    # Indices of the bank that this individual does not use yet.
    unused = list(set(range(NUM_EXAMPLES)) - set(individual))
    if unused:
        individual[slot] = random.choice(unused)

    print("After mutation:", individual)
    return (individual,)

def evaluate_fitness(individual):
    """Fitness of an example subset: its mean f1 over a sample of the validation data.

    Args:
        individual: Sequence of indices into ``example_bank``.

    Returns:
        A one-element tuple ``(avg_f1,)``. ``(0.0,)`` is returned when the
        sample contains no rows, instead of failing with a division by zero.
    """
    examples = [example_bank[i] for i in individual]

    # Sample 15% of the validation data.
    # NOTE(review): a new random sample is drawn on every call, so different
    # individuals are scored on different sentences.
    val_df_sample = val_df.sample(frac=0.15)

    scores = [
        evaluate_example_subset(examples, row['full_text'], row['words'], row['labels'])
        for _, row in val_df_sample.iterrows()
    ]

    # A very small validation frame can yield an empty 15% sample.
    if not scores:
        return (0.0,)

    return (sum(scores) / len(scores),)  # Return the average f1 score

# Wire the GA operators into the toolbox.
# NOTE(review): cxTwoPoint exchanges a slice between two parents, so an
# offspring could end up holding the same example index twice — confirm this
# is acceptable.
toolbox.register("evaluate", evaluate_fitness)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", mutate)
toolbox.register("select", tools.selTournament, tournsize=3)

# Statistics computed over the individuals' fitness values.
stats = tools.Statistics(key=lambda individual: individual.fitness.values)
for stat_name, reducer in (("avg", np.mean), ("std", np.std), ("min", np.min), ("max", np.max)):
    stats.register(stat_name, reducer)

# Run GA

pop = toolbox.population(n=10)

# eaSimple has no elitism: each generation's offspring replace the population,
# so the best individual seen during the run may be missing from the final one.
# The hall of fame records the best individual ever evaluated; it only observes
# the population and does not take part in selection or variation.
hall_of_fame = tools.HallOfFame(1)

pop, logbook = algorithms.eaSimple(
    pop,
    toolbox,
    cxpb=0.5,
    mutpb=0.2,
    ngen=10,
    stats=stats,
    halloffame=hall_of_fame,
    verbose=True,
)

best_individual = hall_of_fame[0]
best_examples = [example_bank[i] for i in best_individual]

print("Best example subset:", best_examples)

In [None]:
import matplotlib.pyplot as plt

# Per-generation statistics recorded in the logbook by the GA run.
gen = logbook.select("gen")
avg = logbook.select("avg")
std = logbook.select("std")
min_ = logbook.select("min")
max_ = logbook.select("max")

# Arrays are only needed for the element-wise avg +/- std band.
avg_values = np.asarray(avg)
std_values = np.asarray(std)

# Explicit figure/axes, with labels so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(gen, avg, label='avg')
ax.fill_between(gen, avg_values - std_values, avg_values + std_values, alpha=0.2, label='avg +/- std')
ax.plot(gen, min_, label='min')
ax.plot(gen, max_, label='max')
ax.set_xlabel('Generation')
ax.set_ylabel('Fitness (average f1)')
ax.set_title('GA fitness per generation')
ax.legend()
plt.show()