In [36]:
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Guard against duplicates: re-running this cell must not keep growing sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [37]:
from pathlib import Path
from src.utils.config_loader import load_config

# All project paths are resolved against the parent of the current working directory.
base_dir = Path.cwd().parent

# Settings (API version, endpoint, key, deployment name) are read from secrets.yaml there.
secrets_path = base_dir / 'secrets.yaml'
config = load_config(secrets_path)

In [38]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import HumanMessage

# Copy the Azure OpenAI connection settings from the loaded config into the
# process environment (environment variable name -> config key).
# NOTE(review): the client below is built without explicit credentials, so it
# presumably picks these values up from the environment - confirm.
for env_name, config_key in (
    ('OPENAI_API_VERSION', 'OPENAI_API_VERSION'),
    ('AZURE_OPENAI_ENDPOINT', 'OPENAI_API_BASE'),
    ('AZURE_OPENAI_API_KEY', 'OPENAI_API_KEY'),
):
    os.environ[env_name] = config[config_key]

# Chat model handle used for every request in this notebook.
llm = AzureChatOpenAI(deployment_name=config['OPENAI_DEPLOYMENT_NAME'])

In [39]:
import json

# Load the validation set. The encoding is given explicitly because the texts
# contain non-ASCII (Norwegian) characters and the platform default encoding
# is not guaranteed to be UTF-8.
with open(base_dir / 'data/val_set.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [40]:
def format_examples(example_subset):
    """Render examples as one text block for inclusion in a prompt.

    Args:
        example_subset: Iterable of mappings, each providing the keys
            'version_1', 'version_2' and 'ref_summary'.

    Returns:
        A single string in which every example is numbered from 1, lists both
        versions and the reference summary, and ends with a '##' line.
        Consecutive examples are separated by a newline; an empty input
        yields an empty string.
    """
    blocks = [
        f"Eksempel {number}:\n"
        f"Versjon 1:\n{example['version_1']}\n\n"
        f"Versjon 2:\n{example['version_2']}\n\n"
        f"Oppsummering:\n{example['ref_summary']}\n##\n"
        for number, example in enumerate(example_subset, start=1)
    ]

    return "\n".join(blocks)

# Ids of the validation items to render as worked examples.
ids = [27]
# ids = [27, 39, 23]  # alternative selection with three examples

# Resolve each id to the first matching item. A missing id is reported with an
# explicit error instead of a bare StopIteration escaping from next().
examples = []
for example_id in ids:
    match = next((ex for ex in data if ex["id"] == example_id), None)
    if match is None:
        raise KeyError(f"No item with id {example_id} in the loaded data")
    examples.append(match)

formatted_examples = format_examples(examples)

print(formatted_examples)

Eksempel 1:
Versjon 1:
Det tillates maks en boenhet per tomt.

Versjon 2:
Det tillates inntil to boenheter per tomt.

Oppsummering:
Tillatt antall boenheter per tomt er økt fra en til to.
##



In [41]:
from tqdm import tqdm

# One record per successfully summarised item: id, reference summary, model summary.
results = []
# Ids of items whose request raised, so gaps in `results` are visible afterwards.
failed_ids = []

data_sample = data  # slice here (e.g. data[:1]) for a quick smoke test

for item in tqdm(data_sample):

    version_1 = item['version_1']
    version_2 = item['version_2']
    ref_summary = item['ref_summary']

    # Zero-shot prompt: instructions plus the two versions, no worked examples.
    prompt = f"""\
You are an urban planning and regulatory documentation expert, specializing in Norwegian zoning plans. Your role is to assist case workers by identifying and clearly summarizing the differences between two versions of a zoning plan text. The summary should be as brief as possible, yet clear and informative. Avoid unnecessary details and write the summary in Norwegian. If version 1 contains text, but not version 2, it means the text has been removed. If it's the other way around, the text has been added. Do not refer to version 1 or version 2 in the summary. Only describe what has been removed, added, or changed.

Summarize the differences between the following two versions:

Version 1:
{version_1}

Version 2:
{version_2}

Summary:
"""

    msg = [HumanMessage(content=prompt)]

    try:
        response = llm.invoke(msg)

        results.append({
            'id': item['id'],
            'ref_summary': ref_summary,
            'model_summary': response.content
        })

    except Exception as e:
        # Best effort: a single failed request must not abort the whole run,
        # but the failing item is recorded and named so it can be retried.
        failed_ids.append(item.get('id'))
        print(f"Error for item {item.get('id')}: {e}")

# Make an incomplete result set obvious before it is written to disk.
if failed_ids:
    print(f"{len(failed_ids)} of {len(data_sample)} items failed: {failed_ids}")

100%|██████████| 35/35 [02:16<00:00,  3.90s/it]


In [42]:
# Write one JSON object per line (JSON Lines) under results/<deployment>/.
deployment = config['OPENAI_DEPLOYMENT_NAME']
output_path = base_dir / f"results/{deployment}/{deployment}_ENG_ZEROSHOT.jsonl"

# The per-deployment directory may not exist yet on a first run.
output_path.parent.mkdir(parents=True, exist_ok=True)

# ensure_ascii=False emits non-ASCII characters verbatim, so the file encoding
# is set explicitly instead of relying on the platform default.
with open(output_path, "w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')