## Imports

In [57]:
import os
import dotenv
from pathlib import Path

import openai
import litellm
from litellm import completion
import subprocess
import json
import pandas as pd
from logging import getLogger
logger = getLogger(__name__)
import logging
# Root logging: INFO and above, each record prefixed with a second-resolution timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)s %(message)s",
)

# Load variables from the .env file next to the notebook into the process environment
# (presumably this is where the LITELLM_* variables read below are defined).
dotenv.load_dotenv("./.env")

# OpenAI-style client; key and endpoint both come from the environment.
# A missing variable raises KeyError here rather than failing later on the first request.
client = openai.OpenAI(
    api_key=os.environ["LITELLM_ILSP_EVAL_API_KEY"],
    base_url=os.environ["LITELLM_HOST"],
)

# Model identifiers to exercise; "krikri-dpo" is currently left out on purpose.
models_to_test = [
    "krikri-dpo-latest",
    # "krikri-dpo",
    "claude-3.7-sonnet",
    "gpt-4o",
]

# Defaults picked up by generate_text() when the caller does not override them.
DEFAULT_MODEL = models_to_test[0]
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. Answer the question as best you can."
DEFAULT_USER_PROMPT = "Γράψε ένα ποίημα στα Ελληνικά."


2025-04-09 09:33:38 INFO Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2025-04-09 09:33:38 INFO NumExpr defaulting to 8 threads.


claude-3.7-sonnet


In [51]:

def generate_text(
    client,
    model=DEFAULT_MODEL,
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    user_prompt=DEFAULT_USER_PROMPT,
    temperature=0.5,
    max_tokens=5000,
    top_p=0.9,
    frequency_penalty=0.0,
    presence_penalty=0.0,
):
    """
    Request one chat completion from ``client`` and return its text.

    The conversation sent is always two messages: a system message followed
    by a user message. All sampling parameters are forwarded unchanged to
    ``client.chat.completions.create``.

    Args:
        client: Object exposing ``chat.completions.create(...)`` (an
            OpenAI-style client).
        model (str): Identifier of the model to query.
        system_prompt (str): Content of the system message.
        user_prompt (str): Content of the user message.
        temperature (float): Sampling temperature; higher is more random.
        max_tokens (int): Upper bound on the length of the generated reply.
        top_p (float): Nucleus-sampling probability mass.
        frequency_penalty (float): Forwarded as ``frequency_penalty``; in the
            OpenAI API, positive values penalize tokens in proportion to how
            often they have already appeared.
        presence_penalty (float): Forwarded as ``presence_penalty``; in the
            OpenAI API, positive values penalize tokens that have already
            appeared at all.

    Returns:
        str | None: The message content of the first returned choice, or
        ``None`` if the request (or reading the response) raised.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberately best-effort: one failed request must not abort the cell
        # that called us. Log through the logging module (not print) so the
        # traceback is kept and the message uses the configured log format.
        logging.getLogger(__name__).exception("Error generating text: %s", e)
        return None



In [85]:
SYSTEM_PROMPT = ""

# The examples below are fictional, also generated by an LLM.


def _build_ner_example(entry_id, text, labelled_spans):
    """
    Build one few-shot example entry with offsets computed from the text.

    Hand-written offsets drift out of sync with the text very easily, so the
    ``start``/``end`` values are derived here instead of being typed in.

    Args:
        entry_id (str): Value stored under ``"id"``.
        text (str): The description the entities are taken from.
        labelled_spans (list[tuple[str, str]]): ``(span_text, label)`` pairs,
            in the order they should appear under ``"entities"``.

    Returns:
        dict: ``{"id", "text", "entities"}`` where every entity satisfies
        ``text[start:end] == entity["text"]`` (0-based, end exclusive).

    Raises:
        ValueError: If a span does not occur verbatim in ``text``.
    """
    entities = []
    for span_text, label in labelled_spans:
        # First occurrence; every span used below occurs exactly once in its text.
        start = text.find(span_text)
        if start < 0:
            raise ValueError(f"span {span_text!r} not found in {entry_id!r}")
        entities.append(
            {
                "start": start,
                "end": start + len(span_text),
                "text": span_text,
                "label": label,
            }
        )
    return {"id": entry_id, "text": text, "entities": entities}


EXAMPLE_ENTRIES = [
    _build_ner_example(
        "description_1",
        "Marble funerary inscription fragment with exceptionally clear lettering mentioning what appears to be a gladiatorial title, likely dating to the 2nd century CE based on epigraphic style. Recovered from fill material adjacent to the central mausoleum in the eastern quadrant of the Liternum necropolis. Association with nearby cinerary urn suggests this may be part of the rare gladiator epitaph mentioned in preliminary reports.",
        [
            ("Marble funerary inscription fragment", "ARTIFACT"),
            ("exceptionally clear lettering", "FEATURE"),
            ("2nd century CE", "PERIOD"),
            ("Liternum necropolis", "LOCATION"),
            ("part of the rare gladiator epitaph", "CONTEXT"),
        ],
    ),
    _build_ner_example(
        "description_2",
        "Intact terracotta oil lamp with decorative motif depicting what appears to be a gladiatorial combat scene, typical of late 1st to early 2nd century CE production. Found within enchytrismos burial of juvenile individual at the southwestern section of the necropolis in Liternum, Campania. Position near the head of the deceased suggests possible votive offering related to funerary rites.",
        [
            ("Intact terracotta oil lamp", "ARTIFACT"),
            ("with decorative motif depicting what appears to be a gladiatorial combat scene", "FEATURE"),
            ("late 1st to early 2nd century CE", "PERIOD"),
            ("enchytrismos burial", "CONTEXT"),
            ("Liternum, Campania", "LOCATION"),
        ],
    ),
]

# Same layout as the previously hand-written example: 4-space indented JSON array.
EXAMPLES_JSON = json.dumps(EXAMPLE_ENTRIES, indent=4, ensure_ascii=False)

# Instruction text is unchanged except that the announced number of example
# entries now matches the number of examples actually shown.
USER_PROMPT = (
    """Your task is to generate synthetic named entity data related to archaeology excavations. Assume you are an archaeologist working at an excavation site. While examining the findings of a productive excavation day, you dictate brief 3-4 sentence notes with descriptions of the artifacts you and your team have found. In your oral notes, dictate your initial descriptions of the artifacts including features like ARTIFACT: e.g. "Axe, pot, stake, arrow head, coin",    PERIOD: e.g. "Middle Ages, Neolithic, 500 BC, 4000 BP",    LOCATION: e.g. "Zominthos, Ferrycarrig, Ireland, Ano Toumba",  FEATURE e.g. "exceptionally clear lettering" and CONTEXT: e.g. "Rubbish pit, burial mound, stake hole". Your answer should be a json file including 4 entries with textual descriptions and extracted entities of imaginary excavations in Europe. The text should be in English. """
    f"Here is an example of such a json file with {len(EXAMPLE_ENTRIES)} entries related to an excavation site in Italy. \n"
    f"{EXAMPLES_JSON}\n"
)


answer = generate_text(client, system_prompt=SYSTEM_PROMPT, user_prompt=USER_PROMPT)
print(answer)


2025-04-09 10:48:30 INFO HTTP Request: POST http://ec2-3-19-37-251.us-east-2.compute.amazonaws.com:4000/chat/completions "HTTP/1.1 200 OK"


```json
[
    {
        "id": "description_1",
        "text": "Bronze age ceremonial dagger with intricate spiral patterns on the handle and remarkably preserved wooden elements, likely dating to approximately 1800 BCE based on typology. Recovered from primary burial context within a stone cist at the Stonehenge ritual landscape in Wiltshire. Position suggests this was a high-status grave good placed deliberately alongside the deceased individual.",
        "entities": [
            {
                "start": 0,
                "end": 26,
                "text": "Bronze age ceremonial dagger",
                "label": "ARTIFACT"
            },
            {
                "start": 27,
                "end": 92,
                "text": "with intricate spiral patterns on the handle and remarkably preserved wooden elements",
                "label": "FEATURE"
            },
            {
                "start": 111,
                "end": 120,
                "text": "1800 BCE",
      