In [9]:
import os 
from prompts import context, code_parser_template, FORMAT_INSTRUCTIONS_TEMPLATE, react_system_header_str, EXTRACT_TEMPLATE_STR

from dotenv import load_dotenv
load_dotenv()


OLLAMA_URL=os.getenv("OLLAMA_URL")
LLM_MODEL=os.getenv("LLM_MODEL")
EMBEDDING_MODEL=os.getenv("EMBEDDING_MODEL")
DATAPATH=os.getenv("DATAPATH")
DATAPATH='/data'
#OLLAMA_URL='http://localhost:11434'

print(OLLAMA_URL)
print(f"model: {LLM_MODEL}")
print(f"embedding: {EMBEDDING_MODEL}")
print(f"datapath {DATAPATH}")

http://ollama:11434
model: qwen2.5:1.5b
embedding: nomic-embed-text:latest
datapath /data


Test connection to ollama

In [10]:
import requests
import time

response = requests.get(OLLAMA_URL,timeout=3)
if response.status_code == requests.codes.ok:
    print(response)
else:
    print(response)    

<Response [200]>


In [11]:
from llama_index.llms.ollama import Ollama
llm = Ollama(model=LLM_MODEL, request_timeout=120.0, base_url=OLLAMA_URL, temperature=0)

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /opt/venv/lib/python3.13/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [12]:

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
documents = SimpleDirectoryReader(
    input_files=[f"{DATAPATH}/testdata.txt",f"{DATAPATH}/additionalinfo.txt"]
).load_data()
print("loaded")

loaded


In [13]:
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import SentenceSplitter

Settings.embed_model = OllamaEmbedding(model_name=EMBEDDING_MODEL, base_url=OLLAMA_URL,embed_batch_size=100)
Settings.text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=30)
# ollama
Settings.llm = llm

index = VectorStoreIndex.from_documents(
documents,
#embed_model=ollama_embedding,
)

print("finish")

finish


In [14]:
from llama_index.core.tools import QueryEngineTool

query_engine = index.as_query_engine(similarity_top_k=3)

knowledge_tool = QueryEngineTool.from_defaults(
    query_engine,
    name="knowledge_tool",
    description="""A RAG engine with some basic facts persons. Ask natural-language questions about persons and their properties and relations.
              if the knowledge_tool has no relatied information, ignore the answer.
              """
)

In [15]:
from llama_index.core.tools import FunctionTool

# generate_kwargs parameters are taken from https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
def find_person(name: str, **kwargs):
    """
    provides information about known persons. including ther detail information like birthdate

    args:
        name
    """
    # Mock response; replace with real query logic
    person_data = {
        "anna gölding": {"birthdate": "October 24, 1734", "known_for": "Last witch executed in Switzerland.","object_id":"1234","relations":{"knows other person":"ron paul","organzation":"pilz mafia"}},
        "john doe": {"birthdate": "Unknown", "known_for": "Placeholder name for anonymous individuals."},
        "ron paul": {"birthdate": "May 1, 1928", "known_for": "Talking a lot."},
        "miranda meyers": {"birthdate": "Aug 11, 1998", "known_for": "Miranda verkauft gerne verdorbens Eis. Das Eis erhält sie illegal von Litauen, wo es mit Mäusemilch hergestellt wird."},
    }
    return person_data.get(name.lower(), "No information available for this person.")

find_person_tool = FunctionTool.from_defaults(
    fn=find_person,
    name="find_person",
)


def find_organization(name: str, **kwargs):
    """
    provides information about known official and inofficial organzations.

    args:
        name
    """
    # Mock response; replace with real query logic
    org_data  = {
        "un": {"name": "United Nations", "description": "The Security Council has primary responsibility for the maintenance of international peace and security.","id":"200","relations":{""}},
        "pilz mafia": {"name": "Pilz Mafia", "description": "","id":"201","members":{"anna gölding","ron paul"}},
        "acme company": {"name":"acme company","description":"placeholder company"},
    }
    return org_data.get(name.lower(), "No information available for this organization.")

find_orgnization_tool = FunctionTool.from_defaults(
    fn=find_organization,
    name="find_organization",
)

#def get_messages(name: str, min_daterange: datetime, max_daterange: datetime):
def get_messages(name: str, min_daterange: str, max_daterange: str, **kwargs):
    """
    Retrieve information about communications between two or more people within a given date range.

    # Example usage:
        name = "c1"
        min_daterange = ISO8601 date string
        max_daterange = ISO8601 date string
        messages = get_messages(name, min_daterange, max_daterange)

    Args:
        name (str): The name of the context always use c1.
        min_daterange (datetime): The start of the date range.
        max_daterange (datetime): The end of the date range.

    Returns:
        dict: A dictionary of messages for the given name within the date range.
    """
    # Mock response; replace with real query logic
    min_daterange_ts = parser.parse(min_daterange)
    max_daterange_ts = parser.parse(max_daterange)
    org_data = {
        "c1_1738446338": {"sender": "Ron Paul", "message": "Anna Gölding ist gestorben.", "timestamp": datetime(2025, 1, 1)},
        "c1_1738446338": {"sender": "Pilz Mafia", "message": "Hat Sie mit Boris Weed gesprochen oder ihn erwähnt? Sie wollte von ihm ein Sack voll Vogelfutter kaufen.", "timestamp": datetime(2025, 1, 5)},
    }

    result = {}
    for key, value in org_data.items():
        if key.startswith(name.lower()): # and min_daterange_ts <= value["timestamp"] <= max_daterange_ts:
            result[key] = value

    if not result:
        return "No information available for this timerange"

    return result

get_messages_tool = FunctionTool.from_defaults(
    fn=get_messages,
    name="get_messages",
)


In [18]:
tools = [
    find_person_tool,
    find_orgnization_tool,
    get_messages_tool,
    knowledge_tool,
]

#memory = ChatMemoryBuffer(token_limit=2000)


# extract only entities
from pydantic import BaseModel
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel

class Organization(BaseModel):
    """Organization entity"""
    name: str

ollama_model = OpenAIModel(model_name=LLM_MODEL, base_url=f"{OLLAMA_URL}/v1")
#ollama_model = OpenAIModel(model_name='ollama:llama3.2', base_url=OLLAMA_URL)
agent = Agent(ollama_model, result_type=Organization,system_prompt=EXTRACT_TEMPLATE_STR)
#TODO



In [21]:
import nest_asyncio

nest_asyncio.apply()

prod_data = ["Die Pilz Mafia ist eine Organisation.", "Die Polnische Polizei überwacht die Pilz Mafia."]

org_result = agent.run_sync(f'What organizations are mentioned {prod_data}')
print(org_result.data)
print(org_result.usage())
#memory.save(org_result.data)


UnexpectedModelBehavior: Exceeded maximum retries (1) for result validation

In [28]:
from llama_index.core.agent import ReActAgent
from llama_index.core.memory import ChatMemoryBuffer, ChatSummaryMemoryBuffer

memory = ChatMemoryBuffer(token_limit=2000)
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context, tool_choice='auto',max_iterations=25, timeout=300 memory=memory) #, chat_history=memory)

# update agent system prompt
react_system_prompt = PromptTemplate(react_system_header_str)
agent.update_prompts({"agent_worker:system_prompt": react_system_prompt})
agent.reset()
print("start prompt")
#prompt = "Who is Anna Gölding and what other person may be related to her? to which organzations may she be related?"
question_context = """context: c1, zeitbereich: 2025-02-01T00:00:00+00:00 to 2025-02-15T00:00:00+00:00 """
prompt = f"""Wer war Anna Gölding und welche andren personen oder organisationen stehen mit ihr in verbindung?
             Liste alle informationen und fakten die du findest in der antwort auf.
            questions context: {question_context}
            1. Analysiere die Person bekannt ist im find person tool.
            2. Analysiere die Person verbindungen zu anderen Personen oder Organisationen hat
            3. Prüfe ob Nachriten (messages) im context dieser Personen im gesuchten Zeitbereich statgefunden haben mit dem get_messages tool.
            4. Nenne die Anzahl der conversationen
            5. Nenne die Teilnehmer der conversationen
            6. Fasse den Inhalt der Kommunikation zusammen
            7. Prüfe ob Entitäten wie Personen, Organisationen oder Orte in der Nachrichten vorkommen, die bisher nicht bekannt sind.
            8. Kontrolliere ob alle Punkte dieser liste erfüllt sind
          """
response = agent.query(prompt)
#response = llm.complete(prompt)

#memory.save_context({"input": prompt}, {"output": str(response)})

print(f"AI: {response}")

start prompt
> Running step 9b0613fb-08cd-4b5e-93a5-a6d8ce4e37b6. Step input: Wer war Anna Gölding und welche andren personen oder organisationen stehen mit ihr in verbindung?
             Liste alle informationen und fakten die du findest in der antwort auf.
            questions context: context: c1, zeitbereich: 2025-02-01T00:00:00+00:00 to 2025-02-15T00:00:00+00:00 
            1. Analysiere die Person bekannt ist im find person tool.
            2. Analysiere die Person verbindungen zu anderen Personen oder Organisationen hat
            3. Prüfe ob Nachriten (messages) im context dieser Personen im gesuchten Zeitbereich statgefunden haben mit dem get_messages tool.
            4. Nenne die Anzahl der conversationen
            5. Nenne die Teilnehmer der conversationen
            6. Fasse den Inhalt der Kommunikation zusammen
            7. Prüfe ob Entitäten wie Personen, Organisationen oder Orte in der Nachrichten vorkommen, die bisher nicht bekannt sind.
            8. Kontro

ReadTimeout: timed out