In [2]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient, models
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.core import PromptTemplate
from storage.prompt_store.prompts import N3Prompts

import os

In [3]:
load_dotenv()
n3p = N3Prompts()

Settings.llm = OpenAI(model="gpt-4o")
Settings.embedding = OpenAIEmbedding(model="text-embedding-3-small")

In [4]:
from utils.file_loader import load_from_config

In [5]:
public_data = load_from_config("scraped_data")

In [6]:
iter_dict = lambda x:  ",\n".join([f"{k} : {v}"  for k,v in x.items() if v!=''])

def dict2str(i):
   pi = iter_dict(public_data[i]["poslovna.hr"]["poslovna_intro"])
   po = iter_dict(public_data[i]["poslovna.hr"]["poslovna_osnovno"])
   ddgo = iter_dict(public_data[i]["ddgo"])
   abs =  public_data[i]["poslovna.hr"]["abstract"]

   abs_2 ="" if abs=="" else f"\n\nDokument: \n\n{abs}"

   return f"{pi}\\n{po}\n\n{ddgo}{abs_2}"

In [7]:
FILTER_OF_RELEVANT_DOCS = """ i ==public_data[i]['poslovna.hr']['poslovna_intro']['OIB']  \
or public_data[i]['NAZIV'] ==public_data[i]['poslovna.hr']['poslovna_intro']['Naziv_subjekta']\
or public_data[i]['NAZIV'] ==public_data[i]['poslovna.hr']['poslovna_intro']['Podnaslov_subjekta']\
or public_data[i]['poslovna.hr']['poslovna_intro']['Podnaslov_subjekta']==''
"""

list_of_all = [{"oib": i, "naziv":public_data[i]["NAZIV"], "doc": dict2str(i)} for i in list(public_data.keys()) if eval(FILTER_OF_RELEVANT_DOCS)]

In [8]:
from llama_index.core import VectorStoreIndex, Document
from llama_index.core import StorageContext, load_index_from_storage

list_of_doc_oibs = [i["oib"] for i in list_of_all]
list_of_names = [i["naziv"] for i in list_of_all]
list_of_docs= [Document(text=i["doc"]) for i in list_of_all]

In [9]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline

In [10]:
qdrant_store_vdb = "storage/qdrant_cache/q_public_data"

In [11]:
try:

     # Check if the path exists
    if not os.path.exists(qdrant_store_vdb):
        raise FileNotFoundError("The specified path does not exist.")

    client = QdrantClient(path=qdrant_store_vdb)  # replace with your Qdrant server details

    # Define the collection name where the vectors are stored
    collection_name = "public_company_data_q"
    # Create the QdrantVectorStore instance
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    print("loading from disk")
except:

    client = QdrantClient(path=qdrant_store_vdb) # QdrantClient(location=":memory:")
    client.create_collection(
    collection_name="public_company_data_q",
    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE))
    vector_store = QdrantVectorStore(client=client, collection_name="public_company_data_q")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    index = VectorStoreIndex.from_documents(
        [],
        storage_context=storage_context,
    )
    pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])


    for oib, name, doc in zip(list_of_doc_oibs, list_of_names, list_of_docs):
        nodes = pipeline.run(documents=[doc])
        for node in nodes:
            node.metadata = {"oib" : oib}
            node.metadata = {"naziv" : name}
        index.insert_nodes(nodes)

loading from disk


In [12]:
qa_prompt_abstract = PromptTemplate(
    n3p.public_data_prompt

)

qe = index.as_chat_engine(qa_template= qa_prompt_abstract)

In [13]:
resp = qe.query("{'oib': '57993674442', 'naziv': 'KOFEIN d.o.o.'}")

In [15]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x28f48b44520>

In [13]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

In [14]:
c_abstract_tool = QueryEngineTool(query_engine=qe, metadata=ToolMetadata(
        name="public_company_data",
        description="this tool finds company public information using company name"
    ))

In [16]:
c_abstract_tool.call({"input": "Kofein doo"})

ToolOutput(content='KOFEIN d.o.o. is a limited liability company based in Zagreb, specializing in advertising and promotional services. Here are some key details about the company:\n\n- **Founded:** 2012\n- **Capital:** 100% domestic\n- **Annual Revenue (2023):** €1,175,124 (a decrease from €1,240,266 in 2022)\n- **Net Profit (2023):** €103,065\n- **Net Margin (2023):** 8.77%\n- **Employees (2023):** 27 (a reduction from the previous year)\n- **Director:** Tomislav Krajačić\n\nDespite a decrease in revenue, KOFEIN d.o.o. has remained profitable over the last two years.', tool_name='public_company_data', raw_input={'input': "{'input': 'Kofein doo'}"}, raw_output=Response(response='KOFEIN d.o.o. is a limited liability company based in Zagreb, specializing in advertising and promotional services. Here are some key details about the company:\n\n- **Founded:** 2012\n- **Capital:** 100% domestic\n- **Annual Revenue (2023):** €1,175,124 (a decrease from €1,240,266 in 2022)\n- **Net Profit (20

In [17]:
from llama_index.core.agent import FunctionCallingAgentWorker, ReActAgent

agent_worker2 = FunctionCallingAgentWorker.from_tools(
    tools=[c_abstract_tool],
    verbose=True,
    system_prompt=n3p.nnn_agent
)

In [18]:
agent_360 = agent_worker2.as_agent()

In [21]:
agent_360.chat('{"input": "Kofein doo"}')

Added user message to memory: {"input": "Kofein doo"}
=== Calling Function ===
Calling function: public_company_data with args: {"input": "Kofein doo"}
=== Function Output ===
KOFEIN d.o.o. is a limited liability company based in Zagreb, specializing in advertising and promotional services. Founded in 2012, it operates with 100% domestic capital. In 2023, the company achieved an annual revenue of €1,175,124 and a net profit of €103,065, with a net margin of 8.77%. The company had 27 employees in 2023.
=== LLM Response ===
Tvrtka **KOFEIN d.o.o.** je društvo s ograničenom odgovornošću sa sjedištem u Zagrebu, specijalizirano za oglašavanje i promotivne usluge. Osnovana je 2012. godine i posluje s 100% domaćim kapitalom. U 2023. godini, tvrtka je ostvarila godišnji prihod od 1.175.124 eura i neto dobit od 103.065 eura, s neto maržom od 8,77%. Tvrtka je imala 27 zaposlenika u 2023. godini.


AgentChatResponse(response='Tvrtka **KOFEIN d.o.o.** je društvo s ograničenom odgovornošću sa sjedištem u Zagrebu, specijalizirano za oglašavanje i promotivne usluge. Osnovana je 2012. godine i posluje s 100% domaćim kapitalom. U 2023. godini, tvrtka je ostvarila godišnji prihod od 1.175.124 eura i neto dobit od 103.065 eura, s neto maržom od 8,77%. Tvrtka je imala 27 zaposlenika u 2023. godini.', sources=[ToolOutput(content='KOFEIN d.o.o. is a limited liability company based in Zagreb, specializing in advertising and promotional services. Founded in 2012, it operates with 100% domestic capital. In 2023, the company achieved an annual revenue of €1,175,124 and a net profit of €103,065, with a net margin of 8.77%. The company had 27 employees in 2023.', tool_name='public_company_data', raw_input={'input': 'Kofein doo'}, raw_output=Response(response='KOFEIN d.o.o. is a limited liability company based in Zagreb, specializing in advertising and promotional services. Founded in 2012, it ope