In [1]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient, models
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.core import PromptTemplate
from storage.prompt_store.prompts import N3Prompts

import os

In [2]:
# Read variables from a local .env file into the process environment
# (presumably the OpenAI credentials -- confirm).
load_dotenv()
# Project prompt store; `public_data_prompt` and `nnn_agent` are read from it below.
n3p = N3Prompts()

# Global LlamaIndex defaults picked up by the index, query engine and agent.
Settings.llm = OpenAI(model="gpt-4o")
# LlamaIndex reads the embedding model from `Settings.embed_model`. Assigning to
# `Settings.embedding` is not picked up by the library, so the default embedding
# model would silently stay in effect.
# NOTE(review): vectors already persisted on disk were produced by whichever
# embedding model was active when that store was built -- rebuild the store if
# it was not text-embedding-3-small, otherwise query and stored vectors mismatch.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [3]:
from utils.file_loader import load_from_config

In [4]:
# Scraped company records loaded through the project helper. Later cells index
# this mapping by key and read its "poslovna.hr", "ddgo" and "NAZIV" entries.
public_data = load_from_config("scraped_data")

In [5]:
def iter_dict(x):
    """Render a mapping as ``key : value`` entries, one per line.

    Entries are joined with a comma followed by a newline; entries whose value
    equals the empty string are left out.
    """
    return ",\n".join(f"{k} : {v}" for k, v in x.items() if v != '')


def dict2str(i, data=None):
    """Build the plain-text document for the record stored under key ``i``.

    Args:
        i: Key of the record to render.
        data: Optional mapping to read the record from. Defaults to the
            notebook-level ``public_data``, which keeps existing calls
            (``dict2str(i)``) working unchanged.

    Returns:
        The intro block, the basic-data block, the "ddgo" block and, when the
        abstract is not empty, a trailing "Dokument" section.
    """
    records = public_data if data is None else data
    entry = records[i]
    portal = entry["poslovna.hr"]

    intro_text = iter_dict(portal["poslovna_intro"])
    basics_text = iter_dict(portal["poslovna_osnovno"])
    search_text = iter_dict(entry["ddgo"])
    # Named `abstract` rather than `abs` so the builtin is not shadowed.
    abstract = portal["abstract"]

    abstract_section = "" if abstract == "" else f"\n\nDokument: \n\n{abstract}"

    # A real newline separates the intro and basic-data blocks. The previous
    # escaped form emitted the two characters backslash + n into every document.
    return f"{intro_text}\n{basics_text}\n\n{search_text}{abstract_section}"

In [6]:
def is_relevant_doc(i):
    """Return True when the scraped page for key ``i`` should be kept.

    A record is kept when any of the following holds (checked in this order,
    stopping at the first match):
      * the key equals the OIB found in the scraped intro,
      * the record's "NAZIV" equals the scraped "Naziv_subjekta",
      * the record's "NAZIV" equals the scraped "Podnaslov_subjekta",
      * the scraped "Podnaslov_subjekta" is empty.

    Written as a plain function instead of a source string passed to `eval`,
    so the condition is checked by the parser and tooling and is not
    re-compiled for every record.
    """
    record = public_data[i]
    intro = record['poslovna.hr']['poslovna_intro']
    return (
        i == intro['OIB']
        or record['NAZIV'] == intro['Naziv_subjekta']
        or record['NAZIV'] == intro['Podnaslov_subjekta']
        or intro['Podnaslov_subjekta'] == ''
    )


# One entry per kept record: its key, its name and the rendered document text.
list_of_all = [
    {"oib": i, "naziv": public_data[i]["NAZIV"], "doc": dict2str(i)}
    for i in public_data
    if is_relevant_doc(i)
]

In [7]:
from llama_index.core import VectorStoreIndex, Document
from llama_index.core import StorageContext, load_index_from_storage

# Split the filtered records into three parallel lists, all in list_of_all order.
list_of_doc_oibs = [entry["oib"] for entry in list_of_all]
list_of_names = [entry["naziv"] for entry in list_of_all]
# Only the rendered text is placed in the Document; no metadata is attached here.
list_of_docs = [Document(text=entry["doc"]) for entry in list_of_all]

In [8]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline

In [9]:
# Path of the local Qdrant store; the next cell loads the index from here when
# the path exists.
qdrant_store_vdb = "storage/qdrant_cache/q_public_data_2"

In [10]:
# Name of the Qdrant collection holding the company vectors; shared by the
# load path and the rebuild path so the two cannot drift apart.
collection_name = "public_company_data_q2"

try:
    # Fail fast so a missing store goes straight to the rebuild branch.
    if not os.path.exists(qdrant_store_vdb):
        raise FileNotFoundError("The specified path does not exist.")

    # Open the local, file-backed Qdrant store and wrap the existing collection.
    client = QdrantClient(path=qdrant_store_vdb)
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    print("loading from disk")
except Exception as load_error:
    # `except Exception` instead of a bare `except` lets KeyboardInterrupt and
    # SystemExit propagate, and the reason for the rebuild is reported rather
    # than silently discarded.
    print(f"could not load index from disk ({load_error!r}); rebuilding in memory")

    # NOTE(review): the rebuilt index lives only in memory and is not written
    # to `qdrant_store_vdb`, so every run without a store re-embeds all
    # documents -- confirm whether persisting here is intended.
    client = QdrantClient(location=":memory:")
    client.create_collection(
        collection_name=collection_name,
        # The vector size must match the output dimension of the embedding model in use.
        vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
    )
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Start from an empty index and insert the chunked nodes document by document.
    index = VectorStoreIndex.from_documents(
        [],
        storage_context=storage_context,
    )
    pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])

    for oib, name, doc in zip(list_of_doc_oibs, list_of_names, list_of_docs):
        nodes = pipeline.run(documents=[doc])
        for node in nodes:
            # Set both keys in one assignment; two consecutive assignments to
            # `node.metadata` would leave only the last one ("naziv") in place.
            node.metadata = {"oib": oib, "naziv": name}
        index.insert_nodes(nodes)

loading from disk


In [13]:
# Wrap the project's prompt text so it can be used as the question-answering prompt.
qa_prompt_abstract = PromptTemplate(n3p.public_data_prompt)

# `qe` is used as a query engine below (`qe.query(...)` and
# `QueryEngineTool(query_engine=qe)`), so build a query engine directly instead
# of a chat engine. The QA prompt is passed as `text_qa_template`, the keyword
# documented for customising the query engine's answer prompt; `qa_template`
# is not a recognised keyword, so the custom prompt was not being applied.
# NOTE(review): a text-QA template is expected to expose `{context_str}` and
# `{query_str}` -- confirm `public_data_prompt` does.
qe = index.as_query_engine(text_qa_template=qa_prompt_abstract)

In [14]:
# Smoke-test the engine. The query text is a dict-like string carrying the
# company's OIB and name.
resp = qe.query("{'oib': '57993674442', 'naziv': 'KOFEIN d.o.o.'}")

In [16]:
# Show the answer text of the response.
resp.response

'Tvrtka **KOFEIN d.o.o.** s OIB-om **57993674442** nalazi se u Zagrebu na adresi Pavletićeva 1. Osnovana je 2012. godine i registrirana je za djelatnost agencija za promidžbu (reklamu i propagandu). Tvrtka je u privatnom vlasništvu i financirana je 100% domaćim kapitalom.\n\n**Financijski podaci za 2023. godinu:**\n- Ukupni godišnji prihod: 1.175.124,00 € (smanjenje u odnosu na prethodnu godinu)\n- Neto dobit: 103.065,00 €\n- Broj zaposlenika: 27\n\nDirektor tvrtke je Tomislav Krajačić. Više informacija možete pronaći na njihovoj web stranici [www.kofein.hr](http://www.kofein.hr).'

In [18]:
# Display the index object to confirm which index type was built or loaded.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x23584688f70>

In [20]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

In [21]:
# Expose the query engine as a named tool; the agent built further down
# receives this tool in its `tools` list.
c_abstract_tool = QueryEngineTool(
    metadata=ToolMetadata(
        description="this tool finds company public information using company name",
        name="public_company_data",
    ),
    query_engine=qe,
)

In [23]:
# Call the tool directly, passing the query through the `input` keyword (the
# same argument name the agent uses when it invokes the tool). A dict passed
# positionally is converted with `str()`, so the engine would be asked the
# dict's repr instead of the company name.
resp = c_abstract_tool.call(input="Kofein doo")

In [24]:
# Show the text content of the tool output.
resp.content

"KOFEIN d.o.o. is a company based in Zagreb that specializes in advertising and promotional services. Founded in 2012, it operates as a limited liability company. In 2023, the company achieved a total annual revenue of €1,175,124, which is a decrease from the previous year's revenue of €1,240,266. Despite the reduction in revenue, KOFEIN d.o.o. has remained profitable over the last two years, with a net profit of €103,065 in 2023 and a net margin of 8.77%. The company had 27 employees in 2023, a reduction from the previous year. The director of KOFEIN d.o.o. is Tomislav Krajačić."

In [25]:
# Show which tool produced this output.
resp.tool_name

'public_company_data'

In [28]:
# List the fields carried by the tool output.
dict(resp).keys()

dict_keys(['content', 'tool_name', 'raw_input', 'raw_output', 'is_error'])

In [44]:
# Inspect the second retrieved source node (score, metadata and text) behind the answer.
resp.raw_output.source_nodes[1]

NodeWithScore(node=TextNode(id_='5e99d72f-59f8-48b7-9a0f-0e1b24b5ae5f', embedding=None, metadata={'naziv': 'KOLADU j.d.o.o.'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4ee12028-f195-4bd0-9fb0-4c4db357bc4c', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='6b2c158244316ef0208b0c088a3c3a9aa56a03d8ecaeac310b04df5e91ccc954')}, text="OIB : 54242816396,\nAddress : Zagreb, Bolnička cesta 34e,\nNaziv_subjekta : KOLADU j.d.o.o.,\nPodnaslov_subjekta : KOLADU j.d.o.o. za usluge\\nRegistration body : Trgovački sud u Zagrebu,\nFounded : 2021,\nRegistration number : 081359838,\nIndustry : 70.22 Business and other management consultancy activities (NKD 2007),\nSize : - mikro (prema novom zakonu),\nCapital origin : 00% domaći kapital - izvor FINA,\nodgovorna_osoba : {'ime': 'Duje Kolak', 'rola': 'direktor'}\n\ntitle : KOLADU j.d.o.o.- revenues, profit, employees, analysis, contacts,\nhref : https://w

In [45]:
from llama_index.core.agent import FunctionCallingAgentWorker, ReActAgent

# Function-calling agent worker with the company-data tool as its only tool;
# the system prompt comes from the project prompt store.
agent_worker2 = FunctionCallingAgentWorker.from_tools(
    system_prompt=n3p.nnn_agent,
    tools=[c_abstract_tool],
    verbose=True,
)

In [46]:
# Wrap the worker in an agent object that exposes `.chat(...)`.
agent_360 = agent_worker2.as_agent()

In [47]:
# Send one user message to the agent; the message text is a JSON-style string.
resp_ag = agent_360.chat('{"input": "Kofein doo"}')

Added user message to memory: {"input": "Kofein doo"}
=== Calling Function ===
Calling function: public_company_data with args: {"input": "Kofein doo"}
=== Function Output ===
KOFEIN d.o.o. is a private company based in Zagreb, specializing in advertising and promotional services. Established in 2012, the company operates with 100% domestic capital. In 2023, KOFEIN d.o.o. achieved a total annual revenue of €1,175,124, reflecting a decrease of €65,142 compared to the previous year. Despite the reduction in revenue, the company has been profitable over the last two years, with a net profit of €103,065 in 2023 and a net margin of 8.77%. The company had 27 employees in 2023, a reduction from the previous year.
=== LLM Response ===
**KOFEIN d.o.o.**

- **Sjedište:** Zagreb
- **Djelatnost:** Oglašavanje i promotivne usluge
- **Osnivanje:** 2012
- **Kapital:** 100% domaći
- **Prihod (2023):** €1,175,124 (smanjenje od €65,142 u odnosu na prethodnu godinu)
- **Neto dobit (2023):** €103,065
- **

In [74]:
from utils.nnn_tools import agent2

In [84]:
# Ask the imported agent for news coverage. The cells below read
# `resp.sources`, which this call provides, so the statement must run for the
# notebook to work from a fresh kernel.
resp = agent2.chat("što portal jutarnji.hr piše o tvrtki kofein")

In [85]:
# Names of the tools that contributed sources to the agent's answer.
[i.tool_name for i in resp.sources]

['search_company_news']

In [91]:
# Expand the first source into a plain dict to inspect its fields.
dict(resp.sources[0])

{'content': '1. Novi ured poznate domaće agencije Kofein u Zagrebu zamišljen je kao ugodan radni prostor te galerija, kino, kafić i mjesto za odmor. (jutarnji.hr)\n2. Kreativna zagrebačka agencija Kofein ovih je dana proširila svoj kadar, angažiranjem Silvije Zidarić kao client service direktorice i Krešimira Lastrića kao art direktora. (novac.jutarnji.hr)',
 'tool_name': 'search_company_news',
 'raw_input': {'args': ('KOFEIN d.o.o. site:jutarnji.hr',), 'kwargs': {}},
 'raw_output': '1. Novi ured poznate domaće agencije Kofein u Zagrebu zamišljen je kao ugodan radni prostor te galerija, kino, kafić i mjesto za odmor. (jutarnji.hr)\n2. Kreativna zagrebačka agencija Kofein ovih je dana proširila svoj kadar, angažiranjem Silvije Zidarić kao client service direktorice i Krešimira Lastrića kao art direktora. (novac.jutarnji.hr)',
 'is_error': False}