In [1]:
import os
os.environ["OPENAI_API_KEY"] = 'YOUR_API_KEY'

In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, SimpleKeywordTableIndex
from llama_index.node_parser import SimpleNodeParser
from llama_index.indices.service_context import ServiceContext
from llama_index.llms import OpenAI

from llama_index.schema import TextNode, NodeRelationship, RelatedNodeInfo

# Building knowledge base with nodes

In [3]:
def title_text_split(string):
    return (string.split('\n')[0].strip(), "\n".join(string.split('\n')[1:]))

### Kolegiji

In [4]:
kolegiji = SimpleDirectoryReader('data', filename_as_id=True).load_data()

In [5]:
def handle_kolegij(kolegij, existing_nodes):
    name = os.path.splitext(os.path.basename(kolegij.id_))[0]
    sections = kolegij.text.split('\n\n\n')
    for section_text in sections:
        section, _ = title_text_split(section_text)
        section_id = f'{name} - {section}'
        node = TextNode(
            text = section_text,
            metadata = {
                'Type': 'Informacije o kolegiju',
                'Section': section
            },
            id_ = section_id
        )
        existing_nodes.append(node)

In [7]:
nodes = []
for kolegij in kolegiji:
#     handle_kolegij(kolegij, nodes)
    name = os.path.splitext(os.path.basename(kolegij.id_))[0]
    node = TextNode(
        text=kolegij.text,
        metadata = {
            'type': 'Informacije o kolegiju',
            'kolegij': name
        },
        id_ = name
    )
    nodes.append(node)

In [9]:
index = VectorStoreIndex(
    nodes,
    show_progress=True
)

  from .autonotebook import tqdm as notebook_tqdm
Generating embeddings: 100%|█████████████████████████████████████████████████████████| 115/115 [00:10<00:00, 10.52it/s]


In [10]:
index.storage_context.persist()

## Ispitivanje modela

In [99]:
llm = OpenAI(model="gpt-3.5-turbo-16k", temperature=0, max_tokens=2048)
service_context = ServiceContext.from_defaults(llm=llm)
query_engine = index.as_query_engine(service_context=service_context)

In [102]:
response = query_engine.query("Koji kolegiji se izvode na drugoj godini diplomskog studija financijske matematike, uključujući i izborne kolegije?")
print(response)

Na drugoj godini diplomskog studija financijske matematike izvode se kolegiji "Matematičke financije" i "Financijska i aktuarska matematika".
