In [None]:
import os
import yaml
import glob
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma

from langchain.schema import Document
from sklearn.metrics.pairwise import cosine_similarity
from langchain.retrievers import BM25Retriever, EnsembleRetriever
import matplotlib.pyplot as plt
import numpy as np
from pprint import pprint

from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

In [None]:
from utils import load_data
from utils import prints

In [None]:
# Read the API credentials from the local YAML config file and expose them
# both as notebook-level constants and as environment variables.
with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

OPENAI_API_KEY = config["api_keys"]["openai"]
PINECONE_API_KEY = config["api_keys"]["pinecone"]

# Export both keys to the process environment in a single call
# (dict order is preserved, so Pinecone is written first, then OpenAI).
os.environ.update(
    {
        "PINECONE_API_KEY": PINECONE_API_KEY,
        "OPENAI_API_KEY": OPENAI_API_KEY,
    }
)

In [None]:
# Embedding client; it is handed to load_data.create_ensemble further down.
# NOTE(review): chunk_size=1 presumably limits every embedding request to a
# single text, which means one API call per chunk — confirm this is intended.
embedding = OpenAIEmbeddings(chunk_size=1)

In [None]:
# Collect every markdown file of the corpus.
# glob.glob() returns matches in arbitrary (filesystem-dependent) order, so the
# list is sorted to keep the downstream document/chunk order reproducible
# across machines and re-runs.
paths = sorted(glob.glob("01-data/markdowns_serbian/*.md"))

In [None]:
# Load the markdown files listed in `paths` through the project helper.
# NOTE(review): the return type is defined in utils/load_data — presumably a
# collection of loaded documents; confirm against the helper.
docs = load_data.load_markdown(paths)

In [None]:
# Two-stage splitting of the loaded documents:
#   1. load_data.interpret_markdown processes `docs` (presumably splitting on
#      markdown headers, judging by the variable name — TODO confirm).
#   2. The splitter returned by load_data.get_text_splitter breaks that result
#      down further via split_documents.
md_header_splits = load_data.interpret_markdown(docs)
# NOTE(review): 250 and 30 look like chunk size and chunk overlap — confirm
# against get_text_splitter's signature and consider naming them as constants.
text_splitter = load_data.get_text_splitter(250,30)
splits = text_splitter.split_documents(md_header_splits)

In [None]:
# Build the retriever from the chunked documents and the embedding client.
# NOTE(review): the name suggests a combination of several retrievers; the
# actual composition lives in load_data.create_ensemble — verify there.
ensemble_retriever = load_data.create_ensemble(splits,embedding)

In [None]:
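# TODO(review): unused draft call. As written it is not valid Python (a positional
# argument follows a keyword argument) and `query` is only defined in a later cell.
# load_data.augment_prompt(query=query, ensemble_retriever)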

In [None]:
# User question sent to the model, in Serbian
# (roughly: "What is the maximum slope of a wheelchair ramp?").
query = "Koji je maksimalni nagib rampe za invalide ?"

In [None]:
# Start the conversation from the messages defined in load_data.
# A shallow copy is taken on purpose: binding the module's list directly would
# make any later append in this notebook mutate load_data.messages itself, so
# prompts would leak into module state and accumulate across re-runs.
messages = list(load_data.messages)

In [None]:
# Chat model client; the API key is read from the process environment.
chat = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

In [None]:
# Build the user turn: the raw query is passed through load_data.augment_prompt
# and the result becomes the content of a HumanMessage.
# NOTE(review): augment_prompt receives only the query here; the
# `ensemble_retriever` built earlier in this notebook is never passed in —
# confirm the helper has access to a retriever of its own.
prompt = HumanMessage(
    content=load_data.augment_prompt(query)
)

# Assemble the conversation as a fresh list instead of appending to `messages`.
# `messages` may be the very list object owned by load_data, so an in-place
# append would leak this prompt into module state and stack up duplicates each
# time the cell is re-run. `messages` itself is left untouched, so re-running
# this cell always sends the same base messages plus the current prompt.
conversation = [*messages, prompt]

# `invoke` is the supported entry point; calling the model object directly is
# deprecated in current LangChain releases.
res = chat.invoke(conversation)

pprint(res.content)