In [93]:
import os
import openai
import logging
import streamlit as st

from pprint import pprint
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import chromadb.utils.embedding_functions as embedding_functions


DB_PATH = "chroma_db"
PERSIS_DIR = "./chroma_langchain_db"

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is missing. Assigning the
# result of os.getenv() back into os.environ would instead raise an opaque
# "TypeError: str expected, not NoneType".
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to your .env file."
    )

# Shared client used by get_llm_response().
openai_client = openai.OpenAI(api_key=_openai_api_key)

In [102]:

def save_documents(release_data, embeddings):
    """Chunk the raw texts, index them in Chroma, and persist the store.

    Args:
        release_data: Iterable of document texts (one string per source file).
        embeddings: Embedding function handed to Chroma for indexing.

    Returns:
        The Chroma vector store built from the chunks, written to PERSIS_DIR.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)

    # Split each text on its own and flatten the per-text results into one list.
    chunks = [
        chunk
        for text in release_data
        for chunk in splitter.create_documents([text])
    ]

    vector_store = Chroma.from_documents(chunks, embeddings, persist_directory=PERSIS_DIR)
    vector_store.persist()
    return vector_store

# load documents 
def load_documents(folder_name):
    """Read every ``.txt`` file located directly inside ``folder_name``.

    Args:
        folder_name: Path of the directory to scan (sub-directories are not
            descended into).

    Returns:
        list[str]: UTF-8 decoded contents of each ``.txt`` file, ordered by
        file name. Empty when the folder contains no ``.txt`` files.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be read.
    """
    release_data = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # document order stable from one run to the next.
    for file_name in sorted(os.listdir(folder_name)):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_name, file_name)
        # The context manager closes the handle as soon as the text is read,
        # instead of leaving it open until garbage collection.
        with open(file_path, encoding="utf-8") as text_file:
            release_data.append(text_file.read())
    return release_data

def get_llm_response(context, query, model="gpt-3.5-turbo", temperature=0.4):
    """Ask the chat model to answer ``query`` using ``context``.

    Args:
        context: Text inserted into the prompt as the material to answer from.
        query: The question to answer.
        model: Chat model name sent to the API. Defaults to the value that
            used to be hard-coded here.
        temperature: Sampling temperature sent to the API. Defaults to the
            value that used to be hard-coded here.

    Returns:
        The message content of the first returned choice, or ``None`` if the
        request (or reading its result) raised an exception.
    """
    prompt = f"Given the context: {context}, please answer the following question {query}."
    messages = [
        {'role': 'system', 'content': "You have to answer question based on context given"},
        {'role': 'user', 'content': prompt},
    ]

    try:
        response = openai_client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no answer".
        # logging.exception emits the same message at ERROR level but also
        # records the traceback, so the failure can be diagnosed.
        logging.exception('An error occured: %s', e)
        return None

def main(query="Tell me something about German Basic Law"):
    """Answer ``query`` from the local document store and pretty-print it.

    Opens the Chroma store at PERSIS_DIR when that path exists; otherwise
    builds it from the ``.txt`` files in the ``data`` folder. The retrieved
    documents are then passed to the LLM as context for the query.

    Args:
        query: The question to answer. Defaults to the sample question that
            used to be hard-coded in this function.

    Returns:
        None. The answer is printed; problems are reported through logging.
    """
    # Validate first so an empty query costs no embedding or retrieval work.
    # Warning level is used because logging's default configuration drops
    # INFO records, which would make these messages invisible.
    if not query:
        logging.warning("Please provide the search query !")
        return

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

    if os.path.exists(PERSIS_DIR):
        chroma_db = Chroma(persist_directory=PERSIS_DIR, embedding_function=embeddings)
    else:
        release_data_list = load_documents('data')
        chroma_db = save_documents(release_data_list, embeddings)

    retriever = chroma_db.as_retriever()

    # TODO: embed the query and check response time
    relevant_docs = retriever.get_relevant_documents(query)
    if not relevant_docs:
        logging.warning("No relevant documents found for the query !")
        return

    # Join the text of the retrieved documents. Joining a single page_content
    # string would iterate it character by character and put a space between
    # every letter of the context.
    context = " ".join(doc.page_content for doc in relevant_docs)

    llm_response = get_llm_response(context, query)
    if llm_response:
        pprint(llm_response)
    else:
        logging.warning("No Response recived from the LLM !")


In [103]:
# Run the pipeline when this cell/script is executed directly; the guard keeps
# main() from firing if the code is imported as a module.
if __name__ == "__main__":
    main()

('The German Basic Law, which has been in effect for 75 years, is the '
 'constitution of Germany. It emphasizes the inviolability of human dignity as '
 'its defining principle and places a duty on all state authorities to respect '
 'and protect it. The Basic Law guarantees fundamental rights such as freedom '
 'of speech, freedom of the press, freedom of faith, and equal rights. In '
 'addition to individual rights, the Basic Law also includes principles '
 'regarding state structure, federal and state governments, legislative '
 'bodies, state administration, jurisdiction, and finance. Since the '
 'reunification of Germany in 1990, the Basic Law has applied to all of '
 'Germany.')
