In [2]:
import logging
import os
from pprint import pprint

import chromadb.utils.embedding_functions as embedding_functions
import openai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter



DB_PATH = "chroma_db"  # not referenced by the code visible in this notebook; kept as-is
PERSIS_DIR = "./chroma_langchain_db"  # on-disk location of the persisted Chroma store
load_dotenv()  # copy variables from a local .env file (if any) into os.environ

# os.getenv() returns None when the variable is missing, and assigning None into
# os.environ raises an opaque "TypeError: str expected, not NoneType". Validate
# first so a missing key is reported with an actionable message instead.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it or add it to a .env file before "
        "running this notebook."
    )

os.environ["OPENAI_API_KEY"] = _openai_api_key
openai_client = openai.OpenAI(api_key=_openai_api_key)

In [3]:

def save_documents(release_data: list[str], embeddings: OpenAIEmbeddings) -> Chroma:
    """
    Split raw texts into chunks, embed them and store them in a persistent Chroma database.

    Args:
        release_data (list[str]): Raw text of each source document.
        embeddings (OpenAIEmbeddings): Embedding model used to create the chunk embeddings.

    Returns:
        Chroma: The Chroma database object, persisted under ``PERSIS_DIR``.

    Raises:
        ValueError: If splitting ``release_data`` yields no document chunks
            (for example, an empty list).
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    # create_documents() takes the whole list of texts, so no per-item loop is needed.
    docs = text_splitter.create_documents(list(release_data))

    # Refuse to build a store with nothing in it: a persisted-but-empty directory
    # could make later runs skip ingestion and silently retrieve nothing.
    if not docs:
        raise ValueError("No text to index: release_data produced no document chunks.")

    db = Chroma.from_documents(docs, embeddings, persist_directory=PERSIS_DIR)

    # Recent Chroma releases persist automatically and their LangChain wrappers
    # deprecate or drop persist(); call it only when it exists so that older
    # versions still flush to disk and newer ones do not raise AttributeError.
    persist = getattr(db, "persist", None)
    if callable(persist):
        persist()
    return db


def load_text_files(directory_path: str) -> list[str]:
    """
    Read every ``.txt`` file found under a directory (recursively) and return their contents.

    Args:
        directory_path (str): Root directory that is searched with the ``**/*.txt`` glob.

    Returns:
        list[str]: The ``page_content`` of each loaded document, in loader order.
    """
    text_loader = DirectoryLoader(directory_path, glob="**/*.txt", loader_cls=TextLoader)

    contents: list[str] = []
    for loaded_doc in text_loader.load():
        contents.append(loaded_doc.page_content)
    return contents


def retrive_docs(chroma_db: Chroma, llm: ChatOpenAI, query: str) -> list[Document]:
    """
    Retrieve documents relevant to a query from a Chroma database.

    The lookup goes through ``MultiQueryRetriever``, which uses the language
    model together with the store's own retriever to collect matching documents.

    Args:
        chroma_db (Chroma): The Chroma database object to search.
        llm (ChatOpenAI): The language model handed to the multi-query retriever.
        query (str): The query string to search for.

    Returns:
        list[Document]: The documents returned by the retriever (LangChain
        ``Document`` objects, not plain strings). An empty list when ``query``
        is empty or only whitespace.
    """
    # A blank query has nothing to search for; skip the LLM round-trip entirely.
    if not query or not query.strip():
        return []

    multi_query_retriever = MultiQueryRetriever.from_llm(
        retriever=chroma_db.as_retriever(), llm=llm
    )
    return multi_query_retriever.invoke(query)


def get_response(llm: ChatOpenAI, docs: list[Document], query: str) -> str:
    """
    Answer a question with the language model, using the given documents as context.

    Args:
        llm (ChatOpenAI): The language model that generates the answer.
        docs (list[Document]): Context documents. They are passed as ``context``
            to a stuff-documents chain, which expects LangChain ``Document``
            objects rather than plain strings.
        query (str): The question to ask the language model.

    Returns:
        str: The response generated by the language model.
    """
    # {context} is filled by the stuff-documents chain from `docs`;
    # {query} is supplied directly in the invoke() payload below.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You have to answer question based on context given:\n\n{context}"),
            ("user", "Question:\n\n{query}"),
        ]
    )

    chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
    return chain.invoke({"context": docs, "query": query})

DEFAULT_QUERY = "What are the all different partnerships and collboration made by T-Systems. List them and give some info on them"


def main(query: str = DEFAULT_QUERY) -> None:
    """
    Load (or build) the vector store, retrieve context for a query and print the LLM's answer.

    Args:
        query (str): The question to answer. Defaults to ``DEFAULT_QUERY``, the
            question this notebook was originally written around.

    Returns:
        None. The answer is pretty-printed; problems are reported via ``logging``.
    """
    # Validate the query before anything else so that a blank question never
    # triggers embedding, retrieval or LLM calls.
    if not query or not query.strip():
        # warning (not info): no logging configuration is visible in this notebook,
        # and the root logger's default level would drop info-level messages.
        logging.warning("Please provide the search query !")
        return

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

    # Reuse the persisted store only when the directory actually has content;
    # a bare or empty directory is treated as "not ingested yet".
    if os.path.isdir(PERSIS_DIR) and os.listdir(PERSIS_DIR):
        chroma_db = Chroma(persist_directory=PERSIS_DIR, embedding_function=embeddings)
    else:
        release_data_list = load_text_files('data')
        chroma_db = save_documents(release_data_list, embeddings)

    # Initialize the language model with the specified temperature and API key
    llm = ChatOpenAI(temperature=0.6, api_key=os.environ["OPENAI_API_KEY"])
    docs = retrive_docs(chroma_db, llm, query)

    llm_response = get_response(llm, docs, query)
    if llm_response:
        pprint(llm_response)
    else:
        logging.warning("No Response recived from the LLM !")


In [4]:
# Entry point: run the pipeline when this code is executed directly. Executing
# this cell in the notebook also satisfies the guard, so main() is invoked here.
if __name__ == "__main__":
    main()

  chroma_db = Chroma(persist_directory=PERSIS_DIR, embedding_function=embeddings)


('T-Systems has formed various partnerships and collaborations to enhance its '
 'offerings in the digital space. Here are the key partnerships mentioned in '
 'the text:\n'
 '\n'
 '1. **Partner T-Systems**: T-Systems collaborates with its partner, '
 'T-Systems, to fully migrate its IT infrastructure to the public or hybrid '
 'cloud. This partnership allows for flexibility and scalability required for '
 'their operations. Close collaboration based on agile methods is emphasized '
 'to enable integration.\n'
 '\n'
 '2. **Google, AWS, and Azure hyper-scalers**: T-Systems utilizes the services '
 'of Google, AWS, and Azure hyper-scalers to tailor cloud solutions for '
 'different workloads. This partnership allows for offering private cloud, '
 'public cloud, and hybrid cloud solutions to customers.\n'
 '\n'
 '3. **VMware**: T-Systems leverages VMware for its Private Future Cloud '
 'Infrastructure. This partnership enables T-Systems to provide cloud '
 'solutions based on VMware techn