In [1]:
# imports

import os
import glob
from dotenv import load_dotenv
import gradio as gr

In [2]:
# imports for langchain

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
# price is a factor for our company, so we're going to use a low cost model

MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [4]:
load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [5]:
# Directory containing PDFs
pdf_dir = "C:\Ofc_Docs\Abinitio\Abinitio_components_guide"

# Collect all documents from the PDFs
all_docs = []

for filename in os.listdir(pdf_dir):
    if filename.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(pdf_dir, filename))
        documents = loader.load()  # returns list of Documents (with metadata)
        all_docs.extend(documents)

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Tune based on use case
    chunk_overlap=200,
)
chunked_docs = text_splitter.split_documents(all_docs)

print(pdf_dir)
print(f"Total chunks: {len(chunked_docs)}")
print(f"Sample content: {chunked_docs[0].page_content if chunked_docs else 'No documents'}")

C:\Ofc_Docs\Abinitio\Abinitio_components_guide
Total chunks: 0
Sample content: No documents


In [6]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
# Chroma is a popular open source Vector Database based on SQLLite

embeddings = OpenAIEmbeddings()

# If you would rather use the free Vector Embeddings from HuggingFace sentence-transformers
# Then replace embeddings = OpenAIEmbeddings()
# with:
# from langchain.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Delete if already exists

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore

vectorstore = Chroma.from_documents(documents=chunked_docs, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 1414 documents


In [8]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


In [9]:
query = "Tell me runtime behaviour of Gather component."
result = conversation_chain.invoke({"question":query})
print(result["answer"])

The Gather component has the following runtime behavior:

- It reads data records from the flows connected to its in port.
- It combines the records arbitrarily.
- It writes the combined records to the out port.

Additionally, you can use the Gather component to:
- Reduce data parallelism by connecting a single fan-in flow to the in port.
- Reduce component parallelism by connecting multiple straight flows to the in port.

It is important to note that you do not need to use a Gather component when connecting a fan-in or all-to-all flow to the in port of a Sort, as Sort can gather internally on its in port. Also, Gather does not support using default record assignment, so the record formats of the in and out ports must be identical; otherwise, the output is unpredictable.


In [10]:
# set up a new conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [11]:
# Wrapping in a function - note that history isn't used, as the memory is in the conversation_chain

def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [12]:
# And in Gradio:

view = gr.ChatInterface(chat, type="messages").launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://c3ac6c8b3d37fc5df7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
