In [1]:
# imports

import os
import glob
from dotenv import load_dotenv
import gradio as gr

In [2]:
# imports for langchain, plotly and Chroma

from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings

In [3]:
# price is a factor for our company, so we're going to use a low cost model

MODEL = "gpt-4o-mini"
db_name = "vector_db_aws"

In [4]:
# Load environment variables in a file called .env

load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [5]:
# Read in documents using LangChain's loaders
# Take everything in all the sub-folders of our knowledgebase

folders = glob.glob("Mohan-experience/*")

def add_metadata(doc, doc_type):
    doc.metadata["doc_type"] = doc_type
    return doc

documents = []
print(folders)
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.pdf", loader_cls=PyPDFLoader)
    #print(loader)
    folder_docs = loader.load()
    documents.extend([add_metadata(doc, doc_type) for doc in folder_docs])

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

print(f"Total number of chunks: {len(chunks)}")
print(f"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}")

['Mohan-experience\\resume']
Total number of chunks: 11
Document types found: {'resume'}


In [6]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
# Chroma is a popular open source Vector Database based on SQLLite

embeddings = OpenAIEmbeddings()

# Delete if already exists

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 11 documents


In [7]:
# Let's investigate the vectors

collection = vectorstore._collection
count = collection.count()

sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")

There are 11 vectors with 1,536 dimensions in the vector store


## Time to use LangChain to bring it all together

In [12]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# Alternative - if you'd like to use Ollama locally, uncomment this line instead
# llm = ChatOpenAI(temperature=0.7, model_name='llama3.2', base_url='http://localhost:11434/v1', api_key='ollama')

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


# workflow = StateGraph(state_schema=MessagesState)

# def call_model(state: MessagesState):
#     response = model.invoke(state["messages"])
#     # We return a list, because this will get added to the existing list
#     return {"messages": response}


# # Define the two nodes we will cycle between
# workflow.add_edge(START, "model")
# workflow.add_node("model", call_model)
# memory = MemorySaver()

# app = workflow.compile(
#     checkpointer=memory
# )

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the GPT 3.5 LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

## Now we will bring this up in Gradio using the Chat interface -

A quick and easy way to prototype a chat with an LLM

In [13]:
# Wrapping that in a function

def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

In [15]:
# And in Gradio:
view = gr.ChatInterface(chat, type="messages").launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://41bb5a7664ce37166d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [14]:
# Let's investigate what gets sent behind the scenes

from langchain_core.callbacks import StdOutCallbackHandler

llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever()

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory, callbacks=[StdOutCallbackHandler()])

query = "Who is SAP certified?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer:", answer)



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Mohan Salal
♂¶obile-alt+91-8937930647, 8104941883 • /envel⌢pemohansalal14296@gmail.com
/linkedin-inmohan-salal-861222a7
Skills
Languages: Java, JavaScript, React, Groovy, Python, SQL, NoSQL, HTML, CSS
Frameworks: Java Spring, JUnit
Cloud & DevOps : AWS, Kubernetes, Docker
Other: SAP Commerce, Solr, Swagger, GraphQL, Kafka, Microservices, Webservices, SAP Punchout,
cXML, GenAI, Git, Cassandra, PostgreSQL, MySQL
Professional Experience
Deloitte, Gurugram, Haryana
Consultant Sep 2021 – Present
○ Worked on an order data transfer solution to Azure Topic, implemented an efficient retry mechanism to automatically
rep

In [15]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG; k is how many chunks to use
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})

# putting it together: set up the conversation chain with the GPT 3.5 LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [16]:
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

In [17]:
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11
Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11
Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11
Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11
Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11
Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11
Number of requested results 25 is greater than number of elements in index 11, updating n_results = 11


# Exercises

Try applying this to your own folder of data, so that you create a personal knowledge worker, an expert on your own information!