In [31]:
# Required imports
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from tqdm import tqdm

In [32]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) so that the os.getenv
# lookups below can see the API keys without them being hard-coded here.
load_dotenv()

# Read the API keys that the later cells pass to the model clients.
#
# The keys are only *read* here. Writing them back with
# ``os.environ[name] = os.getenv(name)`` would be a no-op for a variable that is
# set and would raise ``TypeError`` (environment values must be ``str``, not
# ``None``) for one that is not, hiding the real problem behind an unrelated error.
groq_api_key = os.getenv("GROQ_API_KEY")
open_ai_api_key = os.getenv("OPEN_AI_API")

# Report missing/empty variables up front so a configuration problem is visible
# in this cell rather than surfacing later as an authentication failure.
_missing_keys = [
    name
    for name in ("HUGGING_FACE_API_KEY", "OPEN_AI_API", "GROQ_API_KEY")
    if not os.getenv(name)
]
if _missing_keys:
    print(f"Warning: environment variable(s) not set: {', '.join(_missing_keys)}")

In [33]:
# Folder (relative to the notebook's working directory) that is scanned for PDFs.
# Built with os.path.join rather than a literal containing a backslash: "\D" is
# an invalid escape sequence that newer Python versions warn about, and a
# backslash is only a path separator on Windows, so on other platforms os.walk
# would silently find nothing.
root_directory = os.path.join(".", "Documents")

# Accumulates the Document objects produced by the loading loop.
documents = []

In [34]:
# Start from an empty list so that re-running this cell does not append the
# same documents a second time (which would later be chunked and embedded twice).
documents = []

# Walk the whole tree under root_directory and load every PDF found.
for folder, _, files in os.walk(root_directory):
    for file in files:
        # Only hand PDF files to PyPDFLoader; other files (notes, OS metadata
        # files, ...) would just produce a load error.
        if not file.lower().endswith(".pdf"):
            continue
        file_path = os.path.join(folder, file)
        try:
            documents.extend(PyPDFLoader(file_path).load())
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest.
            print(f"Error loading {file_path}: {e}")

# os.walk yields nothing for a missing directory, so make an empty result visible.
if not documents:
    print(f"No PDF documents were loaded from {root_directory!r}")

XRef object at 2945 can not be read, some object may be missing
XRef object at 2945 can not be read, some object may be missing
Ignoring wrong pointing object 2268 0 (offset 1064028)
Ignoring wrong pointing object 2269 0 (offset 1064028)


In [35]:
# Sanity check: total number of Document objects collected by the loading loop.
len(documents)

2421

In [None]:
# Chunking setup.
# Accumulator that the splitting loop fills with the resulting chunks.
chunked_documents = list()

# Splitter configuration: chunks of at most 8000 length units with 200 units
# shared between neighbouring chunks (the splitter measures length in
# characters unless a different length function is supplied).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=8000,
)

In [37]:
# Rebuild the chunk list from scratch on every run: appending to the list
# created in an earlier cell would duplicate every chunk (and its embedding
# cost) whenever this cell is executed again.
# Documents are split one at a time so the progress bar advances per document.
chunked_documents = [
    chunk
    for doc in tqdm(documents, desc="Splitting Documents")
    for chunk in text_splitter.split_documents([doc])
]

Splitting Documents: 100%|██████████| 2421/2421 [00:00<00:00, 5676.30it/s]


In [41]:
# Embedding backend used to vectorise the chunks: OpenAI's
# "text-embedding-3-large", authenticated with the key read from the environment.
embedding_model = OpenAIEmbeddings(
    api_key=open_ai_api_key,
    model="text-embedding-3-large",
)

In [42]:
# Embed every chunk with embedding_model and build a FAISS index over the
# resulting vectors. All chunks are sent through the embedding model here, so
# this is expected to be the slow step of the notebook.
vector_db = FAISS.from_documents(
    documents=chunked_documents,
    embedding=embedding_model,
)

In [43]:
# Chat model (served through Groq) that generates the answers.
# NOTE(review): this is a "preview" model id; confirm it is still being served
# before relying on it.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model="llama-3.2-11b-text-preview",
)

In [44]:
# 5. Conversation memory
#   memory_key      - name under which the accumulated history is handed to the chain.
#   return_messages - hand the history back as a list of message objects
#                     instead of one flattened string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [45]:
# 6. Conversational retrieval chain
# Retriever over the FAISS index: plain similarity search returning the 5
# closest chunks for each query.
retriever = vector_db.as_retriever(search_kwargs={"k": 5}, search_type="similarity")

# Wire the LLM, retriever and conversation memory together. The "stuff" chain
# type places all retrieved chunks into a single prompt for the LLM.
qa_chain = ConversationalRetrievalChain.from_llm(
    chain_type="stuff",
    memory=memory,
    retriever=retriever,
    llm=llm,
)

In [46]:
def chatbot(user_input: str) -> str:
    """
    Send one user message through the retrieval chain and return its answer.

    Conversation context is retained between calls by the memory object that
    is attached to ``qa_chain``.

    Args:
        user_input: The user's message or question.

    Returns:
        The answer text produced by the chain.
    """
    # Chain.run is deprecated in LangChain; invoke is the supported entry point.
    # The chain expects the message under the "question" key (the chat history
    # is supplied by its memory) and returns a dict whose "answer" entry holds
    # the reply text.
    result = qa_chain.invoke({"question": user_input})
    return result["answer"]

In [47]:
# Main execution loop for the chatbot
def run_chat_loop(respond, read=input, write=print):
    """
    Run an interactive question/answer loop until the user asks to stop.

    Args:
        respond: Callable taking the user's message (str) and returning the
            reply text.
        read: Callable taking a prompt string and returning the next line of
            user input (defaults to ``input``).
        write: Callable used to emit each line of output (defaults to ``print``).

    The loop ends when the user types "exit" (any case, surrounding whitespace
    ignored) or when input is closed/interrupted. Blank lines are skipped
    rather than being sent to ``respond``.
    """
    write("Chatbot is running. Type 'exit' to end the conversation.")
    while True:
        try:
            user_input = read("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel: leave the loop cleanly
            # instead of surfacing a traceback.
            write("Chatbot: Goodbye!")
            break
        if not user_input:
            # Nothing to ask; avoid spending a model call on an empty message.
            continue
        if user_input.lower() == "exit":
            write("Chatbot: Goodbye!")
            break
        response = respond(user_input)
        write(f"Chatbot: {response}")


if __name__ == "__main__":
    run_chat_loop(chatbot)

Chatbot is running. Type 'exit' to end the conversation.
Chatbot: Based on the provided context, here are 10 cultural norms for Japan:

1. **Bowing is preferred over shaking hands**: Bowing is an expression of respect and gratitude in Japan, and it's considered more polite than shaking hands.

2. **Use of family names is customary**: In Japan, it's customary to use family names instead of first names, especially when interacting with people of higher status or authority.

3. **Gift-giving is a significant aspect of etiquette**: Gift-giving is a highly ritualized custom in Japan, and it's essential to give nicely wrapped gifts that are not too valuable to avoid putting the recipient in a position of debt.

4. **Modesty and humility are valued**: Japanese culture places a strong emphasis on modesty and humility, so it's essential to be self-effacing and avoid drawing attention to oneself.

5. **Directness is not always appreciated**: Japanese communication often involves indirectness, an

In [48]:
# Inspect the message history that the memory object has recorded so far.
memory.chat_memory.messages

[HumanMessage(content='Please provide 10 cultural norms for Japan'),
 AIMessage(content='Based on the provided context, here are 10 cultural norms for Japan:\n\n1. **Bowing is preferred over shaking hands**: Bowing is an expression of respect and gratitude in Japan, and it\'s considered more polite than shaking hands.\n\n2. **Use of family names is customary**: In Japan, it\'s customary to use family names instead of first names, especially when interacting with people of higher status or authority.\n\n3. **Gift-giving is a significant aspect of etiquette**: Gift-giving is a highly ritualized custom in Japan, and it\'s essential to give nicely wrapped gifts that are not too valuable to avoid putting the recipient in a position of debt.\n\n4. **Modesty and humility are valued**: Japanese culture places a strong emphasis on modesty and humility, so it\'s essential to be self-effacing and avoid drawing attention to oneself.\n\n5. **Directness is not always appreciated**: Japanese communic