In [28]:
import os
import openai
import faiss
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.schema import BaseRetriever
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage

In [15]:
# Read the OpenAI API key from the OPEN_AI_API environment variable.
# (Re-assigning os.environ from os.getenv was a no-op when the variable was set
# and raised "TypeError: str expected, not NoneType" when it was not.)
open_ai_api_key = os.getenv("OPEN_AI_API")
if not open_ai_api_key:
    # Fail here with an actionable message instead of later inside an API call.
    raise RuntimeError(
        "Environment variable OPEN_AI_API is not set; "
        "export your OpenAI API key under that name before running this notebook."
    )

In [16]:
# Folder (relative to the notebook's working directory) that is scanned for PDFs.
# Built with os.path.join so the separator is correct on every platform; the
# previous literal ".\Documents" relied on the invalid escape sequence "\D" and
# only described a directory on Windows.
root_directory = os.path.join(".", "Documents")

# Accumulator for the entries produced by the PDF loader cell.
documents = []

In [17]:
# OpenAI SDK client used for the chat-completion calls made by query_gpt4.
client = openai.OpenAI(
    api_key=open_ai_api_key,
)

In [18]:
# Walk root_directory recursively and load every PDF found into `documents`.
# The list is rebuilt from scratch so that re-running this cell does not append
# a second copy of every page to the previous run's results.
documents = []

for folder, _, files in os.walk(root_directory):
    for file in files:
        # Only hand PDF files to the PDF loader; other file types would just
        # produce a parse error for each file.
        if not file.lower().endswith(".pdf"):
            continue

        file_path = os.path.join(folder, file)
        try:
            loader = PyPDFLoader(file_path)
            docs = loader.load()
            documents.extend(docs)
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest.
            print(f"Error loading {file_path}: {e}")

XRef object at 2945 can not be read, some object may be missing
XRef object at 2945 can not be read, some object may be missing
Ignoring wrong pointing object 2268 0 (offset 1064028)
Ignoring wrong pointing object 2269 0 (offset 1064028)


In [19]:
# Sanity check: number of entries accumulated in `documents` by the loading loop
# (shown as this cell's output).
len(documents)

2421

In [20]:
# Break the loaded documents into overlapping chunks before embedding them:
# chunks of at most 8000 units with 200 units shared between neighbours
# (units are characters under the splitter's default length function).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=8000,
)
split_docs = text_splitter.split_documents(documents)

In [21]:
# Embedding model used to vectorise the chunks for the FAISS store.
embedding_model = OpenAIEmbeddings(
    api_key=open_ai_api_key,
    model="text-embedding-3-large",
)

In [22]:
# Embed every chunk with embedding_model and index the vectors in a FAISS store.
# This is the expensive step of the notebook: it sends all of split_docs to the
# embeddings API.
vector_db = FAISS.from_documents(
    split_docs,
    embedding_model,
)

In [10]:
# Retriever view over vector_db: plain similarity search, 5 results per query.
retriever: BaseRetriever = vector_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [23]:
# Conversation memory shared by every call to query_gpt4.
#   memory_key      -> key under which the conversation history is stored
#   return_messages -> history is returned in message format
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

In [29]:
# def query_gpt4(chat_history: List[Dict[str, str]], retriever: BaseRetriever, user_query: str) -> str:
#     """
#     Handles the conversational aspect and integrates the retriever with OpenAI's GPT-4.
    
#     Args:
#         chat_history: List of previous conversation turns as {"role": "user"/"assistant", "content": "message"}.
#         retriever: The retriever for querying the vector store.
#         user_query: The user's current question.
        
#     Returns:
#         A response from GPT-4.
#     """
#     # Retrieve relevant documents
#     relevant_docs = retriever.get_relevant_documents(user_query)
#     context = "\n\n".join([doc.page_content for doc in relevant_docs])

#     # Add context and chat history to the prompt
#     prompt = (
#         "You are a helpful assistant. Use the following context to answer the question:\n\n"
#         f"Context:\n{context}\n\n"
#         "Conversation History:\n" +
#         "\n".join([f"{turn['role']}: {turn['content']}" for turn in chat_history]) +
#         f"\n\nUser: {user_query}\nAssistant:"
#     )

#     # Query GPT-4
#     response = client.chat.completions.create(
#         model="gpt-4o",
#         messages=[{"role": "user", "content": prompt}]
#     )

#     # Return the GPT-4 response
#     return response.choices[0].message.content


# ---------------------------------------------------------------------------------------------------

def query_gpt4(memory: ConversationBufferMemory, retriever: BaseRetriever, user_query: str) -> str:
    """
    Answer a user question with GPT-4o, grounded in retrieved documents and prior turns.

    The retrieved passages and the conversation so far are flattened into one
    prompt string, which is sent as a single user message through the
    module-level ``client``. After a successful call the question and the
    answer are appended to ``memory``; if retrieval or the API call raises,
    ``memory`` is left unchanged.

    Args:
        memory: ConversationBufferMemory to manage chat history.
        retriever: The retriever for querying the vector store.
        user_query: The user's current question.

    Returns:
        The model's reply text, or an empty string if the API returned no content.
    """
    # Retrieve relevant documents. `invoke` is the current retriever entry
    # point; `get_relevant_documents` is deprecated in LangChain.
    relevant_docs = retriever.invoke(user_query)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Render the stored history as "User: ..." / "Assistant: ..." lines.
    # Any message that is not a HumanMessage is labelled as the assistant.
    chat_history = memory.chat_memory.messages
    history_text = "\n".join(
        f"{'User' if isinstance(message, HumanMessage) else 'Assistant'}: {message.content}"
        for message in chat_history
    )

    # Add context and chat history to the prompt
    prompt = (
        "You are a helpful assistant. Use the following context to answer the question:\n\n"
        f"Context:\n{context}\n\n"
        "Conversation History:\n"
        f"{history_text}"
        f"\n\nUser: {user_query}\nAssistant:"
    )

    # Query GPT-4o
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}]
    )

    # The SDK types `content` as optional; normalise a missing reply to "" so the
    # declared `str` return type holds and the memory write below cannot fail.
    answer = response.choices[0].message.content or ""

    # Record the completed turn only after the API call has succeeded.
    memory.chat_memory.add_user_message(user_query)
    memory.chat_memory.add_ai_message(answer)

    return answer


In [None]:
# if __name__ == "__main__":
#     chat_history = []  # Initialize chat history
#     print("Chatbot is ready. Type 'exit' to end the conversation.")
    
#     while True:
#         user_input = input("User: ")
#         if user_input.lower() == "exit":
#             print("Goodbye!")
#             break

#         # Get GPT-4 response
#         print("User query: \n", user_input)
#         gpt4_response = query_gpt4(chat_history, retriever, user_input)
#         print(f"Assistant: \n {gpt4_response}")
        
#         # Update chat history
#         chat_history.append({"role": "user", "content": user_input})
#         chat_history.append({"role": "assistant", "content": gpt4_response})

# Interactive chat loop: read a question, answer it with query_gpt4 (which also
# updates `memory`), and repeat until the user types "exit".
if __name__ == "__main__":
    print("Chatbot is ready. Type 'exit' to end the conversation.")

    while True:
        try:
            user_input = input("User: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the cell was interrupted: leave the loop
            # cleanly instead of surfacing a traceback.
            print("Goodbye!")
            break

        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        if not user_input:
            # Nothing to ask; do not spend an API call on an empty prompt.
            continue

        # Echo the question so it appears in the cell output next to the answer.
        print("User query: \n", user_input)

        # Get GPT-4 response
        gpt4_response = query_gpt4(memory, retriever, user_input)
        print(f"Assistant: {gpt4_response}")

Chatbot is ready. Type 'exit' to end the conversation.
Assistant: Based on the provided context, here are 10 cultural norms for America:

1. **Individualism**: Emphasis is placed on self-reliance, personal independence, and initiative, with individuals expected to be "self-starting."

2. **Liberty and Freedom**: Liberty is highly valued, seen as freedom from unnecessary constraints, and officially associated with "the pursuit of happiness."

3. **Competitiveness**: A general value within the culture, competitiveness is expected both individually and collectively, and is particularly pronounced in business and academia.

4. **Achievement and Earned Status**: Status is ideally achieved through personal effort and accomplishments, valued over inherited advantages, with an emphasis on meritocracy.

5. **Generosity and Social Conformity**: While individualism is valued, there is also a strong emphasis on generosity and a concern for societal welfare, alongside a degree of conformity to soci

In [31]:
memory.chat_memory.messages

[HumanMessage(content='Please provide 10 cultural norms for America'),
 AIMessage(content='Based on the provided context, here are 10 cultural norms for America:\n\n1. **Individualism**: Americans highly value individualism, emphasizing personal independence, self-reliance, and initiative.\n\n2. **Liberty and Freedom**: Liberty is integral, often associated with freedom from unnecessary constraints. However, the purpose of this freedom is broadly defined as "the pursuit of happiness."\n\n3. **Competitiveness**: Competitiveness is a pervasive value, expected from both individuals and groups, and is institutionalized in various sectors such as business and education.\n\n4. **Achievement and Earned Status**: Status is ideally achieved through one\'s own efforts rather than inherited, with emphasis on self-made success.\n\n5. **Generosity and Social Conformity**: Despite the focus on individualism, generosity and concern for others, particularly those less fortunate, are valued, as is outw