In [1]:
import os
import json
import openai
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.schema import BaseRetriever
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage

In [2]:
os.environ["OPEN_AI_API"] = os.getenv("OPEN_AI_API")
open_ai_api_key = os.getenv("OPEN_AI_API")

In [3]:
root_directory = ".\Documents"
documents = list()

In [4]:
client = openai.OpenAI(api_key=open_ai_api_key)

In [5]:
for folder, _, files in os.walk(root_directory):
    for file in files:
        file_path = os.path.join(folder, file)
        try:
            loader = PyPDFLoader(file_path)
            docs = loader.load()
            documents.extend(docs)
        except Exception as e:
            print(f"Error loading {file_path}: {e}")

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
XRef object at 2945 can not be read, some object may be missing
XRef object at 2945 can not be read, some object may be missing
Ignoring wrong pointing object 2268 0 (offset 1064028)
Ignoring wrong pointing object 2269 0 (offset 1064028)


In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=200)
split_docs = text_splitter.split_documents(documents)

In [7]:
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large", api_key=open_ai_api_key)

In [8]:
vector_db = FAISS.from_documents(split_docs, embedding_model)

In [9]:
retriever: BaseRetriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [68]:
# Initialize ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",  # Key used to store conversation history
    return_messages=True        # Returns chat history in message format
)

In [69]:
def query_gpt4(memory: ConversationBufferMemory, retriever: BaseRetriever, user_query: str) -> str:
    """
    Handles the conversational aspect and integrates the retriever with OpenAI's GPT-4.

    Args:
        memory: ConversationBufferMemory to manage chat history.
        retriever: The retriever for querying the vector store.
        user_query: The user's current question.

    Returns:
        A response from GPT-4.
    """
    # Retrieve relevant documents
    relevant_docs = retriever.get_relevant_documents(user_query)
    context = "\n\n".join([doc.page_content for doc in relevant_docs])

    # Retrieve conversation history from memory
    chat_history = memory.chat_memory.messages

    # Add context and chat history to the prompt
    prompt = (
        "You are a helpful assistant. Use the following context to answer the question:\n\n"
        f"Context:\n{context}\n\n"
        "Conversation History:\n" +
        "\n".join([
            f"{'User' if isinstance(message, HumanMessage) else 'Assistant'}: {message.content}"
            for message in chat_history
        ]) +
        f"\n\nUser: {user_query}\nAssistant:"
    )

    # Query GPT-4
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}]
    )

    # Add the user's query and GPT-4's response to memory
    memory.chat_memory.add_user_message(user_query)
    memory.chat_memory.add_ai_message(response.choices[0].message.content)

    return response.choices[0].message.content


In [70]:
# if __name__ == "__main__":
#     chat_history = []  # Initialize chat history
#     print("Chatbot is ready. Type 'exit' to end the conversation.")
    
#     while True:
#         user_input = input("User: ")
#         if user_input.lower() == "exit":
#             print("Goodbye!")
#             break

#         # Get GPT-4 response
#         print("User query: \n", user_input)
#         gpt4_response = query_gpt4(chat_history, retriever, user_input)
#         print(f"Assistant: \n {gpt4_response}")
        
#         # Update chat history
#         chat_history.append({"role": "user", "content": user_input})
#         chat_history.append({"role": "assistant", "content": gpt4_response})

# if __name__ == "__main__":
#     print("Chatbot is ready. Type 'exit' to end the conversation.")

#     while True:
#         user_input = input("User: ")
#         print("User query: \n")
#         if user_input.lower() == "exit":
#             print("Goodbye!")
#             break

#         # Get GPT-4 response
#         gpt4_response = query_gpt4(memory, retriever, user_input)
#         print(f"Assistant: {gpt4_response}")

In [71]:
def save_to_json(data, file_name):
    """Save the given data to a JSON file."""
    with open(file_name, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)

In [80]:
def generate_cultural_norms(country: str):
    prompt = f"Provide me 10 unique sentences highlighting the core values/important aspects of individuals living in {country}."
    norms_response = query_gpt4(memory, retriever, prompt)
    
    # Process the response into desired JSON format
    norms = []
    for idx, norm in enumerate(norms_response.split("\n")):
        if norm.strip() and not norm.strip().startswith("Here are"):
            norms.append({"id": idx + 1, "text": norm.strip()})

    norms_data = {
        "cultural-norms": [
            {
                "gpt4-o prompt": prompt,
                "country": country,
                "norms": norms
            }
        ]
    }
    return norms_data

In [81]:
def generate_cultural_scenarios(country: str, norms: list):
    scenarios_data = {
        "cultural_scenarios": []
    }
    scenario_id = 1

    for norm in norms:
        norm_prompt = (
            f"I would like to generate some example scenarios showing this cultural norm in {country}. "
            "Please generate 10 scenarios, detailing each scenario with up to 2 sentences. "
            "Please refrain from stating the cultural norm in the scenario."
        )

        scenarios_response = query_gpt4(memory, retriever, norm_prompt)
    
        # Process response into individual scenarios
        scenarios = []
        for idx, scenario in enumerate(scenarios_response.split("\n")):
            if scenario.strip():
                scenarios.append({
                    "id": scenario_id + idx,
                    "norm-id": norm['id'],
                    "text": scenario
                })

        print(scenarios)

        scenarios_data["cultural_scenarios"].append({
            "gpt4-o prompt": norm_prompt,
            "country": country,
            "scenarios": scenarios
        })

        scenario_id += 10

    return scenarios_data

In [83]:
if __name__ == "__main__":

    cultural_norms_file = "cultural_norms.json"
    cultural_scenarios_file = "cultural_scenarios.json"

    # Initialize JSON files if they don't exist
    if not os.path.exists(cultural_norms_file):
        with open(cultural_norms_file, 'w', encoding='utf-8') as file:
            json.dump({"cultural-norms": []}, file, ensure_ascii=False, indent=4)

    if not os.path.exists(cultural_scenarios_file):
        with open(cultural_scenarios_file, 'w', encoding='utf-8') as file:
            json.dump({"cultural_scenarios": []}, file, ensure_ascii=False, indent=4)

    country = input("Enter the name of the country: ")

    cultural_norms = generate_cultural_norms(country)

    with open(cultural_norms_file, 'r+', encoding='utf-8') as file:
        data = json.load(file)
        data["cultural-norms"].append(cultural_norms)
        file.seek(0)
        json.dump(data, file, ensure_ascii=False, indent=4)

        print(f"Cultural norms for {country} saved to {cultural_norms_file}")


    all_norms = cultural_norms['cultural-norms'][0]['norms']

    cultural_scenarios = generate_cultural_scenarios(country, all_norms)
    print(cultural_scenarios)

    with open(cultural_scenarios_file, 'r+', encoding='utf-8') as file:
        data = json.load(file)
        print("Data:", data)
        data["cultural_scenarios"].extend(cultural_scenarios)
        print("After appending", data)
        file.seek(0)
        json.dump(data, file, ensure_ascii=False, indent=4)

        print(f"Cultural scenarios for {country} saved to {cultural_scenarios_file}")

    
    
    # country = input("Enter the country for which to generate cultural norms and scenarios: ").strip()

    # # Generate cultural norms
    # cultural_norms = generate_cultural_norms(country)
    # save_to_json(cultural_norms, f"cultural_norms.json")
    # print(f"Cultural norms saved to cultural_norms.json")

    # # Generate cultural scenarios
    # with open(f"cultural_norms.json", 'r', encoding='utf-8') as norms_file:
    #     norms = json.load(norms_file)["cultural-norms"][0]["norms"]

    # cultural_scenarios = generate_cultural_scenarios(country, norms)
    # save_to_json(cultural_scenarios, f"cultural_scenarios.json")
    # print(f"Cultural scenarios saved to cultural_scenarios.json")

    # print(f"Cultural norms and scenarios for {country} saved successfully.")

    # while True:
    #     user_input = input("User: ")
    #     if user_input.lower() == "exit":
    #         print("Goodbye!")
    #         break

    #     if "generate cultural norms" in user_input.lower():
    #         country = user_input.split("for")[-1].strip()
    #         cultural_norms = generate_cultural_norms(country)
    #         save_to_json(cultural_norms, f"cultural_norms.json")
    #         print(f"Cultural norms saved to cultural_norms.json")

    #     elif "generate cultural scenarios" in user_input.lower():
    #         country = user_input.split("for")[-1].strip()
    #         with open(f"{country}_cultural_norms.json", 'r', encoding='utf-8') as norms_file:
    #             norms = json.load(norms_file)["cultural-norms"][0]["norms"]

    #         cultural_scenarios = generate_cultural_scenarios(country, norms)
    #         save_to_json(cultural_scenarios, f"cultural_scenarios.json")
    #         print(f"Cultural scenarios saved to cultural_scenarios.json")

    #     else:
    #         gpt4_response = query_gpt4(memory, retriever, user_input)
    #         print(f"Assistant: {gpt4_response}")

Cultural norms for India saved to cultural_norms.json
[{'id': 1, 'norm-id': 1, 'text': '1. When considering a major career move to another city, Akash consulted his entire family to discuss how the relocation might impact their shared responsibilities and relationships.'}, {'id': 3, 'norm-id': 1, 'text': "2. Rita spent the entire week preparing for her sister's wedding, involving every family member to ensure the event reflected their shared history and values."}, {'id': 5, 'norm-id': 1, 'text': '3. Before finalizing the location for their new home, Suman and her husband toured various neighborhoods with their parents, seeking insights and priorities that meet the family’s needs.'}, {'id': 7, 'norm-id': 1, 'text': '4. During annual holidays, Meera organized a family trip that included both sightseeing and visits to ancestral landmarks, deepening their understanding of family heritage.'}, {'id': 9, 'norm-id': 1, 'text': '5. When offered a high-paying job abroad, Priya engaged in lengthy

KeyboardInterrupt: 

In [77]:
cultural_scenarios['cultural_scenarios'][0]['scenarios']

[{'id': 1,
  'norm-id': -1,
  'text': '1. Rahul received a job offer in a different city. He accepted it but spent much of his salary supporting his family back home.'},
 {'id': 3,
  'norm-id': -1,
  'text': '2. Meera participated in an arranged marriage. She spent weeks with her parents reviewing potential suitors and their families.'},
 {'id': 5,
  'norm-id': -1,
  'text': "3. Anil and his friends often met at their local tea stall after work. Despite long hours, they believed sharing life's ups and downs with familiar faces was essential."},
 {'id': 7,
  'norm-id': -1,
  'text': "4. Priya was torn between attending her cousin's wedding or a friend's birthday party. She prioritized the wedding, honoring her family's expectations."},
 {'id': 9,
  'norm-id': -1,
  'text': '5. When faced with two job offers, Arjun chose the one closer to home. His decision was influenced by his desire to be available for his elderly parents.'},
 {'id': 11,
  'norm-id': -1,
  'text': '6. Sita was furious