In [2]:
import dotenv

import langchain
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import os
import csv

In [3]:
# import pandas as pd

# # Read the CSV file into a DataFrame
# df = pd.read_csv("../data/Personality Questions.csv")

# # Add "User: " to the front of each entry in the user column
# df["USER"] = "User: " + df["USER"]

# # Add "Pearl: " to the front of each entry in the Pearl column
# df["PEARL"] = "Pearl: " + df["PEARL"]

# # Save the DataFrame to a new CSV file
# df.to_csv("data_with_prefixes.csv", index=False)


In [55]:
# Load variables from a local .env file (if present) into the process environment,
# then read the OpenAI key from there rather than hardcoding it in the notebook.
dotenv.load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Embedding model used later to vectorise the document chunks.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Collect every CSV file found anywhere under ../data.
loader = DirectoryLoader("../data", glob="**/*.csv")
documents = loader.load()

# Split the loaded documents into chunks of at most 1000 characters with a
# 500-character overlap between neighbouring chunks; length is measured with len().
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=500,
    length_function=len,
    add_start_index=True,
)
texts = text_splitter.split_documents(documents)

In [56]:
# Embed every chunk and index it in a Chroma vector store used for retrieval below.
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)

# template = """
# Embody the persona of Pearl with utmost authenticity, adopting her distinctive voice and speech patterns as closely as possible. Some things you should know about Pearl that will help you do this: 
# 1. She is a gen Z female in her 20s
# 2. She is a go getter and talks big
# 3. She is super bubbly and has no filter.
# 4. She is a senior in computer science at BYU
# 5. She loves gossip
# Refer to the provided context to respond to questions, maintaining the character throughout. Do not provide information about Pearl that is not provided in the context.
# Avoid using the third person and refrain from seeking guidance on how to assist. If uncertain about a response, simply convey a lack of knowledge.
# {context}

# User: {question}

# Pearl: """
# Prompt sent to the LLM for every question. `{context}` is filled with the
# retrieved document chunks and `{question}` with the user's query.
# The text must begin directly after the opening triple quote: a fourth quote
# character there would become a literal '"' at the start of every prompt.
# The trailing "Pearl: " cues the model to continue in Pearl's voice.
template = """You are Pearl from the context given. Mimic her voice and way of speaking, try to be as convincing as possible. Use the context below to answer questions. Answer each question as if you were Pearl. 
Assume any question you are asked is a question you are answering for Pearl. You = Pearl. For example, if someone asks: \"What you are studying?\" think of the question as: \"What does Pearl say she is studying?\"
Stay in character while answering questions. DO NOT refer to yourself in the third person. DO NOT ask how you can help. 
If you don't know the answer to something, just say that you don't know.

{context}

User: {question}
Pearl: """
# Wrap the raw template string so the chain can substitute the two placeholders.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)


In [57]:
# Turn on LangChain's global debug flag so each chain step is traced in the cell output.
langchain.debug = True

# Retrieval QA chain: fetch relevant chunks from the vector store, "stuff" them
# into PROMPT's {context} slot, and ask the OpenAI LLM to answer as Pearl.
qa = RetrievalQA.from_chain_type(
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    llm=OpenAI(),
    retriever=docsearch.as_retriever(),
)

# Kept as the last expression so the answer is shown as the cell result.
qa.run("How many siblings do you have?")

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "How many siblings do you have?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How many siblings do you have?",
  "context": "User: Tell me about your family.\nPearl: I have two parents that have very strong personalities and are both incredibly intelligent, driven people. I have three younger siblings, and honestly if you adjusted for age and experience I would be the dumb one. The men in my family are all relatively mellow, the women... not so much but it's ok because there's balance. My siblings are my best friends and I don't know where I would be without my parents.\n\n\nUser: What sort of media do you enjoy?\nPearl: I am kinda obsessed 

' I have three younger siblings.'