### ⚙️  |  Settings

In [26]:
from dotenv import load_dotenv

load_dotenv()

True

In [39]:
GROQ_LLM_MODEL = "llama3-70b-8192"
TEMPERATURE = 0.0

OLLAMA_NOMIC_EMBEDDING = "nomic-embed-text"
OLLAMA_EMBEDDING_SHOW_PROGRESS = False
K_NEAREST_NEIGHBORS = 4
OLLAMA_LLAMA3_70B = "llama3:70b"

INGEST_KNOWLEDGE = False

CHROMA_DB_PATH = "chroma"

In [3]:
import pandas as pd
from langchain_community.document_loaders.csv_loader import CSVLoader

loader_csv = CSVLoader(file_path="data/westworld.csv")
df = pd.DataFrame(loader_csv.load())
df

Unnamed: 0,0,1,2
0,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
1,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
2,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
3,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
4,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
...,...,...,...
143,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
144,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
145,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
146,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"


In [6]:
from langchain_community.document_loaders.merge import MergedDataLoader

loader_all = MergedDataLoader(loaders=[loader_csv])

In [7]:
docs_all = loader_all.load()

In [8]:
len(docs_all)

148

In [9]:
docs_all[0]

Document(page_content='What is Westworld? A: Westworld is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts.": Where is Westworld located? A: The exact location of Westworld is kept secret, but it is situated in a remote area, accessible only by the park\'s own transportation services.', metadata={'source': 'data/westworld.csv', 'row': 0})

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs_all)

len(texts)

148

In [15]:
from langchain.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model=OLLAMA_NOMIC_EMBEDDING, show_progress=OLLAMA_EMBEDDING_SHOW_PROGRESS
)

In [19]:
from langchain_chroma import Chroma

vector_db = Chroma.from_documents(
    documents=texts, embedding=embeddings, persist_directory=CHROMA_DB_PATH
)

In [21]:
retriever = vector_db.as_retriever(search_kwargs={"k": K_NEAREST_NEIGHBORS})

In [28]:
from langchain_groq import ChatGroq

groq_llm = ChatGroq(model=GROQ_LLM_MODEL, temperature=TEMPERATURE)

In [30]:
from jinja2 import Environment, FileSystemLoader

PROMPTS_PATH = "prompts"
OPENAI_PROMPT = "groq-llama3-rag.jinja"

env = Environment(loader=FileSystemLoader(PROMPTS_PATH))
prompt_template = env.get_template(OPENAI_PROMPT)
prompt = prompt_template.render()
print(prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to 
answer the question. If you don't know the answer, just say that you don't know. Use three sentences
maximum and keep the answer concise.

<|eot_id|><|start_header_id|>user<|end_header_id|>
QUESTION: {question}
CONTEXT:  {context}
Answer:
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>


In [33]:
question = "What can I do in the Westworld Park?"
context = retriever.invoke(question)
context

[Document(page_content='What is Westworld? A: Westworld is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts.": Are there any age-specific activities or areas within the park? A: While Westworld is primarily designed for adult guests, there are some family-friendly areas and activities available, such as horseback riding lessons, old-fashioned games, and age-appropriate storylines.', metadata={'row': 27, 'source': 'data/westworld.csv'}),
 Document(page_content='What is Westworld? A: Westworld is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts.": Can I bring my own food and drinks into Westworld? A: Outside food and drinks are not permitted in the park. However, Westworld provides a wide selection of food and beverage options to cater to all tastes and dietary re

In [35]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

rag_prompt = PromptTemplate(template=prompt, input_variables=["question", "context"])
rag_prompt_chain = rag_prompt | groq_llm | StrOutputParser()

result = rag_prompt_chain.invoke({"question": question, "context": context})
print(result)

In Westworld Park, you can experience the Wild West in a realistic and interactive way, interact with advanced androids called "hosts", and participate in activities such as horseback riding lessons and old-fashioned games. You can also explore the outer regions, but you'll need to use park-provided weapons and gear. Additionally, there are adult-only areas and experiences available, such as the Pariah saloon, which offer more risqué activities and storylines.


In [47]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | groq_llm
    | StrOutputParser()
)

rag_chain.invoke({"question": "What is the Westworld Park all about?"})

'Westworld Park is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts." It offers a range of activities and experiences, including family-friendly areas and adult-only areas with more risqué activities. The park is designed to provide a unique and engaging experience for its guests.'

In [48]:
# from langchain.llms import Ollama

# ollama_llama3_llm = Ollama(model=OLLAMA_LLAMA3_70B, temperature=TEMPERATURE)

# rag_chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | rag_prompt
#     | ollama_llama3_llm
#     | StrOutputParser()
# )

# rag_chain.invoke({"question": "What is the Westworld Park all about?"})

In [49]:
rag_chain.invoke("who are some of the hosts I can meet?")

'Some of the hosts you can meet in Westworld include Maeve, Logan, Peter Abernathy, and Clementine. You can engage in conversations and storylines with them, exploring their personalities, perspectives, and experiences within the park.'

In [50]:
rag_chain.invoke("Who is Ford?")

"Dr. Ford is a key figure in the operations of Westworld, an immersive high-tech theme park. He is involved in the creation of Westworld, but it's not possible to meet with him directly. Instead, guests can engage with a host designed to discuss the history and philosophy behind Westworld's creation, offering insights similar to those of Dr. Ford."

In [51]:
rag_chain.invoke("Can I sleep under the stars there?")

"There is no mention of sleeping under the stars in the provided context. However, it does mention emergency shelters and evacuation points in the outer regions, but it doesn't suggest that guests can sleep under the stars."

In [52]:
from langchain_groq import ChatGroq

chatgroq_llm = ChatGroq(model=GROQ_LLM_MODEL, temperature=TEMPERATURE)

In [53]:
def write_markdown_file(content, filename):
    """Writes the given content as a markdown file to the local directory.

    Args:
      content: The string content to write to the file.
      filename: The filename to save the file as.
    """
    if type(content) == dict:
        content = "\n".join(f"{key}: {value}" for key, value in content.items())
    if type(content) == list:
        content = "\n".join(content)
    with open(f"{filename}.md", "w") as f:
        f.write(content)