# Chroma DB ingestion and Q&A

## Ingestion

In [1]:
import chromadb
# Chroma client used by every collection call in this notebook.
# NOTE(review): Client() with no arguments is presumably the in-memory,
# non-persistent client - confirm against the installed chromadb version.
chroma_client = chromadb.Client()

In [2]:
# get_or_create_collection keeps this cell re-runnable in a live kernel:
# create_collection fails once "tourism_collection" already exists on the client.
tourism_collection = chroma_client.get_or_create_collection(name="tourism_collection")

In [3]:
# All three passages are taken from the same article, so they share one source URL.
paestum_source = "https://www.britannica.com/place/Paestum"

# Passage id -> passage text; insertion order is the order they are sent to Chroma.
paestum_passages = {
    "paestum-br-01": "Paestum, Greek Poseidonia, ancient city in southern Italy near the west coast, 22 miles (35 km) southeast of modern Salerno and 5 miles (8 km) south of the Sele (ancient Silarus) River. Paestum is noted for its splendidly preserved Greek temples.",
    "paestum-br-02": "Poseidonia was probably founded about 600 BC by Greek colonists from Sybaris, along the Gulf of Taranto, and it had become a flourishing town by 540, judging from its temples. After many years’ resistance the city came under the domination of the Lucanians (an indigenous Italic people) sometime before 400 BC, after which its name was changed to Paestum. Alexander, the king of Epirus, defeated the Lucanians at Paestum about 332 BC, but the city remained Lucanian until 273, when it came under Roman rule and a Latin colony was founded there. The city supported Rome during the Second Punic War. The locality was still prosperous during the early years of the Roman Empire, but the gradual silting up of the mouth of the Silarus River eventually created a malarial swamp, and Paestum was finally deserted after being sacked by Muslim raiders in AD 871. The abandoned site’s remains were rediscovered in the 18th century.",
    "paestum-br-03": "The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are 15–20 feet (5–6 m) thick, is about 3 miles (5 km) in circumference. In July 1969 a farmer uncovered an ancient Lucanian tomb that contained Greek frescoes painted in the early classical style. Paestum’s archaeological museum contains these and other treasures from the site.",
}

# documents, metadatas and ids are parallel lists: position i describes the same passage.
tourism_collection.add(
    documents=list(paestum_passages.values()),
    metadatas=[{"source": paestum_source} for _ in paestum_passages],
    ids=list(paestum_passages.keys()),
)

## Q&A

In [4]:
# Ask the collection for a single result for this question and show the raw response.
results = tourism_collection.query(
    n_results=1,
    query_texts=["How many Doric temples are in Paestum"],
)
print(results)

{'ids': [['paestum-br-03']], 'distances': [[0.7664762139320374]], 'metadatas': [[{'source': 'https://www.britannica.com/place/Paestum'}]], 'embeddings': None, 'documents': [['The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are

In [6]:
# Same question, but request three results so every stored passage comes back with its distance.
results = tourism_collection.query(
    n_results=3,
    query_texts=["How many Doric temples are in Paestum"],
)
print(results)

{'ids': [['paestum-br-03', 'paestum-br-01', 'paestum-br-02']], 'distances': [[0.7664762139320374, 0.8946815729141235, 1.336229681968689]], 'metadatas': [[{'source': 'https://www.britannica.com/place/Paestum'}, {'source': 'https://www.britannica.com/place/Paestum'}, {'source': 'https://www.britannica.com/place/Paestum'}]], 'embeddings': None, 'documents': [['The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Tr

In [5]:
# Same single-result query as the first Q&A cell; `results` ends up holding a one-passage response again.
results = tourism_collection.query(
    n_results=1,
    query_texts=["How many Doric temples are in Paestum"],
)
print(results)

{'ids': [['paestum-br-03']], 'distances': [[0.7664762139320374]], 'metadatas': [[{'source': 'https://www.britannica.com/place/Paestum'}]], 'embeddings': None, 'documents': [['The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are

# RAG from scratch

In [10]:
from openai import OpenAI
import getpass

# Read the key interactively so it is never written into the notebook source.
OPENAI_API_KEY = getpass.getpass(prompt='Enter your OPENAI_API_KEY')

Enter your OPENAI_API_KEY ········


In [11]:
# OpenAI client built from the key held in OPENAI_API_KEY; execute_llm_prompt
# sends its chat-completion requests through this object.
openai_client = OpenAI(api_key=OPENAI_API_KEY)

In [12]:
def query_vector_database(question, n_results=1):
    """Retrieve the stored passage(s) that best match ``question``.

    Args:
        question: Natural-language question, sent to ``tourism_collection``
            as the only query text.
        n_results: Number of passages to request. The default of 1 keeps the
            original single-passage behaviour.

    Returns:
        str: The retrieved passage text. When more than one passage comes
        back, the passages are joined with a blank line between them. An
        empty string is returned when nothing is retrieved, rather than
        raising ``IndexError``.
    """
    results = tourism_collection.query(
        query_texts=[question],
        n_results=n_results,
    )

    # 'documents' holds one list of passages per query text; exactly one
    # query text was sent, so only the first list is relevant.
    documents_per_query = results['documents'] or [[]]
    passages = documents_per_query[0]

    return "\n\n".join(passages)

## Naive prompt implementation

In [13]:
def prompt_template(question, text):
    """Build the naive prompt: an instruction containing ``question``, then ``text`` on its own line."""
    instruction = f'Read the following text and answer this question: {question}. '
    passage = f'Text: {text}'
    return instruction + '\n' + passage

In [18]:
def execute_llm_prompt(prompt_input, model='gpt-4o-mini', temperature=0.7):
    """Send ``prompt_input`` to the chat-completions endpoint via ``openai_client``.

    Args:
        prompt_input: Full prompt text, sent as the user message.
        model: Name of the chat model to call. Defaults to the model the
            notebook originally hard-coded.
        temperature: Sampling temperature passed through to the API.
            Defaults to the originally hard-coded 0.7.

    Returns:
        The object returned by ``openai_client.chat.completions.create``,
        unmodified.
    """
    messages = [
        {"role": "system", "content": "You are an assistant for question-answering tasks."},
        {"role": "user", "content": prompt_input},
    ]
    return openai_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )

### Trick question

In [19]:
# A question the stored passages do not answer, run through the naive prompt:
# retrieve -> build prompt -> call the LLM -> show the raw response object.
trick_question = "How many columns have the three temples got in total?"
tq_result_text = query_vector_database(trick_question)
tq_prompt = prompt_template(question=trick_question, text=tq_result_text)
tq_prompt_response = execute_llm_prompt(prompt_input=tq_prompt)
print(tq_prompt_response)

ChatCompletion(id='chatcmpl-9nCGTTSUBJUmAOZWVIGbF8OE7nLF5', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="The text does not specify the number of columns for each of the three temples. Therefore, it's impossible to determine the total number of columns based solely on the provided information. Additional sources would be needed to find the exact number of columns for each temple.", role='assistant', function_call=None, tool_calls=None))], created=1721512245, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_661538dc1f', usage=CompletionUsage(completion_tokens=50, prompt_tokens=291, total_tokens=341))


## Safer prompt implementation

In [20]:
def prompt_template(question, text):
    """Build the guarded prompt: answer ``question`` only from ``text``, otherwise say "I do not know".

    The wording is assembled from a rules preamble, the question and context
    lines, and a closing reminder placed just before the ``Answer:`` cue.
    """
    rules = (
        'Use the following pieces of retrieved context to answer the question. '
        'Only use the retrieved context to answer the question. '
        "If you don't know the answer, or the answer is not contained in the retrieved context, "
        "just say that you don't know. "
        'Use three sentences maximum and keep the answer concise. '
    )
    reminder = (
        'Remember: if you do not know, just say: I do not know. '
        'Do not make up an answer. '
        'For example do not say the three temples have got a total of three columns. '
    )
    return f'{rules}\nQuestion: {question}\nContext: {text}. {reminder}\nAnswer:'

### Trick question

In [21]:
# Same unanswerable question, now run through whichever prompt_template is
# currently defined: retrieve -> build prompt -> call the LLM -> show the raw response.
trick_question = "How many columns have the three temples got in total?"
tq_result_text = query_vector_database(trick_question)
tq_prompt = prompt_template(question=trick_question, text=tq_result_text)
tq_prompt_response = execute_llm_prompt(prompt_input=tq_prompt)
print(tq_prompt_response)

ChatCompletion(id='chatcmpl-9nCco9P3xSdArsptotrmJEjtd2N5D', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='I do not know.', role='assistant', function_call=None, tool_calls=None))], created=1721513630, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_8b761cb050', usage=CompletionUsage(completion_tokens=5, prompt_tokens=383, total_tokens=388))


## Building a chatbot

In [22]:
def my_chatbot(question):
    """Answer ``question`` by chaining retrieval, prompt construction and the LLM call.

    Returns whatever ``execute_llm_prompt`` returns for the assembled prompt.
    """
    # Retrieval output feeds the prompt; the prompt feeds the LLM call.
    context = query_vector_database(question)
    return execute_llm_prompt(prompt_template(question, context))

In [26]:
# A three-part question (count, builders, style) sent through the full chatbot pipeline.
question = "Let me know how many temples there are in Paestum, who constructed them, and what architectural style they are"
result = my_chatbot(question=question)
print(result)

ChatCompletion(id='chatcmpl-9nCopizP4XgxdWVQoGxMIxMvk6cFa', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='There are three Doric temples in Paestum. They were constructed by the ancient Greeks during the 6th century BC and the mid-5th century BC. The architectural style of these temples is Doric.', role='assistant', function_call=None, tool_calls=None))], created=1721514375, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_611b667b19', usage=CompletionUsage(completion_tokens=44, prompt_tokens=396, total_tokens=440))
