# [STARTER] Udaplay Project

## Part 01 - Offline RAG

In this part of the project, you'll build your VectorDB using Chroma.

The data is inside the folder `project/starter/games`. Each file will become a document in the collection you'll create.
Example:
```json
{
  "Name": "Gran Turismo",
  "Platform": "PlayStation 1",
  "Genre": "Racing",
  "Publisher": "Sony Computer Entertainment",
  "Description": "A realistic racing simulator featuring a wide array of cars and tracks, setting a new standard for the genre.",
  "YearOfRelease": 1997
}
```


### Setup

In [2]:
# Only needed for Udacity workspace

import importlib.util
import sys

# When the optional 'pysqlite3' package is installed, register it under the
# name 'sqlite3' so later `import sqlite3` statements resolve to it.
# NOTE(review): presumably this is for chromadb's SQLite requirement - confirm.
_pysqlite3_spec = importlib.util.find_spec("pysqlite3")
if _pysqlite3_spec is not None:
    import pysqlite3  # noqa: F401 - imported only to populate sys.modules

    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

In [3]:
import os
import json
import chromadb
from chromadb.utils import embedding_functions
from dotenv import load_dotenv
from lib.vector_db import VectorStore

In [4]:
# TODO: Create a .env file with the following variables
# Load the .env file before reading the keys: os.getenv() only sees variables
# that are already in the process environment, so reading first would yield
# None for any key that is defined only in .env.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
CHROMA_OPENAI_API_KEY = os.getenv("CHROMA_OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

In [5]:
# TODO: Load environment variables
# Reads the .env file (if one is found) into the process environment so that
# os.getenv() and libraries that read API keys from the environment see them.
load_dotenv()

True

### VectorDB Instance

In [8]:
# TODO: Instantiate your ChromaDB Client
# Choose any path you want
# The path is relative to the current working directory ("chromadb" here).
chroma_client = chromadb.PersistentClient(path="chromadb")

### Collection

In [6]:
# TODO: Pick one embedding function
# If you pick something other than OpenAI,
# make sure you use the same one when loading the collection later.
# No API key is passed explicitly - presumably it is picked up from the
# environment (CHROMA_OPENAI_API_KEY); verify against your chromadb version.
embedding_fn = embedding_functions.OpenAIEmbeddingFunction()

In [11]:
# Drop any collection left over from a previous run so that the
# create_collection call in the next cell starts from a clean slate.
# On a first run there is nothing to delete; that case is not an error here.
try:
    chroma_client.delete_collection(name="udaplay")
except (ValueError, chromadb.errors.NotFoundError):
    # Collection does not exist yet (the exception type depends on the
    # chromadb version) - nothing to clean up.
    pass

In [12]:
# TODO: Create a collection
# Choose any name you want
# The collection is bound to embedding_fn chosen above.
udaplay_collection = chroma_client.create_collection(
    name="udaplay",
    embedding_function=embedding_fn,
)

### Add documents

In [13]:
# Make sure you have a directory "project/starter/games".
# The path below is resolved relative to the current working directory.
data_dir = "games"

# Gather every game first and hand them to the collection in one add() call,
# instead of one call (and one pass through the embedding function) per file.
doc_ids = []
contents = []
metadatas = []

for file_name in sorted(os.listdir(data_dir)):
    if not file_name.endswith(".json"):
        continue

    file_path = os.path.join(data_dir, file_name)
    with open(file_path, "r", encoding="utf-8") as f:
        game = json.load(f)

    # You can change what text you want to index
    content = f"[{game['Platform']}] {game['Name']} ({game['YearOfRelease']}) - {game['Description']}"

    # Use file name (like 001) as ID
    doc_id = os.path.splitext(file_name)[0]

    doc_ids.append(doc_id)
    contents.append(content)
    # The full game record is stored as metadata so queries can return it.
    metadatas.append(game)

# Nothing to index if the directory held no JSON files; skip the call rather
# than send an empty batch.
if doc_ids:
    udaplay_collection.add(
        ids=doc_ids,
        documents=contents,
        metadatas=metadatas,
    )

In [14]:
# Sanity check: how many documents the collection holds after the load above.
udaplay_collection.count()

15

In [15]:
# Inspect one stored record (id, embedding, document text and metadata).
udaplay_collection.peek(1)

{'ids': ['001'],
 'embeddings': array([[-0.00270652, -0.01438423, -0.0094357 , ..., -0.02086851,
         -0.00733747, -0.03789448]], shape=(1, 1536)),
 'documents': ['[PlayStation 1] Gran Turismo (1997) - A realistic racing simulator featuring a wide array of cars and tracks, setting a new standard for the genre.'],
 'uris': None,
 'included': ['embeddings', 'metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Publisher': 'Sony Computer Entertainment',
   'Genre': 'Racing',
   'Name': 'Gran Turismo',
   'Platform': 'PlayStation 1',
   'YearOfRelease': 1997,
   'Description': 'A realistic racing simulator featuring a wide array of cars and tracks, setting a new standard for the genre.'}]}

In [16]:
# Wrap the collection in the project's VectorStore helper (lib.vector_db);
# the queries below go through this wrapper.
udaplay_store = VectorStore(udaplay_collection)

In [19]:
# Try the store with a single free-text query, asking for two matches.
results = udaplay_store.query(
    query_texts=["video game strategy"],
    n_results=2
)

In [20]:
# Metadata of the matches for the first (and only) query text.
results['metadatas'][0]

[{'Publisher': 'Nintendo',
  'Name': 'Pokémon Gold and Silver',
  'Platform': 'Game Boy Color',
  'YearOfRelease': 1999,
  'Genre': 'Role-playing',
  'Description': 'Second-generation Pokémon games introducing new regions, Pokémon, and gameplay mechanics.'},
 {'YearOfRelease': 2001,
  'Name': 'Super Smash Bros. Melee',
  'Description': 'A crossover fighting game featuring characters from various Nintendo franchises battling it out in dynamic arenas.',
  'Platform': 'GameCube',
  'Genre': 'Fighting',
  'Publisher': 'Nintendo'}]

In [18]:
def get_game_details(user_input: str, no_results: int) -> list[dict]:
    """Return the metadata of the games matching a free-text query.

    Args:
        user_input: Free-text search string.
        no_results: Number of matches to request from the store.

    Returns:
        One metadata dict per match (the fields stored with each game, such
        as ``Name``, ``Platform`` and ``Genre``), in the order the store
        returned them.
    """
    response = udaplay_store.query(
        # Pass a one-element list, the same shape used by the other query
        # call in this notebook.
        query_texts=[user_input],
        n_results=no_results,
    )
    # Results are grouped per query text; only one query text was sent.
    return response['metadatas'][0]

# Interactive smoke test: read a query from the user, echo the inputs,
# run the lookup and print whatever it returns.
user_input = input("Enter your text: ")
n_results = 2

inputs_banner = ">>>>>>>>>>>> INPUTS >>>>>>>>>>>>"
results_banner = "============ RESULTS ==========="

# Opening banner is followed by one blank line, closing banner by two.
print(inputs_banner, end="\n\n")
print(f"input String            : {user_input}")
print(f"No. of Results Requsted : {n_results}")
print(inputs_banner, end="\n\n\n")

results = get_game_details(user_input, n_results)

print(results_banner, end="\n\n")
print(results)
print(results_banner)


>>>>>>>>>>>> INPUTS >>>>>>>>>>>>

input String            : Video game strategy
No. of Results Requsted : 2
>>>>>>>>>>>> INPUTS >>>>>>>>>>>>



[{'Genre': 'Role-playing', 'Name': 'Pokémon Gold and Silver', 'Platform': 'Game Boy Color', 'YearOfRelease': 1999, 'Description': 'Second-generation Pokémon games introducing new regions, Pokémon, and gameplay mechanics.', 'Publisher': 'Nintendo'}, {'Description': 'A crossover fighting game featuring characters from various Nintendo franchises battling it out in dynamic arenas.', 'Platform': 'GameCube', 'YearOfRelease': 2001, 'Publisher': 'Nintendo', 'Genre': 'Fighting', 'Name': 'Super Smash Bros. Melee'}]
