	
1- Load up the 1K jeopardy dataset that has 1000 objects in total, keep at least the question, answer and round properties.  
2- How do you check for the number of objects stored in the database?  
3- Search for objects that are close to the concept of “spicy food recipes” and show 4 QnA  
4- Can you find “spicy food recipes” related questions that were used in Double Jeopardy rounds?

## 1 - Load the dataset
#### We'll use Weaviate (the open-source vector database)

In [2]:
import requests
import json

# Download the 1K Jeopardy dataset (a JSON array of question records).
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting it
# surface later as a confusing JSON parse failure.
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/main/data/jeopardy_1k.json',
    timeout=30,
)
resp.raise_for_status()
data = json.loads(resp.text)

# Preview the parsed payload: container type, record count, and one sample record
print(type(data), len(data))
print(json.dumps(data[1], indent=2))

<class 'list'> 1000
{
  "Air Date": "2005-11-18",
  "Round": "Jeopardy!",
  "Value": 200,
  "Category": "RHYME TIME",
  "Question": "Any pigment on the wall so faded you can barely see it",
  "Answer": "faint paint"
}


#### Insert into Weaviate

In [None]:

import weaviate
from weaviate.classes.config import Configure, Property, DataType

# Step 1.1: Connect to your local Weaviate instance

collection_name = 'Jeopardy'

with weaviate.connect_to_local() as client:

    # Start from a clean slate: drop the collection if it already exists
    if client.collections.exists(collection_name):
        client.collections.delete(collection_name)

    # Step 1.2: Create a collection
    collec = client.collections.create(
        name=collection_name,
        vector_config=Configure.Vectors.text2vec_ollama(  # Configure the Ollama embedding integration
            api_endpoint="http://ollama:11434",  # If using Docker you might need: http://host.docker.internal:11434
            model="nomic-embed-text",  # The model to use
        ),
        properties=[
            Property(name="Round", data_type=DataType.TEXT),
            Property(name="Question", data_type=DataType.TEXT),
            Property(name="Answer", data_type=DataType.TEXT),
        ],
    )

    # Step 1.3: Import every record, keeping only the properties we need
    keys_to_keep = ['Round', 'Question', 'Answer']
    data_objects = [
        {k: item[k] for k in keys_to_keep if k in item}
        for item in data
    ]

    # `create()` already returned the collection handle, so it is reused here
    # rather than being looked up a second time.
    with collec.batch.fixed_size(batch_size=200) as batch:
        for obj in data_objects:
            batch.add_object(properties=obj)

    # Objects that fail during a batch import are collected rather than
    # raised, so report them explicitly before announcing success.
    failed_objects = collec.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")

    print(f"Imported & vectorized {len(collec)} objects into the {collection_name}  collection")


## 2 - Number of objects inserted

In [21]:
# The `with` block of the import cell closed the client on exit,
# so open the connection again before querying.
client.connect()

# Ask the collection for an aggregate over all objects, requesting only the total count
collec = client.collections.use(collection_name)
response = collec.aggregate.over_all(
    total_count=True,
)

print(response.total_count)

AggregateReturn(properties={}, total_count=1000)
1000


In [None]:
# Exploration: fetch a few stored objects and look at two of their properties.
# The property names are written in lowercase because that is how they appear
# in the objects returned by the search cell ("round", "answer", "question").

response = collec.query.fetch_objects(
    limit=3,
    return_properties=["round", "answer"]
)

# Iterate over `response` (the name the query result is bound to); the
# original loop read an undefined `result` and raised NameError.
for obj in response.objects:
    print(obj.properties)

## 3- Search for objects that are close to the concept of “spicy food recipes” and show 4 QnA
#### Semantic (vector) search

In [24]:
# Make sure the client connection is open before running the query
client.connect()

# Vector search: retrieve the 4 objects closest in meaning to the query text
response = collec.query.near_text(query="spicy food recipes", limit=4)

# Pretty-print the properties of each hit as JSON
for obj in response.objects:
    hit_properties = obj.properties
    print(json.dumps(hit_properties, indent=2))

{
  "answer": "Chiles Rellenos",
  "question": "The name of this Mexican dish made with chiles & cheese translates to \"stuffed peppers\"",
  "round": "Jeopardy!"
}
{
  "answer": "tripe",
  "question": "Popular in Pennsylvania, pepper pot is a peppery soup made from this stomach lining",
  "round": "Jeopardy!"
}
{
  "answer": "pasta",
  "question": "Type of food that comes in shapes of bow ties, elbows & wagon wheels",
  "round": "Jeopardy!"
}
{
  "answer": "coatimundi",
  "question": "Fruit & lizards (yum!) are favorite foods of the coati, also known by this longer name",
  "round": "Jeopardy!"
}


In [None]:
# Close the connection that the earlier cells re-opened with client.connect()
client.close()