1. Load up the 1K Jeopardy dataset, which has 1000 objects in total, and keep at least the question, answer and round properties.
2. How do you check for the number of objects stored in the database?
3. Search for objects that are close to the concept of “spicy food recipes” and show 4 question-and-answer pairs.
4. Can you find “spicy food recipes” related questions that were used in Double Jeopardy rounds?


### Q1. Load up the dataset and keep at least the question, answer and round properties.

In [12]:
import requests
import json

# Download the data. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() surfaces an HTTP error (e.g. a 404) here
# instead of as a confusing JSON decode error on the next line.
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/main/data/jeopardy_1k.json',
    timeout=30,
)
resp.raise_for_status()
data = json.loads(resp.text)  # Load data

# Parse the JSON and preview it
print(type(data), len(data))
# Slice instead of indexing 0..49 so the preview cannot raise IndexError
# when fewer than 50 records are returned.
for record in data[:50]:
    print(json.dumps(record, indent=2))

<class 'list'> 1000
{
  "Air Date": "2006-11-08",
  "Round": "Double Jeopardy!",
  "Value": 800,
  "Category": "AMERICAN HISTORY",
  "Question": "Abraham Lincoln died across the street from this theatre on April 15, 1865",
  "Answer": "Ford's Theatre (the Ford Theatre accepted)"
}
{
  "Air Date": "2005-11-18",
  "Round": "Jeopardy!",
  "Value": 200,
  "Category": "RHYME TIME",
  "Question": "Any pigment on the wall so faded you can barely see it",
  "Answer": "faint paint"
}
{
  "Air Date": "1987-06-23",
  "Round": "Double Jeopardy!",
  "Value": 600,
  "Category": "AMERICAN HISTORY",
  "Question": "After the original 13, this was the 1st state admitted to the union",
  "Answer": "Vermont"
}
{
  "Air Date": "2011-01-13",
  "Round": "Jeopardy!",
  "Value": 400,
  "Category": "TRANSPORTATION",
  "Question": "In 1922 Warren Harding said that this \"gauges the speed of our present-day life. It long ago ran down simple living\"",
  "Answer": "the automobile"
}
{
  "Air Date": "2001-12-03",
 

In [3]:
import weaviate
from weaviate.classes.config import Property, DataType
from weaviate.embedded import EmbeddedOptions
import weaviate.classes.config as wvcc
import weaviate.classes as wvc
import os

In [4]:

# Connect to an embedded Weaviate instance. The OpenAI API key is read from
# the environment (KeyError if it is not set) and passed along via `headers`.
openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
client = weaviate.connect_to_embedded(headers=openai_headers)

Started /Users/sharif.shaker/.cache/weaviate-embedded: process ID 17131


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-03-22T17:37:55-05:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-03-22T17:37:55-05:00"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-03-22T17:37:55-05:00"}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50050","time":"2024-03-22T17:37:55-05:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2024-03-22T17:37:55-05:00"}
{"level":"info","msg":"Completed loading shard question_RsEY66UJx6d7 in 14.222333ms","time":"2024-03-22T17:37:56-05:00"}
{"action":"hnsw_vector_cache_pre

In [5]:
# Delete the "Question" collection if it already exists, so it can be created fresh
question_exists = client.collections.exists("Question")
if not question_exists:
    print("no comprendo")
else:
    print("deleting Questions")
    client.collections.delete("Question")

deleting Questions


In [6]:
# Define the collection ("class") that will be used to add the data.
# We need to have properties for the question, answer and round.

try:
    # Create the collection schema in Weaviate, with the OpenAI text2vec
    # module configured as the vectorizer
    collection = client.collections.create(
        name="Question",
        vectorizer_config=wvcc.Configure.Vectorizer.text2vec_openai(),
        properties=[
            Property(name="answer", data_type=DataType.TEXT),
            Property(name="question", data_type=DataType.TEXT),
            Property(name="round", data_type=DataType.TEXT),
        ],
    )
except Exception as e:
    # Report the failure, then re-raise: swallowing it would leave `collection`
    # undefined and make every later cell fail with an unrelated NameError.
    print(f"Failed to create class schema: {e}")
    raise
else:
    print("Class schema has been successfully created.")

Class schema has been successfully created.


{"level":"info","msg":"Created shard question_EAKEiad2WRLo in 1.720792ms","time":"2024-03-22T17:38:07-05:00"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-03-22T17:38:07-05:00","took":36375}


In [7]:
# Insert the data into Weaviate

with collection.batch.dynamic() as batch:
    for d in data:  # Batch import data
        # Specify the properties we want to import into Weaviate
        data_obj = {
            "answer": d["Answer"],
            "question": d["Question"],
            "round": d["Round"],
        }

        # Add data to Weaviate
        batch.add_object(properties=data_obj)

# Per the Weaviate client docs, objects that fail during a batch import are
# collected on `failed_objects` rather than raised, so check explicitly.
# Nothing is printed when every object was imported.
failed_objects = collection.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

### Q2. How do you check for the number of objects stored in the database?

In [9]:
# Aggregate over the whole collection, asking only for the total object count
response = collection.aggregate.over_all(total_count=True)
object_count = response.total_count
print(object_count)

1000


### Q3. Search for objects that are close to the concept of "spicy food recipes" and show 4 QnA

In [11]:
# Semantic (near-text) search for the query concept, capped at 4 results
response = collection.query.near_text(
    limit=4,
    query=["spicy food recipes"],
    return_properties=["question", "answer", "round"],
)

# No return_metadata is requested in this query; the recorded output shows
# every metadata field printing as None.
for obj in response.objects:
    print(obj.properties)
    print(obj.metadata)


{'question': 'Popular in Pennsylvania, pepper pot is a peppery soup made from this stomach lining', 'answer': 'tripe', 'round': 'Jeopardy!'}
MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
{'answer': 'Chiles Rellenos', 'question': 'The name of this Mexican dish made with chiles & cheese translates to "stuffed peppers"', 'round': 'Jeopardy!'}
MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
{'question': 'Dishes flavored or garnished with these rare costly fungi are referred to as "a la Perigourdine"', 'answer': 'Truffles', 'round': 'Jeopardy!'}
MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
{'round': 'Jeopardy!', 'answer': 'octopi', 'question': '"Joy of Coo

### Q4. Can you find "spicy food recipes" related questions that were used in Double Jeopardy rounds?

In [17]:
# Same near-text search, restricted to objects whose "round" property
# equals "Double Jeopardy!", and returning the vector distance as metadata
double_jeopardy_only = wvc.query.Filter.by_property("round").equal("Double Jeopardy!")
response = collection.query.near_text(
    query=["spicy food recipes"],
    return_properties=["question", "answer", "round"],
    filters=double_jeopardy_only,
    return_metadata=wvc.query.MetadataQuery(distance=True),
    limit=10,
)

# Print each match followed by its distance to the query
for hit in response.objects:
    print(hit.properties)
    print(hit.metadata.distance)

{'answer': '"Jambalaya"', 'round': 'Double Jeopardy!', 'question': 'This Creole concoction of meat & seafood is so good Hank Williams wrote a song about it in 1952'}
0.22759437561035156
{'answer': 'litmus', 'round': 'Double Jeopardy!', 'question': 'Vegetable dye that turns red in acid solutions & blue in alkaline solutions'}
0.24203848838806152
{'answer': 'Herring', 'round': 'Double Jeopardy!', 'question': 'The Bismarck type of this fish is made of fillets cured in vinegar, salt & onions'}
0.24218040704727173
{'question': 'Maple or otherwise, this thick, sweet liquid is from the Arabic for "to drink"', 'answer': 'syrup', 'round': 'Double Jeopardy!'}
0.25029999017715454
{'question': 'The Phoenicians used a liquid from several species of this gastropod to make Tyrian purple dye', 'round': 'Double Jeopardy!', 'answer': 'Snail'}
0.2528190016746521
{'round': 'Double Jeopardy!', 'question': 'Lactobacillus bulgaricus is added to milk to make this thick semi-solid dairy product', 'answer': 'yo

In [18]:
# Close the client connection now that all queries are done
client.close()

{"action":"restapi_management","level":"info","msg":"Shutting down... ","time":"2024-03-22T17:51:42-05:00"}
{"action":"restapi_management","level":"info","msg":"Stopped serving weaviate at http://127.0.0.1:8079","time":"2024-03-22T17:51:42-05:00"}
