### Load the data

In [2]:
%config IPCompleter.greedy=True

In [3]:
import requests
import json

# Download the data
resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')
data = json.loads(resp.text)  # Load data

# Parse the JSON and preview it
print(type(data), len(data))
print(json.dumps(data[0], indent=2))

<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [4]:
def json_print(data):
    print(json.dumps(data, indent=2))

In [8]:
#Print out the rest of the data

json_print(data)

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
    "Answer": "species"
  },
  {
    "Category": "SCIENCE",
   

### Now we're going to initialize Weaviate - our vector DB

In [42]:
import weaviate
from weaviate.classes.config import Property, DataType
from weaviate.embedded import EmbeddedOptions
import weaviate.classes.config as wvcc
import os

In [43]:
#Start up an instance of Weaviate

client = weaviate.connect_to_embedded(#embedded_options=EmbeddedOptions(),
                        headers={
                            "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]
                        })


WeaviateStartUpError: Embedded DB did not start because processes are already listening on ports http:8079 and grpc:50050use weaviate.connect_to_local(port=8079, grpc_port=50050) to connect to the existing instance

In [44]:
#Check that weaviate is up and running

json_print(client.get_meta())

{
  "hostname": "http://127.0.0.1:8079",
  "modules": {
    "generative-openai": {
      "documentationHref": "https://platform.openai.com/docs/api-reference/completions",
      "name": "Generative Search - OpenAI"
    },
    "qna-openai": {
      "documentationHref": "https://platform.openai.com/docs/api-reference/completions",
      "name": "OpenAI Question & Answering Module"
    },
    "ref2vec-centroid": {},
    "reranker-cohere": {
      "documentationHref": "https://txt.cohere.com/rerank/",
      "name": "Reranker - Cohere"
    },
    "text2vec-cohere": {
      "documentationHref": "https://docs.cohere.ai/embedding-wiki/",
      "name": "Cohere Module"
    },
    "text2vec-huggingface": {
      "documentationHref": "https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task",
      "name": "Hugging Face Module"
    },
    "text2vec-openai": {
      "documentationHref": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings",
      "nam

In [45]:
#Delete the schema if it alredy exists
if client.collections.exists("Question"):
    print("deleting Questions")
    client.collections.delete("Question")
else:
    print("no comprendo")


deleting Questions


In [46]:
# Define the class schema

class_schema = {
    "class": "Question",
    "properties": {
        # Define properties here if needed
    },
    "vectorizer": "text2vec-openai"  # Specify the vectorizer
}

try:
    # Create the class schema in Weaviate
    collection = client.collections.create(
        name="Question",
        vectorizer_config=wvcc.Configure.Vectorizer.text2vec_openai(),
        properties=[
            Property(name="answer", data_type=DataType.TEXT),
            Property(name="question", data_type=DataType.TEXT),
            Property(name="category", data_type=DataType.TEXT),
        ]
    )

    print("Class schema has been successfully created.")
except Exception as e:
    print(f"Failed to create class schema: {e}")

Class schema has been successfully created.


{"level":"info","msg":"Created shard question_zii33D9fEfYp in 2.905708ms","time":"2024-03-21T16:23:27-05:00"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-03-21T16:23:27-05:00","took":46458}


In [47]:
with collection.batch.dynamic() as batch:
    for i, d in enumerate(data):  # Batch import data
        
        print(f"importing question: {i+1}")
            
        #Specify the properties we want to import into Weviate
        
        data_obj = {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
        }
        
        #Add data to Weaviate
        
        batch.add_object(properties=data_obj)

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


In [52]:
#Check how many objects we've loaded into the database
response = collection.aggregate.over_all(
    total_count=True,
)

print(response.total_count)
print(response.properties)



10
{}


In [53]:
#Extract and show any 3 questions and answers

response = collection.query.fetch_objects(
        return_properties=["question", "answer"],
        limit=3
    )

for o in response.objects:
    print(o.properties)  # Inspect returned objects

{'question': 'Heaviest of all poisonous snakes is this North American rattlesnake', 'answer': 'the diamondback rattler'}
{'question': 'Changes in the tropospheric layer of this are what gives us weather', 'answer': 'the atmosphere'}
{'answer': 'Sound barrier', 'question': 'In 70-degree air, a plane traveling at about 1,130 feet per second breaks it'}
