In [1]:
import requests
import json

# Download the data.
# A timeout keeps the cell from hanging forever if the host is unreachable, and
# raise_for_status() makes an HTTP error (404, 500, ...) fail here with a clear
# message instead of feeding an error page to the JSON parser.
DATA_URL = 'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json'
resp = requests.get(DATA_URL, timeout=30)
resp.raise_for_status()
data = resp.json()  # Parse the JSON body

# Preview what was loaded: container type and number of records
print(type(data), len(data))

def json_print(data):
    """Pretty-print ``data`` to stdout as JSON, indented by two spaces.

    ``data`` must be serializable by ``json.dumps``; nothing is returned.
    """
    rendered = json.dumps(data, indent=2)
    print(rendered)

# Show the first record to see which fields each entry carries
json_print(data[0])

<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [35]:
import weaviate

# Connect to a locally running Weaviate instance (not Weaviate Cloud)
client = weaviate.connect_to_local()

# Check that the server reports itself as ready before using it

print(client.is_ready())

True


In [41]:
import weaviate.classes.config as wc

# Drop any existing "Question" collection so the create call below starts fresh.
if client.collections.exists("Question"):
    client.collections.delete("Question")

# The vectorizer and the generative module both use the same Ollama endpoint.
ollama_endpoint = 'http://ollama:11434'

client.collections.create(
    name="Question",
    # One TEXT property per field of a record
    properties=[
        wc.Property(name=prop_name, data_type=wc.DataType.TEXT)
        for prop_name in ("category", "question", "answer")
    ],
    # Vectorizer module: embeddings via Ollama
    vectorizer_config=wc.Configure.Vectorizer.text2vec_ollama(
        api_endpoint=ollama_endpoint,
        model='nomic-embed-text',
    ),
    # Generative module: text generation via Ollama
    generative_config=wc.Configure.Generative.ollama(
        api_endpoint=ollama_endpoint,
        model='llama3.2',
    ),
)

# Close the connection; it has to be re-opened with client.connect() before further use.
client.close()

In [45]:
# Re-open the client connection and fetch a handle to the "Question" collection.
client.connect()
collection = client.collections.get("Question")

# Collection property name -> key under which the value is stored in each source record.
property_keys = {"category": "Category", "question": "Question", "answer": "Answer"}

with collection.batch.dynamic() as batch:
    for record in data:
        # No vector is supplied, so the model provider integration vectorizes the
        # object; pass vector=... to add_object to provide a pre-obtained vector.
        batch.add_object(
            properties={prop: record[key] for prop, key in property_keys.items()},
        )

# Objects that could not be imported are collected here; an empty list means none failed.
print(collection.batch.failed_objects)

[]
