In [1]:
import weaviate


In [None]:
# Open a client connection using weaviate's local-connection helper; the
# resulting `client` is reused by the import cells further down.
client = weaviate.connect_to_local()
# Ask the connected instance for its metadata and print it, which doubles as
# a quick check that the connection works.
meta_info = client.get_meta()
print(meta_info)


In [None]:
# Handle to the collection that will receive the sample objects.
collection = client.collections.get("MyCollection")

# Five sample records titled "Object 1" through "Object 5".
data_rows = [{"title": f"Object {n}"} for n in range(1, 6)]

# Queue every record through the collection's dynamic batch context.
with collection.batch.dynamic() as batch:
    for data_row in data_rows:
        batch.add_object(properties=data_row)

In [2]:
import requests, json

# Download the 1k-question Jeopardy sample and keep a local copy so the
# streaming-import cell can read it from disk.
response = requests.get(
    "https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/main/data/jeopardy_1k.json",
    timeout=30,  # fail instead of hanging forever on a stalled connection
)
# Stop here on a 4xx/5xx answer rather than trying to parse (and save) an
# error page as if it were the dataset.
response.raise_for_status()

data = response.json()
with open('jeopardy_1k.json', 'w', encoding='utf-8') as f:
    json.dump(data, f)

In [None]:
import ijson

# Progress-reporting state: a running total and how often to report it.
counter = 0
interval = 100  # one progress line per `interval` records

print("JSON streaming, to avoid running out of memory on large files...")

# A single fixed-size batch context wraps the whole file. The file is opened
# in binary mode and handed to ijson, which yields the records one at a time
# (the "item" prefix presumably selects the elements of a top-level JSON
# array -- see the ijson docs).
with client.batch.fixed_size(batch_size=200) as batch, \
        open("jeopardy_1k.json", "rb") as json_file:
    for obj in ijson.items(json_file, "item"):
        batch.add_object(
            collection="JeopardyQuestion",
            properties={
                "question": obj["Question"],
                "answer": obj["Answer"],
            },
            # If you Bring Your Own Vectors, add the `vector` parameter here
            # vector=obj.vector["default"]
        )

        # Keep the running total and report at every multiple of `interval`.
        counter += 1
        if not counter % interval:
            print(f"Imported {counter} articles...")


print(f"Finished importing {counter} articles.")

In [None]:
import pandas as pd

# Settings for displaying the import progress.
# `counter` is the running total of records handled in this cell; it is reset
# to 0 here so the totals from the previous import cell do not carry over.
counter = 0
interval = 100  # print progress every this many records. NOTE(review): the original note said this should be bigger than the batch size, but this cell uses batch_size=200 -- confirm which value is intended.

def add_object(obj, batch=None) -> None:
    """Queue one Jeopardy record for import and update the progress counter.

    Args:
        obj: Mapping-like record exposing ``"Question"`` and ``"Answer"``
            keys (for example a dict or a pandas row).
        batch: Optional, already-open batch context to add the object to.
            Pass the batch from an enclosing ``with client.batch...`` block
            so that many records share one batch. When omitted, a dedicated
            fixed-size batch is opened (and therefore flushed) for this
            single object, which is the legacy behaviour and defeats
            batching when called in a loop.

    Side effects:
        Increments the module-level ``counter`` and prints a progress line
        whenever it reaches a multiple of the module-level ``interval``.
    """
    from contextlib import nullcontext

    global counter
    properties = {
        "question": obj["Question"],
        "answer": obj["Answer"],
    }

    # Reuse the caller's batch when one is supplied (nullcontext leaves it
    # open on exit); otherwise fall back to a one-off batch as before.
    if batch is None:
        batch_context = client.batch.fixed_size(batch_size=200)
    else:
        batch_context = nullcontext(batch)

    with batch_context as active_batch:
        active_batch.add_object(
            collection="JeopardyQuestion",
            properties=properties,
            # If you Bring Your Own Vectors, add the `vector` parameter here
            # vector=obj.vector["default"]
        )

        # Calculate and display progress
        counter += 1
        if counter % interval == 0:
            print(f"Imported {counter} articles...")


print("pandas dataframe iterator with lazy-loading, to not load all records in RAM at once...")
# NOTE(review): none of the visible cells creates "jeopardy_1k.csv" (only the
# JSON file is downloaded) -- confirm the CSV exists before running this cell.
with client.batch.fixed_size(batch_size=200) as batch:
    # Passing `chunksize` makes read_csv return an iterator of DataFrame
    # chunks instead of loading the whole file at once.
    with pd.read_csv(
        "jeopardy_1k.csv",
        usecols=["Question", "Answer", "Category"],
        chunksize=100,  # number of rows per chunk
    ) as csv_iterator:
        # Iterate through the dataframe chunks and add each CSV record to the batch
        for chunk in csv_iterator:
            for index, row in chunk.iterrows():
                properties = {
                    "question": row["Question"],
                    "answer": row["Answer"],
                }
                batch.add_object(
                    collection="JeopardyQuestion",
                    properties=properties,
                    # If you Bring Your Own Vectors, add the `vector` parameter here
                    # vector=obj.vector["default"]
                )

                # Calculate and display progress. This must run once per row:
                # outside the loops it ran once in total, so no progress was
                # printed and the final count was always 1.
                counter += 1
                if counter % interval == 0:
                    print(f"Imported {counter} articles...")

print(f"Finished importing {counter} articles.")