In [1]:
%pip install weaviate-client

Note: you may need to restart the kernel to use updated packages.


In [21]:
import weaviate
import uuid
from weaviate.classes.init import AdditionalConfig, Timeout
from weaviate.classes.config import DataType, VectorDistances, Configure, Property
from weaviate.classes.data import DataObject
from weaviate.classes.query import Filter, MetadataQuery
import torch
from urllib.parse import urlparse, parse_qs

In [65]:
url = 'weaviate://localhost:8088?grpc=50051#test'

In [64]:
o = urlparse(url, allow_fragments=True)

In [5]:
print(
    o.scheme,
    o.hostname,
    o.port,
    o.path,
    o.fragment,
    o.query
)
print(o)
qargs = parse_qs(o.query)
print(qargs)

weaviate localhost 8088  test grpc=50051
ParseResult(scheme='weaviate', netloc='localhost:8088', path='', params='', query='grpc=50051', fragment='test')
{'grpc': ['50051']}


In [6]:
# with weaviate.connect_to_...() as client:

client = weaviate.connect_to_custom(
    http_host=o.hostname,
    http_port=o.port,
    grpc_host=o.hostname,
    grpc_port=qargs['grpc'][0],
    http_secure=False,
    grpc_secure=False,
    additional_config=AdditionalConfig(
        timeout=Timeout(init=2, query=45, insert=120)  # Values in seconds
    )
)
client.is_ready()


True

In [16]:

if client.collections.exists(o.fragment):
    print(f"Collection '{o.fragment}' exists.")
    collection = client.collections.get(o.fragment)
    # client.collections.delete(o.fragment)
else:
    print(f"Creating collection '{o.fragment}'.")
    collection = client.collections.create(
        name=o.fragment,
        properties=[
            Property(
                name='key',
                description='Unique ID of vector entry.',
                data_type=DataType.INT,
            ),
        ],
        vectorizer_config=Configure.Vectorizer.none(),
        vector_index_config=Configure.VectorIndex.hnsw(
            distance_metric=VectorDistances.DOT
        )
    )


Collection 'test' exists.


In [18]:
# with open('../data/jeopardy_tiny_with_vectors_all-OpenAI-ada-002.json') as fh:
#     data = json.load(fh)  # Load data
# # data
# len(data)

In [19]:
a = torch.rand(int(1e4), 5)
print(a.size())

torch.Size([10000, 5])


In [22]:
## add data

items = [ DataObject(uuid=uuid.UUID(int=i), properties={'key': i}, vector=a[i].tolist()) for i in range(int(1e4)) ]

collection.data.insert_many(items)


BatchObjectReturn(all_responses=[UUID('00000000-0000-0000-0000-000000000000'), UUID('00000000-0000-0000-0000-000000000001'), UUID('00000000-0000-0000-0000-000000000002'), UUID('00000000-0000-0000-0000-000000000003'), UUID('00000000-0000-0000-0000-000000000004'), UUID('00000000-0000-0000-0000-000000000005'), UUID('00000000-0000-0000-0000-000000000006'), UUID('00000000-0000-0000-0000-000000000007'), UUID('00000000-0000-0000-0000-000000000008'), UUID('00000000-0000-0000-0000-000000000009'), UUID('00000000-0000-0000-0000-00000000000a'), UUID('00000000-0000-0000-0000-00000000000b'), UUID('00000000-0000-0000-0000-00000000000c'), UUID('00000000-0000-0000-0000-00000000000d'), UUID('00000000-0000-0000-0000-00000000000e'), UUID('00000000-0000-0000-0000-00000000000f'), UUID('00000000-0000-0000-0000-000000000010'), UUID('00000000-0000-0000-0000-000000000011'), UUID('00000000-0000-0000-0000-000000000012'), UUID('00000000-0000-0000-0000-000000000013'), UUID('00000000-0000-0000-0000-000000000014'), U

In [42]:
# retrieve vector with ID

query_obj_uuid=uuid.UUID(int=3)

data_object = collection.query.fetch_object_by_id(
    uuid=query_obj_uuid, 
    include_vector=True, 
    return_properties=['key'], 
    return_references=[]
)
if data_object is None:
    print('Object does not exist')
else:
    print(data_object.properties)
    print(data_object.metadata)
    print(data_object.references)
    print(data_object.vector)

print("=====")

# above query is similar to below, except that metadata can be omitted here
response = collection.query.fetch_objects(
    limit=1,
    filters=Filter.by_id().equal(query_obj_uuid),
    include_vector=True,
    return_metadata=[], # ["creation_time", "last_update_time", "distance", "certainty", "score", "explain_score", "is_consistent"],
    return_properties=['key'],
    return_references=[],
)

if len(response.objects) < 1:
    print('Object does not exist')
else:
    data_object = response.objects[0]
    print(data_object.properties)
    print(data_object.metadata)
    print(data_object.references)
    print(data_object.vector)

{'key': 3}
MetadataSingleObjectReturn(creation_time=datetime.datetime(2024, 5, 18, 17, 56, 41, 690000, tzinfo=datetime.timezone.utc), last_update_time=datetime.datetime(2024, 5, 18, 17, 56, 41, 690000, tzinfo=datetime.timezone.utc), is_consistent=None)
None
{'default': [0.1833905577659607, 0.33314329385757446, 0.5373668074607849, 0.9268814921379089, 0.6297898888587952]}
=====
{'key': 3}
MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
None
{'default': [0.1833905577659607, 0.33314329385757446, 0.5373668074607849, 0.9268814921379089, 0.6297898888587952]}


In [57]:
# fetch multiple objects

keys_to_fetch = [1,2,3,43,34,129846129846]
uuids_to_fetch = list(map(lambda key: uuid.UUID(int=key), keys_to_fetch))
uuid_filters = list(map(lambda uuid: Filter.by_id().equal(uuid), uuids_to_fetch))

In [58]:
response = collection.query.fetch_objects(
    limit=len(keys_to_fetch),
    filters=Filter.by_id().contains_any(uuids_to_fetch),
    include_vector=True,
    return_metadata=[], # ["creation_time", "last_update_time", "distance", "certainty", "score", "explain_score", "is_consistent"],
    return_properties=['key'],
    return_references=[],
)

print(len(response.objects))
print(response)

5
QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000001'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'key': 1}, references=None, vector={'default': [0.9235654473304749, 0.7773518562316895, 0.2986210584640503, 0.9185530543327332, 0.5190556049346924]}, collection='Test'), Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000002'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'key': 2}, references=None, vector={'default': [0.9785481691360474, 0.03223872184753418, 0.5379719138145447, 0.7583557963371277, 0.6822077035903931]}, collection='Test'), Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000003'), metadata=MetadataReturn(creation_time=None, last_update_time=N

In [60]:
response = collection.query.fetch_objects(
    limit=len(keys_to_fetch),
    filters=Filter.by_property(name='key').contains_any(keys_to_fetch),
    include_vector=True,
    return_metadata=[], # ["creation_time", "last_update_time", "distance", "certainty", "score", "explain_score", "is_consistent"],
    return_properties=['key'],
    return_references=[],
)

print(len(response.objects))
print(response)




5
QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000001'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'key': 1}, references=None, vector={'default': [0.9235654473304749, 0.7773518562316895, 0.2986210584640503, 0.9185530543327332, 0.5190556049346924]}, collection='Test'), Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000002'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'key': 2}, references=None, vector={'default': [0.9785481691360474, 0.03223872184753418, 0.5379719138145447, 0.7583557963371277, 0.6822077035903931]}, collection='Test'), Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000003'), metadata=MetadataReturn(creation_time=None, last_update_time=N

In [61]:
response = collection.query.fetch_objects(
    limit=len(keys_to_fetch),
    filters=Filter.any_of(uuid_filters),
    include_vector=True,
    return_metadata=[], # ["creation_time", "last_update_time", "distance", "certainty", "score", "explain_score", "is_consistent"],
    return_properties=['key'],
    return_references=[],
)

print(len(response.objects))
print(response)

5
QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000001'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'key': 1}, references=None, vector={'default': [0.9235654473304749, 0.7773518562316895, 0.2986210584640503, 0.9185530543327332, 0.5190556049346924]}, collection='Test'), Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000002'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'key': 2}, references=None, vector={'default': [0.9785481691360474, 0.03223872184753418, 0.5379719138145447, 0.7583557963371277, 0.6822077035903931]}, collection='Test'), Object(uuid=_WeaviateUUIDInt('00000000-0000-0000-0000-000000000003'), metadata=MetadataReturn(creation_time=None, last_update_time=N

In [None]:

client.close()