In [None]:
%pip install weaviate-client

In [None]:
import weaviate
import uuid
from weaviate.classes.init import AdditionalConfig, Timeout
from weaviate.classes.config import DataType, VectorDistances, Configure, Property
from weaviate.classes.data import DataObject
from weaviate.classes.query import Filter, MetadataQuery
import torch
from urllib.parse import urlparse, parse_qs

In [None]:
# Connection string for this demo: the HTTP host and port sit in the netloc,
# the gRPC port in the `grpc` query parameter, and the collection name in the fragment.
url = "weaviate://localhost:8088?grpc=50051#test"

In [None]:
# Split the connection string into its components. Fragment parsing is requested
# explicitly because later cells use `o.fragment` as the collection name.
o = urlparse(url, allow_fragments=True)

In [None]:
# Show the individual components of the parsed URL, then the full parse result.
print(o.scheme, o.hostname, o.port, o.path, o.fragment, o.query)
print(o)

# Decode the query string; parse_qs maps each parameter name to a list of string values.
qargs = parse_qs(o.query)
print(qargs)

In [None]:
# Open an unsecured HTTP + gRPC connection to the instance described by the URL.
# A context manager (`with weaviate.connect_to_...() as client:`) would close the
# connection automatically; here the client has to stay open across cells, so it
# is closed explicitly in the last cell of the notebook.

# parse_qs returns lists of strings, while connect_to_custom expects an integer
# port, so convert the gRPC port explicitly instead of relying on coercion.
grpc_port = int(qargs['grpc'][0])

client = weaviate.connect_to_custom(
    http_host=o.hostname,
    http_port=o.port,
    grpc_host=o.hostname,
    grpc_port=grpc_port,
    http_secure=False,
    grpc_secure=False,
    additional_config=AdditionalConfig(
        timeout=Timeout(init=2, query=45, insert=120)  # Values in seconds
    )
)
# Displayed as the cell output.
client.is_ready()


In [None]:

# Obtain the collection named by the URL fragment, creating it on first use.
if not client.collections.exists(o.fragment):
    print(f"Creating collection '{o.fragment}'.")
    collection = client.collections.create(
        name=o.fragment,
        # A single integer property that mirrors the object's key.
        properties=[
            Property(
                name='key',
                description='Unique ID of vector entry.',
                data_type=DataType.INT,
            ),
        ],
        # Vectors are supplied by the caller, so no vectorizer module is configured.
        vectorizer_config=Configure.Vectorizer.none(),
        # HNSW index using the dot product as distance metric.
        vector_index_config=Configure.VectorIndex.hnsw(
            distance_metric=VectorDistances.DOT
        ),
    )
else:
    print(f"Collection '{o.fragment}' exists.")
    collection = client.collections.get(o.fragment)
    # To start over with an empty collection instead:
    # client.collections.delete(o.fragment)


In [None]:
# Demo data: 10,000 random 5-dimensional vectors with entries drawn uniformly from [0, 1).
# A dedicated, seeded generator makes the data identical on every run without
# touching torch's global random state.
a = torch.rand(int(1e4), 5, generator=torch.Generator().manual_seed(0))
print(a.size())

In [None]:
## add data

# One object per row of `a`. The row index is stored as the `key` property and
# also determines the UUID, so an object can later be looked up from its key alone.
# The tensor is converted to nested lists once, and the number of objects follows
# the tensor's actual size instead of repeating a hard-coded count.
items = [
    DataObject(uuid=uuid.UUID(int=i), properties={'key': i}, vector=row)
    for i, row in enumerate(a.tolist())
]

insert_result = collection.data.insert_many(items)

# insert_many reports per-object failures in its return value instead of raising,
# so surface them here rather than letting later queries silently miss objects.
if insert_result.has_errors:
    print(f"{len(insert_result.errors)} of {len(items)} objects failed to insert:")
    for index, error in list(insert_result.errors.items())[:5]:
        print(f"  item {index}: {error}")
else:
    print(f"Inserted {len(items)} objects.")


In [None]:
# Retrieve one object (properties and vector) via its UUID, in two equivalent ways.

query_obj_uuid = uuid.UUID(int=3)

# Variant 1: direct lookup by ID; yields None when no such object exists.
data_object = collection.query.fetch_object_by_id(
    uuid=query_obj_uuid,
    include_vector=True,
    return_properties=['key'],
    return_references=[],
)
if data_object is not None:
    print(
        data_object.properties,
        data_object.metadata,
        data_object.references,
        data_object.vector,
        sep="\n",
    )
else:
    print('Object does not exist')

print("=====")

# Variant 2: a filtered fetch restricted to the same ID. Unlike the lookup above,
# this form allows the metadata to be left out of the request.
response = collection.query.fetch_objects(
    filters=Filter.by_id().equal(query_obj_uuid),
    limit=1,
    include_vector=True,
    return_properties=['key'],
    return_references=[],
    # Nothing requested; candidates: "creation_time", "last_update_time", "distance",
    # "certainty", "score", "explain_score", "is_consistent"
    return_metadata=[],
)

if response.objects:
    data_object = response.objects[0]
    print(
        data_object.properties,
        data_object.metadata,
        data_object.references,
        data_object.vector,
        sep="\n",
    )
else:
    print('Object does not exist')

In [None]:
# Fetch multiple objects: prepare the keys, their UUIDs and one ID filter per UUID.
# The last key lies outside the inserted range, so no object matches it.

keys_to_fetch = [1, 2, 3, 43, 34, 129846129846]
uuids_to_fetch = [uuid.UUID(int=key) for key in keys_to_fetch]
uuid_filters = [Filter.by_id().equal(object_id) for object_id in uuids_to_fetch]

In [None]:
# Approach 1: a single ID filter that matches any UUID in the list.
response = collection.query.fetch_objects(
    filters=Filter.by_id().contains_any(uuids_to_fetch),
    limit=len(keys_to_fetch),
    include_vector=True,
    return_properties=['key'],
    return_references=[],
    # Nothing requested; candidates: "creation_time", "last_update_time", "distance",
    # "certainty", "score", "explain_score", "is_consistent"
    return_metadata=[],
)

# Number of objects found, followed by the raw response.
print(len(response.objects), response, sep="\n")

In [None]:
# Approach 2: filter on the stored `key` property instead of the object ID.
response = collection.query.fetch_objects(
    filters=Filter.by_property(name='key').contains_any(keys_to_fetch),
    limit=len(keys_to_fetch),
    include_vector=True,
    return_properties=['key'],
    return_references=[],
    # Nothing requested; candidates: "creation_time", "last_update_time", "distance",
    # "certainty", "score", "explain_score", "is_consistent"
    return_metadata=[],
)

# Number of objects found, followed by the raw response.
print(len(response.objects), response, sep="\n")




In [None]:
# Approach 3: OR-combine the individual per-UUID equality filters.
response = collection.query.fetch_objects(
    filters=Filter.any_of(uuid_filters),
    limit=len(keys_to_fetch),
    include_vector=True,
    return_properties=['key'],
    return_references=[],
    # Nothing requested; candidates: "creation_time", "last_update_time", "distance",
    # "certainty", "score", "explain_score", "is_consistent"
    return_metadata=[],
)

# Number of objects found, followed by the raw response.
print(len(response.objects), response, sep="\n")

In [None]:

# The client was opened without a context manager, so close the connection explicitly.
client.close()