In [None]:
%pip install redis

In [None]:
import redis
import numpy as np
import torch


In [None]:
# Redis getting-started guide: https://redis.io/docs/latest/develop/get-started/

# Single-precision types used on both sides of the store: the NumPy dtype is
# applied before vectors are serialised, the torch dtype when they are read back.
dtype_np, dtype_pt = np.float32, torch.float32
# Target device for embeddings (not referenced by the cells visible here).
device_embeddings = 'cpu'


In [None]:
# Alternative: build the client from a URL instead of explicit arguments, e.g.
#   url = 'redis://localhost:6379?db=1'
#   redis_client = redis.from_url(url)
# NOTE: knn index can only be created on db=0!!
redis_client = redis.Redis(
    host="localhost",
    port=6379,
    db=0,
    decode_responses=True,  # replies come back as str rather than bytes
)

In [None]:
# Connectivity check: print the ping reply, or the error if the server is
# unreachable (the notebook keeps running either way).
try:
    res = redis_client.ping()
except redis.exceptions.ConnectionError as err:
    print(err)
else:
    print(res)


In [None]:
# Synthetic data set: 10,000 random vectors of length `dim`, drawn uniformly
# from [0, 1). `dim` is reused as the DIM of the vector index further down.
dim = 5
a = torch.rand((10_000, dim))
print(a.shape)

In [None]:
# add data
# Store every row of `a` as a JSON document `colname:<row index>` holding the
# row index under 'key' and the vector under 'embedding'.
# The tensor is converted to nested Python lists once, up front, instead of
# converting each row separately inside the loop.
rows = a.numpy().astype(dtype_np).tolist()
pipeline = redis_client.pipeline()
for key, embedding in enumerate(rows):
    pipeline.json().set(f'colname:{key}', '$', {'key': key, 'embedding': embedding})
# All queued JSON.SET commands are sent together; `res` holds one status per document.
res = pipeline.execute()
# Summarise instead of echoing one status per document.
print(f'{sum(1 for status in res if status)} of {len(res)} documents written')

In [None]:
# res = redis_client.json().get("colname:3", "$.embedding")
# res = redis_client.json().get("colname:3")
# print(res)

In [None]:
# retrieve
# Fetch whole documents for a mix of keys; the very large ids are outside the
# 0..9999 range written above, so they exercise the "missing key" case.
keys = [987340132, 1, 2, 3, 4, 42, 314123412]
# Materialise the key names as a list: a lazy `map` object can only be
# consumed once, so re-using `col_keys` would silently see no keys.
col_keys = [f'colname:{k}' for k in keys]
docs = redis_client.json().mget(col_keys, '$')
print(docs)


In [None]:
# retrieve only embeddings
keys = [987340132, 1, 2, 3, 4, 42, 314123412]
# Materialise the key names as a list: a lazy `map` object can only be
# consumed once, so re-using `col_keys` would silently see no keys.
col_keys = [f'colname:{k}' for k in keys]
# The '.embedding' path asks for just that field of each document.
embeddings = redis_client.json().mget(col_keys, '.embedding')
print(embeddings)
# Skip entries that came back as None (keys with no stored document) and
# stack the remaining vectors into one tensor.
tensors = torch.as_tensor([e for e in embeddings if e is not None], dtype=dtype_pt)
print(tensors.size())

In [None]:
# contains
# Membership test per key using `in` on the client; the checks are evaluated
# lazily, only when `list()` consumes the generator below.
keys = [12, 34, 4, 54, 2398641923]
contains = (f'colname:{k}' in redis_client for k in keys)
print(list(contains))

In [None]:
# delete all keys from current DB
# Destructive, so it is opt-in: executed unconditionally during a top-to-bottom
# run, this would erase the documents inserted above before the index and KNN
# query further down get to use them. Set the flag to True to wipe the DB.
FLUSH_CURRENT_DB = False
if FLUSH_CURRENT_DB:
    redis_client.flushdb()

In [None]:
# delete all keys from all DBs
# Destructive, so it is opt-in: this removes every key in every database on the
# server, including data unrelated to this notebook, and during a top-to-bottom
# run it would erase the documents the KNN cells below rely on.
# Set the flag to True only when a full wipe is really intended.
FLUSH_ALL_DBS = False
if FLUSH_ALL_DBS:
    redis_client.flushall()

In [None]:
# Prepare KNN search

In [None]:
from redis.commands.search.field import NumericField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query

In [None]:
# create index
# Two indexed attributes per JSON document:
#   key    - numeric, read from $.key
#   vector - FLAT index over FLOAT32 vectors of length `dim` read from
#            $.embedding, compared with the COSINE distance metric
key_field = NumericField("$.key", as_name="key")
vector_field = VectorField(
    "$.embedding",
    "FLAT",
    {"TYPE": "FLOAT32", "DIM": dim, "DISTANCE_METRIC": "COSINE"},
    as_name="vector",
)
schema = (key_field, vector_field)

# Only JSON documents whose key starts with "colname:" belong to the index.
definition = IndexDefinition(prefix=["colname:"], index_type=IndexType.JSON)

search_index = redis_client.ft("idx:colname_vss")
res = search_index.create_index(fields=schema, definition=definition)

In [None]:
# Report how many documents the index currently covers and how many failed.
# NOTE(review): the counts may lag right after index creation if existing
# documents are still being indexed - confirm before relying on them.
search_index = redis_client.ft("idx:colname_vss")
info = search_index.info()
num_docs, indexing_failures = info["num_docs"], info["hash_indexing_failures"]
print(f"{num_docs} documents indexed with {indexing_failures} failures")

In [None]:
# KNN query template: the 3 nearest neighbours of $query_vector over the whole
# index, with the distance exposed as `vector_score`, sorted by that score.
knn_expression = '(*)=>[KNN 3 @vector $query_vector AS vector_score]'
query = (
    Query(knn_expression)
    .sort_by('vector_score')
    .return_fields('vector_score', 'key', 'embedding')
    .dialect(2)
)

In [None]:
INDEX_NAME = "idx:colname_vss"
# Use row 1 of the data set as the probe vector. It is encoded once as raw
# bytes in the dtype set by `dtype_np`, and that encoded value is what gets
# bound to $query_vector (previously `encoded_query` held the raw tensor and
# was never used, while the encoding was recomputed inline).
encoded_query = a[1].numpy().astype(dtype_np).tobytes()
redis_client.ft(INDEX_NAME).search(query, {'query_vector': encoded_query}).docs

In [None]:
# redis_client.ft(INDEX_NAME).dropindex()