# RediSearch 模块

RediSearch:

- GitHub: [RediSearch](https://github.com/RediSearch/RediSearch)
- Docs: [search-and-query](https://redis.io/docs/latest/develop/interact/search-and-query/)

Redis Python client:

- GitHub: [redisearch-py](https://github.com/RediSearch/redisearch-py)（该库已弃用，功能已并入 [redis-py](https://github.com/redis/redis-py)；本文代码使用的是 redis-py）
- Docs: [python-client](https://redis.io/docs/latest/develop/connect/clients/python/)

In [1]:
import numpy as np
import redis

import util

## 1. 安装 Docker 镜像

官方 Docker 镜像：[redis-stack-server](https://hub.docker.com/r/redis/redis-stack-server/)

拉取镜像到本地：

```
docker pull redis/redis-stack-server
```

运行容器：

```
docker run -d --name redis-stack -p 6379:6379 -e REDIS_ARGS="--requirepass my-password" redis/redis-stack-server:latest
```

在容器中与 Redis 交互：

```
# 打开命令行交互
docker exec -it redis-stack sh

# 打开 Redis 命令行交互（容器启动时设置了密码，进入后需先执行 AUTH my-password）
docker exec -it redis-stack redis-cli
```

In [2]:
# Client for the local container; the password matches the --requirepass
# value passed to `docker run`.
r = redis.Redis(
    host='localhost',
    port=6379,
    password="my-password",
)

# Verify that the server is reachable
r.ping()

True

In [3]:
# Basic write followed by a read of the same key
r.set(name="a", value="b")
r.get(name="a")

b'b'

## 2. 存取向量

In [4]:
# 创建索引
def create_index(index_name, text_field_name, vector_field_name, dtype='FLOAT32', dim=128, metric='L2'):
    """Send ``FT.CREATE`` for an index over hashes whose keys start with ``vec:``.

    The schema has one TEXT field and one VECTOR field using the FLAT
    algorithm.

    Args:
        index_name: Name of the index to create.
        text_field_name: Name of the TEXT field in the schema.
        vector_field_name: Name of the VECTOR field in the schema.
        dtype: Value sent for the vector ``TYPE`` attribute.
        dim: Value sent for the vector ``DIM`` attribute.
        metric: Value sent for the vector ``DISTANCE_METRIC`` attribute.

    Returns:
        The reply of ``r.execute_command`` for the command.
    """
    # Attribute name/value tokens of the vector field; their count is sent
    # right before them.
    vector_attrs = ['TYPE', f'{dtype}', 'DIM', f'{dim}', 'DISTANCE_METRIC', f'{metric}']
    index_target = ['ON', 'HASH', 'PREFIX', '1', 'vec:']
    schema = [
        'SCHEMA',
        f'{text_field_name}', 'TEXT',
        f'{vector_field_name}', 'VECTOR', 'FLAT', str(len(vector_attrs)), *vector_attrs,
    ]
    return r.execute_command('FT.CREATE', f'{index_name}', *index_target, *schema)

create_index(
    index_name='my_index',
    text_field_name='my_text',
    vector_field_name='my_vector',
)

b'OK'

In [5]:
# 删除索引
def delete_index(index_name, delete_docs=False):
    """Drop a RediSearch index with ``FT.DROPINDEX``.

    The command is sent as separate arguments, so the index name reaches the
    server as a single token. The square brackets in the documented syntax
    ``FT.DROPINDEX index [DD]`` only mark ``DD`` as optional; they are not
    part of the token and must not be sent.

    Args:
        index_name: Name of the index to drop.
        delete_docs: When true, append the ``DD`` option, which per the
            RediSearch documentation also deletes the indexed hashes.
            Defaults to False so that dropping an index is not destructive
            to the stored data unless asked for.

    Returns:
        The reply of ``r.execute_command`` for the command.
    """
    cmd = ['FT.DROPINDEX', index_name]
    if delete_docs:
        cmd.append('DD')
    return r.execute_command(*cmd)

# delete_index(index_name='my_index')

In [6]:
# 生成向量
# Two random 128-element float32 vectors (vec1 is drawn first, then vec2)
vec1, vec2 = (np.random.rand(128).astype(np.float32) for _ in range(2))

# Raw byte form of each vector, used as the hash field value
vec1_binary, vec2_binary = vec1.tobytes(), vec2.tobytes()

# Field mappings to store; keys match the field names used for the index
data1 = dict(my_text='text_1', my_vector=vec1_binary)
data2 = dict(my_text='text_2', my_vector=vec2_binary)

In [7]:
def insert_index(key: str, data: dict):
    """Write ``data`` as hash fields under the Redis key ``vec:<key>``.

    Args:
        key: Suffix appended to the ``vec:`` prefix to form the Redis key.
        data: Field-to-value mapping passed to ``r.hset`` as ``mapping``.

    Returns:
        The value returned by ``r.hset``.
    """
    redis_key = f'vec:{key}'
    return r.hset(redis_key, mapping=data)

def get_data(key: str):
    """Fetch all fields of the hash stored under ``vec:<key>`` via ``HGETALL``.

    Args:
        key: Suffix appended to the ``vec:`` prefix to form the Redis key.

    Returns:
        The reply of ``r.execute_command`` for the ``HGETALL`` command.
    """
    redis_key = f'vec:{key}'
    return r.execute_command('HGETALL', redis_key)

def delete_data(key: str):
    """Delete the Redis key ``vec:<key>``.

    Args:
        key: Suffix appended to the ``vec:`` prefix to form the Redis key.

    Returns:
        The value returned by ``r.delete``.
    """
    redis_key = f'vec:{key}'
    return r.delete(redis_key)

# Store both sample records under vec:data1 and vec:data2
insert_index('data1', data1)
insert_index('data2', data2)

0

In [8]:
# Read a record back by its key
get_data(key='data1')

{b'my_vector': b'\x9a\x1du>\x90\xfb8?<\x05\x1f=\xd0\xf8\x11>0\xa6\xb6=K\xde\x80>\xde\xd5\xc9>tf\x85>\xdd\x1e\x0b?\x91\x8a\xf4>\xbc\x96\\?\t\xbc\xa0>5`+?\xe0\xd6\x07?K\x19\x00?P\xfd\x98>r)\x08?\x00?\xd3</\xe9\xcb>f\xfb\x8c>o\x89:>\nIr?\xd5\xdc\x1d?\xc5\xa1y>\xe1\xbb\x0e>\xb4\x15z?m\x93A=g\x98\xb6>\xab\xa9\xfb<6KS>\xb9\x1av>\x9d\x88b?\xf1\xad\x98=*\xbc\x17?\xd3\xff\x06?\xa6]4>\x90\xb2\x1e?\xa8\x87\xb5==T\xd1>\xf2cO?\xf6\xcb\x9b>\xd7\x8b"?<\xb25=N\xc3#?_#^?w\xdbr?C9\xf5>-d\x87>\x1b{g?\xe20\xc2>N\x9d\x91=\x13\x89X>\rJ0?\x96\x07\x15?\x9cy\x88>\x99\xd8;?\x8a\xa4\xa3<E#e>\xa6\xc7o?\xc4\xc7N?8\x00D?\x17\x18\n?\x85\xee\x1d?\xea\xc1y?Z#\x12?\xa8\x10<>\x0b\x1d\x07?\x94B\xdc>\x0b\n\xb8>\x19\xc0\x91>\xc8y??"^\xcd>\x95\xfc*?0\xa5d>i$T?t\x17\xce>\x93\xd8D?\xf7\'\xfd<\xc5t\x1c?Q\xb2\x93=]\x17\xa6=\x95+J?\x9e7$?\xeb\xe0\xa2<_7g?o\xfb{?\xd4\xf5\xd2<\xa3\x97\x0b?\x83\xcd.?_\xa8O?\x84~x>5\xbf\xca>\x88~\xaf>*\x13R?\x91=\xef>\xd27I?Q}\x90>\xae>\xe4>\xaf\xba\x10?\xb0\x99@?k@o>3X\xdf>@\xc8\x1d?\xc09\xba=\xf5\

In [9]:
# 通过 key 删除数据
# delete_data('data1')

## 3. 查找最近邻向量

In [10]:
# 查找最近邻向量
def search_vector(query_vec, index_name, vector_field_name, num_return_vec):
    """Run a KNN ``FT.SEARCH`` query against a vector field.

    Args:
        query_vec: Query vector; its ``tobytes()`` output is sent as the
            ``query_vec`` query parameter.
        index_name: Name of the index to search.
        vector_field_name: Name of the vector field referenced in the query.
        num_return_vec: Number of neighbours requested in the KNN clause.

    Returns:
        The reply of ``r.execute_command`` for the command.
    """
    knn_query = f'*=>[KNN {num_return_vec} @{vector_field_name} $query_vec]'
    # '2' is the number of tokens that follow PARAMS (one name, one value).
    params = ['PARAMS', '2', 'query_vec', query_vec.tobytes()]
    return r.execute_command('FT.SEARCH', index_name, knn_query, *params, 'DIALECT', '2')

# Random float32 query vector with the same length as the stored vectors
query_vec = np.random.rand(128).astype(np.float32)
result = search_vector(
    query_vec,
    index_name='my_index',
    vector_field_name='my_vector',
    num_return_vec=2,
)
result

[2,
 b'vec:data1',
 [b'__my_vector_score',
  b'20.932970047',
  b'my_text',
  b'text_1',
  b'my_vector',
  b'\x9a\x1du>\x90\xfb8?<\x05\x1f=\xd0\xf8\x11>0\xa6\xb6=K\xde\x80>\xde\xd5\xc9>tf\x85>\xdd\x1e\x0b?\x91\x8a\xf4>\xbc\x96\\?\t\xbc\xa0>5`+?\xe0\xd6\x07?K\x19\x00?P\xfd\x98>r)\x08?\x00?\xd3</\xe9\xcb>f\xfb\x8c>o\x89:>\nIr?\xd5\xdc\x1d?\xc5\xa1y>\xe1\xbb\x0e>\xb4\x15z?m\x93A=g\x98\xb6>\xab\xa9\xfb<6KS>\xb9\x1av>\x9d\x88b?\xf1\xad\x98=*\xbc\x17?\xd3\xff\x06?\xa6]4>\x90\xb2\x1e?\xa8\x87\xb5==T\xd1>\xf2cO?\xf6\xcb\x9b>\xd7\x8b"?<\xb25=N\xc3#?_#^?w\xdbr?C9\xf5>-d\x87>\x1b{g?\xe20\xc2>N\x9d\x91=\x13\x89X>\rJ0?\x96\x07\x15?\x9cy\x88>\x99\xd8;?\x8a\xa4\xa3<E#e>\xa6\xc7o?\xc4\xc7N?8\x00D?\x17\x18\n?\x85\xee\x1d?\xea\xc1y?Z#\x12?\xa8\x10<>\x0b\x1d\x07?\x94B\xdc>\x0b\n\xb8>\x19\xc0\x91>\xc8y??"^\xcd>\x95\xfc*?0\xa5d>i$T?t\x17\xce>\x93\xd8D?\xf7\'\xfd<\xc5t\x1c?Q\xb2\x93=]\x17\xa6=\x95+J?\x9e7$?\xeb\xe0\xa2<_7g?o\xfb{?\xd4\xf5\xd2<\xa3\x97\x0b?\x83\xcd.?_\xa8O?\x84~x>5\xbf\xca>\x88~\xaf>*\x13R?\

In [11]:
# Skip the leading element of the reply and walk the rest as
# (key, field list) pairs; the second entry of each field list is printed
# as the distance.
for doc_key, doc_fields in zip(result[1::2], result[2::2]):
    print(f"Key: {doc_key}")
    print(f"Distance: {doc_fields[1]}")

Key: b'vec:data1'
Distance: b'20.932970047'
Key: b'vec:data2'
Distance: b'26.0434322357'
