In [16]:
import redis
import numpy as np
from scipy.spatial.distance import cosine
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.field import TagField, NumericField
import json

In [19]:
# Index name and key prefix used for the item-to-item similarity documents.
MODEL_KEY_BASE = "ecommerce:sim_item"
MODEL_INDEX_NAME = "idx:sim_item"

# Connection settings for the Redis server (logical database 0).
redis_host = 'localhost'
redis_port = 6379
redis_client = redis.Redis(host=redis_host, port=redis_port, db=0)

In [20]:
# Collect every product key in a stable order. SCAN walks the keyspace
# incrementally instead of blocking the server the way KEYS does; it may
# report a key more than once, hence the set() before sorting.
keys = sorted(set(redis_client.scan_iter(match='ecommerce:product:*')))

In [21]:
# Helper that reads a product's name embedding from Redis
def get_name_embedding(key):
    """Fetch the ``name_embeddings`` vector of the JSON document stored at ``key``.

    Args:
        key: Redis key of a JSON document.

    Returns:
        A float32 NumPy array built from the document's ``name_embeddings``
        entry, or ``None`` when the key holds no (or an empty) document or
        the document has no ``name_embeddings`` entry.
    """
    data = redis_client.json().get(key)
    if not data:
        return None
    vector = data.get('name_embeddings')
    if vector is None:
        # np.array(None, dtype=np.float32) would silently produce a 0-d NaN
        # array, so report the missing embedding explicitly instead.
        return None
    return np.array(vector, dtype=np.float32)

# Look up the name embedding of every product key, keyed by the Redis key.
embeddings = dict(zip(keys, map(get_name_embedding, keys)))

In [None]:
def extract_product_id(key):
    """Return the integer product ID encoded as the last ``:`` segment of ``key``.

    Args:
        key: Redis key such as ``b'ecommerce:product:42'``. Both ``bytes``
            (the default redis-py reply type) and ``str`` (returned when the
            client decodes responses) are accepted.

    Returns:
        The trailing segment of the key converted to ``int``.

    Raises:
        ValueError: If the trailing segment is not an integer.
    """
    if isinstance(key, (bytes, bytearray)):
        key = key.decode('utf-8')
    return int(key.rsplit(':', 1)[-1])

# Map every Redis key to the product ID extracted from it
product_ids = dict(zip(embeddings, map(extract_product_id, embeddings)))
product_ids

In [None]:
# Cosine similarity helper
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of ``vec1`` and ``vec2``.

    SciPy's ``cosine`` returns the cosine *distance*; the similarity is
    obtained by subtracting that distance from 1.
    """
    distance = cosine(vec1, vec2)
    return 1 - distance

# Compute the cosine similarity of every unordered pair of products.
cosine_similarities = {}

# Keep only keys whose embedding is a usable, non-empty 1-D vector:
# get_name_embedding can return None, and passing that to cosine() would
# raise in the middle of the run.
keys_with_embeddings = [
    key
    for key, vec in embeddings.items()
    if vec is not None and np.ndim(vec) == 1 and np.size(vec) > 0
]

for i, key1 in enumerate(keys_with_embeddings):
    # Start after position i so each unordered pair is visited exactly once.
    for key2 in keys_with_embeddings[i + 1:]:
        sim = cosine_similarity(embeddings[key1], embeddings[key2])
        # Cast to a built-in float so the value is JSON-serializable
        # whatever NumPy scalar type SciPy returns.
        cosine_similarities[(product_ids[key1], product_ids[key2])] = float(sim)

# One summary line instead of one printed line per pair.
print(
    f"Computed {len(cosine_similarities)} pairwise similarities "
    f"for {len(keys_with_embeddings)} products."
)


In [12]:
def initialize_redis_index(redis_client, model_index_name, model_key_base):
    """Create the search index over the item-similarity JSON documents.

    Args:
        redis_client: Redis client used to issue the index creation.
        model_index_name: Name of the index to create.
        model_key_base: Key prefix (without the trailing ``:``) of the
            documents that the index should cover.
    """
    # Two TAG fields for the item pair and a NUMERIC field for the score.
    schema = [
        TagField("$.item_u", as_name="item_id_u"),
        TagField("$.item_v", as_name="item_id_v"),
        NumericField("$.sim", as_name="sim"),
    ]
    # Index JSON documents whose key starts with "<model_key_base>:".
    index_definition = IndexDefinition(
        prefix=[f"{model_key_base}:"],
        index_type=IndexType.JSON,
    )
    search = redis_client.ft(model_index_name)
    search.create_index(schema, definition=index_definition)

In [14]:
# Create the similarity index only when it does not exist yet.
try:
    redis_client.ft(MODEL_INDEX_NAME).info()
    print('Index already exists!')
except redis.exceptions.ResponseError:
    # The server answers FT.INFO with an error reply when the index is
    # unknown. Catching only that reply keeps connection failures and
    # KeyboardInterrupt from being mistaken for "index missing".
    initialize_redis_index(redis_client, MODEL_INDEX_NAME, MODEL_KEY_BASE)

Index already exists!


In [None]:
# Persist every pairwise similarity as a JSON document under
# "<MODEL_KEY_BASE>:<item_u>:<item_v>".
#
# The index on this prefix is declared with IndexType.JSON, so the values
# must be written with JSON.SET; plain SET stores a string that the index
# never picks up.
# NOTE(review): keys left over from an earlier SET-based run are plain
# strings and may need to be deleted before JSON.SET can overwrite them.
for (item_u, item_v), sim in cosine_similarities.items():
    key = f"{MODEL_KEY_BASE}:{item_u}:{item_v}"
    value = {
        # The index maps $.item_u / $.item_v to TAG fields, which index JSON
        # strings (not numbers), so the ids are stored as strings.
        "item_u": str(item_u),
        "item_v": str(item_v),
        # Built-in float so NumPy scalar types never break serialization.
        "sim": float(sim),
    }
    redis_client.json().set(key, "$", value)

print("Cosine similarities have been stored in Redis.")