In [7]:
import os
from getpass import getpass
from itertools import combinations

import mysql.connector
import numpy as np
import redis
from scipy.spatial.distance import cosine



In [3]:
# Connection settings for the Redis instance that stores the product documents.
redis_host, redis_port = 'localhost', 6379
redis_client = redis.Redis(db=0, port=redis_port, host=redis_host)

In [8]:
# Collect every product key in sorted order.
# SCAN walks the keyspace incrementally instead of issuing one blocking KEYS
# call; SCAN may report the same key more than once, hence the set().
keys = sorted(set(redis_client.scan_iter(match='ecommerce:product:*', count=1000)))

In [9]:
# Fetch the name embedding of a single product from Redis.
def get_name_embedding(key, client=None):
    """Return the ``name_embeddings`` vector stored in the JSON document at ``key``.

    Args:
        key: Redis key of the product document.
        client: Object exposing ``json().get(key)``. Defaults to the
            module-level ``redis_client``.

    Returns:
        A ``float32`` NumPy array built from the document's
        ``name_embeddings`` field, or ``None`` when the document does not
        exist or has no (or an empty) ``name_embeddings`` field.
    """
    if client is None:
        client = redis_client
    data = client.json().get(key)
    if not data:
        return None
    values = data.get('name_embeddings')
    # A missing field must not reach np.array: np.array(None, dtype=float32)
    # is a 0-d NaN scalar, not a vector, and an empty list is not a usable
    # embedding either.
    if not values:
        return None
    return np.array(values, dtype=np.float32)

# Build a key -> embedding mapping covering every product key.
embeddings = dict(zip(keys, map(get_name_embedding, keys)))

In [10]:
def extract_product_id(key):
    """Return the numeric product ID that ends a ``...:<id>`` Redis key.

    Args:
        key: The Redis key, either as ``bytes`` (as returned by a client
            without ``decode_responses``) or as ``str``.

    Returns:
        The text after the last ``:`` converted to ``int``.

    Raises:
        ValueError: If the last ``:``-separated segment is not an integer.
    """
    # Only byte keys need decoding; str keys are used as they are.
    if isinstance(key, (bytes, bytearray)):
        key = key.decode('utf-8')
    return int(key.rsplit(':', 1)[-1])

# Map every Redis key to the product ID parsed out of it.
product_ids = {}
product_ids.update((redis_key, extract_product_id(redis_key)) for redis_key in embeddings)
product_ids

{b'ecommerce:product:100048106': 100048106,
 b'ecommerce:product:10005396': 10005396,
 b'ecommerce:product:100083468': 100083468,
 b'ecommerce:product:100083615': 100083615,
 b'ecommerce:product:100118821': 100118821,
 b'ecommerce:product:100144196': 100144196,
 b'ecommerce:product:100157653': 100157653,
 b'ecommerce:product:100168173': 100168173,
 b'ecommerce:product:1001708': 1001708,
 b'ecommerce:product:1001712': 1001712,
 b'ecommerce:product:1001714': 1001714,
 b'ecommerce:product:100202727': 100202727,
 b'ecommerce:product:100243832': 100243832,
 b'ecommerce:product:100250633': 100250633,
 b'ecommerce:product:100253404': 100253404,
 b'ecommerce:product:100258983': 100258983,
 b'ecommerce:product:100262172': 100262172,
 b'ecommerce:product:100265361': 100265361,
 b'ecommerce:product:100270985': 100270985,
 b'ecommerce:product:100280379': 100280379,
 b'ecommerce:product:100283873': 100283873,
 b'ecommerce:product:100284316': 100284316,
 b'ecommerce:product:100285012': 100285012,
 b

In [None]:
# Cosine similarity between two vectors.
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of ``vec1`` and ``vec2`` as a built-in float.

    Computed as ``1 - cosine_distance``. Cosine distance is undefined when
    either vector has zero norm (it divides by the product of the norms), so
    an all-zero input yields a similarity of ``0.0`` rather than an undefined
    value.

    Args:
        vec1: First 1-D vector (array-like of numbers).
        vec2: Second 1-D vector with the same length as ``vec1``.

    Returns:
        The similarity as a Python ``float``.
    """
    # np.any is False only when every component is zero, i.e. zero norm.
    if not np.any(vec1) or not np.any(vec2):
        return 0.0
    return float(1 - cosine(vec1, vec2))

# Compute the cosine similarity of every unordered pair of products.
# Keys whose embedding is None are left out so that one unusable document
# does not abort the whole computation.
keys_with_embeddings = [key for key, vec in embeddings.items() if vec is not None]

# combinations() yields each pair exactly once, in the same order as the
# nested "i < j" index loop, without visiting the discarded half of the grid.
cosine_similarities = {
    (product_ids[key1], product_ids[key2]): cosine_similarity(embeddings[key1], embeddings[key2])
    for key1, key2 in combinations(keys_with_embeddings, 2)
}


In [None]:
# Connect to MySQL. The password comes from the MYSQL_PASSWORD environment
# variable, falling back to an interactive prompt, so that no credential is
# stored in the notebook.
db_conn = mysql.connector.connect(
    host='localhost',
    user='root',
    password=os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: '),
    database='ecommerce'
)
# The try/finally blocks guarantee that the cursor and the connection are
# released even when a statement fails part-way through.
try:
    cursor = db_conn.cursor()
    try:
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS product_similarity (
                product1 BIGINT,
                product2 BIGINT,
                similarity FLOAT,
                PRIMARY KEY (product1, product2)
            )
        ''')

        # Upsert one row per product pair; an existing pair gets its
        # similarity overwritten.
        upsert_sql = '''
            INSERT INTO product_similarity (product1, product2, similarity)
            VALUES (%s, %s, %s)
            ON DUPLICATE KEY UPDATE similarity = %s
        '''
        for (product1, product2), sim in cosine_similarities.items():
            # Cast to built-in types so the driver never has to convert
            # NumPy scalar types.
            sim = float(sim)
            cursor.execute(upsert_sql, (int(product1), int(product2), sim, sim))

        # Commit only after every row was written successfully.
        db_conn.commit()
    finally:
        cursor.close()
finally:
    db_conn.close()
