In [1]:
import sqlite3
import pickle

In [2]:
# Connect to the existing ItemCF SQLite database.
# A plain sqlite3.connect(path) silently creates a new, empty database file when
# the path is wrong, and the mistake only surfaces later as "no such table".
# Opening through a URI with mode=rw keeps normal read/write access but raises
# sqlite3.OperationalError right here if the file does not exist.
# (The path contains no '?', '#' or '%' characters, so it needs no URI escaping.)
db_path = '../../data/processed_data/yelp_ItemCF.db'
conn = sqlite3.connect(f'file:{db_path}?mode=rw', uri=True)

In [3]:
# Function to retrieve user-business mappings from the database
def retrieve_user_business_mapping(conn):
    """Load the id -> index lookup tables for users and businesses.

    Args:
        conn: An open DB-API connection exposing the ``user_mapping`` and
            ``business_mapping`` tables.

    Returns:
        A ``(user_mapping, business_mapping)`` tuple of dicts, keyed by
        ``user_id`` / ``business_id`` with ``user_idx`` / ``business_idx`` as
        values. Entries appear in the order the database returned the rows.
    """
    cur = conn.cursor()

    def load_pairs(query):
        # Every row is a two-column (id, idx) pair, so dict() can consume the
        # fetched rows directly.
        cur.execute(query)
        return dict(cur.fetchall())

    users = load_pairs('''SELECT user_id, user_idx FROM user_mapping''')
    businesses = load_pairs('''SELECT business_id, business_idx FROM business_mapping''')
    return users, businesses

In [4]:
# Retrieve the user-business mappings once, up front.
# business_mapping is read as a module-level global when
# get_top_k_similar_businesses maps similarity indices back to business ids;
# user_mapping supplies the user ids iterated in the final cell.
user_mapping, business_mapping = retrieve_user_business_mapping(conn)

In [5]:
# Function to get businesses a user interacted with
def get_user_businesses(user_id, conn):
    """Return every ``(business_id, stars_review)`` row stored for ``user_id``.

    Args:
        user_id: Value matched against ``user_item_index.user_id``.
        conn: An open DB-API connection exposing the ``user_item_index`` table.

    Returns:
        A list of ``(business_id, stars_review)`` tuples; empty when the user
        has no rows.
    """
    query = '''SELECT business_id, stars_review FROM user_item_index WHERE user_id = ?'''
    # The id is passed as a bound parameter, never interpolated into the SQL.
    rows = conn.cursor().execute(query, (user_id,)).fetchall()
    return rows

In [6]:
# Cache for the business_idx -> business_id lookup used by
# get_top_k_similar_businesses. 'source' keeps a reference to the mapping the
# lookup was built from, so rebinding business_mapping triggers a rebuild.
_business_id_by_idx_cache = {'source': None, 'size': -1, 'lookup': {}}


def _business_id_by_idx():
    """Return a {business_idx: business_id} dict derived from the global business_mapping."""
    cache = _business_id_by_idx_cache
    if cache['source'] is not business_mapping or cache['size'] != len(business_mapping):
        cache['lookup'] = {idx: business_id for business_id, idx in business_mapping.items()}
        cache['source'] = business_mapping
        cache['size'] = len(business_mapping)
    return cache['lookup']


# Function to get top-k similar businesses for a given business
def get_top_k_similar_businesses(business_id, k, conn):
    """Return the ``k`` highest-scoring neighbours of ``business_id``.

    The ``similarity_vector`` column of ``item_item_similarity`` is unpickled
    into an ``(indices, data)`` pair. The entries are ranked by descending
    score and each index is translated back to a business id through the
    module-level ``business_mapping`` (business_id -> business_idx).

    The translation uses the stored ``business_idx`` values rather than the
    position of a key inside the dict: the mapping is loaded without an
    ORDER BY, so dict position is not guaranteed to equal ``business_idx``.
    Assumes the indices in the similarity vector are integer ``business_idx``
    values -- TODO confirm against the code that builds the table.

    Args:
        business_id: Value matched against ``item_item_similarity.item_id``.
        k: Maximum number of neighbours to return.
        conn: An open DB-API connection exposing ``item_item_similarity``.

    Returns:
        A list of ``(business_id, score)`` tuples sorted by descending score,
        or an empty list when ``business_id`` has no similarity row.

    Raises:
        IndexError: If a similarity index has no entry in ``business_mapping``.
    """
    cursor = conn.cursor()
    cursor.execute('''SELECT similarity_vector FROM item_item_similarity WHERE item_id = ?''', (business_id,))
    result = cursor.fetchone()

    if result is None:
        return []

    # NOTE(security): pickle.loads can execute arbitrary code. This is only
    # acceptable while the database file is produced by a trusted pipeline.
    similarity_vector = pickle.loads(result[0])
    indices, data = similarity_vector

    # Get top-k similar businesses (ties keep their stored order)
    top_k = sorted(zip(indices, data), key=lambda x: -x[1])[:k]

    # Map indices to business ids via the inverse mapping (one dict lookup
    # each, instead of rebuilding the full key list for every result).
    id_by_idx = _business_id_by_idx()
    similar_businesses = []
    for idx, score in top_k:
        try:
            similar_id = id_by_idx[int(idx)]
        except KeyError:
            raise IndexError(f"business index {idx} is not present in business_mapping") from None
        similar_businesses.append((similar_id, score))

    return similar_businesses

In [7]:
# Function to predict user interests based on similar businesses
def predict_user_interests(user_id, k=10, conn=conn):
    """Recommend up to ``k`` businesses for ``user_id`` by summing neighbour scores.

    For every business the user has a row for, the top-``k`` similar
    businesses are fetched and their scores are accumulated per candidate.
    The user's own ratings are not used as weights, and businesses the user
    already interacted with are not filtered out of the result.

    Args:
        user_id: User whose interactions seed the recommendation.
        k: Number of neighbours fetched per seed business and the maximum
            number of recommendations returned.
        conn: Database connection. The default is the module-level ``conn``
            captured when this function was defined.

    Returns:
        A list of ``(business_id, total_score)`` tuples sorted by descending
        total score, at most ``k`` long.
    """
    totals = {}
    for seen_business_id, _rating in get_user_businesses(user_id, conn):
        for candidate_id, score in get_top_k_similar_businesses(seen_business_id, k, conn):
            # First sighting stores the score as-is; later sightings accumulate.
            totals[candidate_id] = totals[candidate_id] + score if candidate_id in totals else score

    # Rank candidates by accumulated score, highest first.
    ranked = list(totals.items())
    ranked.sort(key=lambda pair: -pair[1])
    return ranked[:k]

In [8]:
# Take the first 10 user ids in user_mapping (the order the database returned
# them in, not a ranking) and print their recommendations.
user_ids = list(user_mapping.keys())[:10]

try:
    for user_id in user_ids:
        recommendations = predict_user_interests(user_id, k=10)
        print(f"Recommendations for user {user_id}: {recommendations}")
        # TODO(review): the original note here said "get the review"; fetching
        # review details for the recommendations is not implemented.
finally:
    # Close the database connection even if a prediction fails part-way through.
    # After this, re-run the connection cell and the predict_user_interests
    # definition (its default conn is bound at definition time) before calling
    # it again.
    conn.close()

Recommendations for user razUB7ciYZluvxWM6shmtw: [('X6ExflwppH41zPcnSRv0uA', np.float64(550.0)), ('KIjOP0a4gTe-zRdnVCgsGA', np.float64(396.0)), ('k9W1gDPg4TziOTqY_xjTyQ', np.float64(351.0)), ('xVFNGIsWBTcYmKB85HH6Qg', np.float64(332.0)), ('VgkRK_mHhy15M3bZk0isTw', np.float64(306.0)), ('01ikDdFTvc2-YXeyv-YXjg', np.float64(304.0)), ('297f-8AdTbu-wwVedvac6w', np.float64(303.0)), ('NsEKOHrD605IY1jzPzuf9w', np.float64(293.0)), ('jwkgVfSLydNMAF2US6bL1g', np.float64(274.0)), ('qa1kx1sBNVUYhzzZI4u4rA', np.float64(263.0))]
Recommendations for user 3YhG4h4Ok654iVfqdmkuRg: [('pqZYSvF_qrmCjkr6frf4RQ', np.float64(1860.0)), ('6FPr7wbkqnRlRmFVniwmHg', np.float64(1209.0)), ('Esf3-D_44pArPd9GqysoCg', np.float64(984.0)), ('fM6XQeGW70a4EFbjqAAy3Q', np.float64(853.0)), ('2fTfpN5SggLgW4LlzptMPg', np.float64(777.0)), ('mMnkbIMG9MUW-rb5a5Q8Og', np.float64(758.0)), ('VXPpzhD7mA262gIv1T0WPQ', np.float64(674.0)), ('26EPJeHypRPvF4UgJWi2Zw', np.float64(645.0)), ('Tqt99-TsbRT4bmsXHyA1lA', np.float64(569.0)), ('Qx3