In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
import random
import json
import os

In [3]:

# Step 1: Read a larger number of review records
review = pd.read_json('./data/yelp_academic_dataset_review.json', lines=True, nrows=100000)

# Get the user_id and business_id that appear in the reviews
valid_user_ids = set(review['user_id'])
valid_business_ids = set(review['business_id'])

# Step 2: Extract matched user data based on review
user_iter = pd.read_json('./data/yelp_academic_dataset_user.json', lines=True, chunksize=100000)
user_chunks = []
for chunk in user_iter:
    user_chunks.append(chunk[chunk['user_id'].isin(valid_user_ids)])
user = pd.concat(user_chunks, ignore_index=True)

# Step 3: Extract matched business data based on review
business_iter = pd.read_json('./data/yelp_academic_dataset_business.json', lines=True, chunksize=100000)
business_chunks = []
for chunk in business_iter:
    business_chunks.append(chunk[chunk['business_id'].isin(valid_business_ids)])
business = pd.concat(business_chunks, ignore_index=True)

# Final step: merge all datasets
business_df = business[['business_id', 'categories', 'stars', 'review_count']]
user_df = user[['user_id', 'average_stars', 'review_count', 'fans']]
review_df = review[['user_id', 'business_id', 'stars']]

data = review_df \
    .merge(user_df, on='user_id', how='left') \
    .merge(business_df, on='business_id', how='left')

# Check for any NaN values after merging
print(data.isnull().sum())



user_id           0
business_id       0
stars_x           0
average_stars     0
review_count_x    0
fans              0
categories        1
stars_y           0
review_count_y    0
dtype: int64


In [4]:
# Construct user state: (average_stars, review_count, fans)
data['state'] = list(zip(data['average_stars'], data['review_count_x'], data['fans']))

# Construct action (business): use business_id
data['action'] = data['business_id']

# Define reward: use the user's rating of the business as the reward
data['reward'] = data['stars_x']  # i.e., the user's rating

# Convert state from tuple to string for encoding
data['state_str'] = data['state'].apply(lambda x: '_'.join(map(str, x)))

# Encode states and actions
from sklearn.preprocessing import LabelEncoder
state_encoder = LabelEncoder()
data['state_enc'] = state_encoder.fit_transform(data['state_str'])

action_encoder = LabelEncoder()
data['action_enc'] = action_encoder.fit_transform(data['action'])


In [5]:
import numpy as np
import random

num_states = data['state_enc'].nunique()
num_actions = data['action_enc'].nunique()
Q = np.zeros((num_states, num_actions))  # Initialize Q-table

# Q-learning parameters
alpha = 0.1      # learning rate
gamma = 0.9      # discount factor
epsilon = 0.1    # exploration rate

# Extract columns for training sequences
states = data['state_enc'].values
actions = data['action_enc'].values
rewards = data['reward'].values

# Train Q-learning algorithm
for epoch in range(10):  # number of training iterations
    for i in range(len(data) - 1):
        s = states[i]
        a = actions[i]
        r = rewards[i]
        s_next = states[i + 1]

        # ε-greedy policy for action selection
        if random.random() < epsilon:
            a_next = random.randint(0, num_actions - 1)  # explore
        else:
            a_next = np.argmax(Q[s_next])  # exploit

        # Q-value update
        Q[s, a] += alpha * (r + gamma * Q[s_next, a_next] - Q[s, a])

print("✅ Q-learning training completed!")



✅ Q-learning 训练完成！


In [6]:
def recommend(user_state, top_k=5):
    user_state_str = '_'.join(map(str, user_state))
    state_enc = state_encoder.transform([user_state_str])[0]
    
    top_actions = np.argsort(Q[state_enc])[::-1][:top_k]
    recommended_ids = action_encoder.inverse_transform(top_actions)

    recommended_business = business[business['business_id'].isin(recommended_ids)]
    recommended_business = recommended_business.set_index('business_id').loc[recommended_ids].reset_index()

    return recommended_business[['business_id', 'name', 'categories', 'stars', 'city']]


In [7]:
def evaluate_precision_recall_f1(user_states, top_k=5, rating_threshold=4):
    precision_list = []
    recall_list = []
    f1_list = []

    for user_state in user_states:
        # Get top-k recommended businesses for the given user state
        recommendations = recommend(user_state, top_k)['business_id'].tolist()

        # Try to find the corresponding user ID from the state
        try:
            user_id = data[data['state'] == user_state].iloc[0]['user_id']
        except:
            continue  # Skip if no matching user found

        # Ground truth: businesses the user actually liked (rating ≥ threshold)
        true_likes = set(data[(data['user_id'] == user_id) & (data['reward'] >= rating_threshold)]['business_id'])
        if not true_likes:
            continue  # Skip if no liked businesses

        # Compute hits (recommended businesses that were actually liked)
        hits = set(recommendations) & true_likes
        precision = len(hits) / top_k
        recall = len(hits) / len(true_likes)
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0

        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    # Compute average scores
    avg_precision = np.mean(precision_list)
    avg_recall = np.mean(recall_list)
    avg_f1 = np.mean(f1_list)

    return avg_precision, avg_recall, avg_f1

# Example evaluation: take first 100 user states for testing
sample_user_states = data['state'].unique()[:100]
precision, recall, f1 = evaluate_precision_recall_f1(sample_user_states)

print(f"Precision@5: {precision:.4f}")
print(f"Recall@5: {recall:.4f}")
print(f"F1-Score@5: {f1:.4f}")


Precision@5: 0.2286
Recall@5: 0.8052
F1-Score@5: 0.3362
