In [58]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
import random
import json
import os


In [59]:
# 1️⃣ Load data
# Only the first 300,000 interaction records are used. The chunked reader is
# opened as a context manager so the underlying file handle is closed as soon
# as that first chunk has been read (a bare next() on the reader leaves it open).
with pd.read_json(
    './merged_interaction_cleaned_data.json', lines=True, chunksize=300000
) as interaction_reader:
    df = next(interaction_reader)

# Keep rows with a positive user rating; .copy() detaches the result from the
# filtered view so the column assignments below write to an independent frame.
df = df[df['user_rating'] > 0].copy()

# Fill missing business attributes with neutral placeholders so every row can
# later be turned into a complete merchant-state tuple.
df['business_average_rating'] = df['business_average_rating'].fillna(0)
df['business_review_count'] = df['business_review_count'].fillna(0)
df['city'] = df['city'].fillna('Unknown')



In [60]:

# Only the first 100,000 user records are loaded. The reader is used as a
# context manager so the file handle is released once that chunk is read.
with pd.read_json(
    './data/yelp_academic_dataset_user.json', lines=True, chunksize=100000
) as user_reader:
    user_df = next(user_reader)

# Keep the join key plus the three attributes used to describe a user, and
# prefix them with "user_" so they cannot be confused with business columns.
user_df = user_df[['user_id', 'review_count', 'average_stars', 'fans']].rename(columns={
    'review_count': 'user_review_count',
    'average_stars': 'user_average_stars',
    'fans': 'user_fans'
})

# Left join: every interaction row is kept. Interactions whose user_id is not
# among the loaded user records end up with NaN in the three user_* columns.
df = df.merge(user_df, on='user_id', how='left')

In [61]:



# State (business): each row's merchant is described by the tuple
# (average rating, review count, city).
df['merchant_state'] = list(
    df[['business_average_rating', 'business_review_count', 'city']]
    .itertuples(index=False, name=None)
)
# Flatten each tuple into an underscore-joined label and map the labels to
# consecutive integer ids.
df['state_str'] = ['_'.join(str(part) for part in key) for key in df['merchant_state']]
state_encoder = LabelEncoder()
df['state_enc'] = state_encoder.fit_transform(df['state_str'])

# Action (user): each row's user is described by the tuple
# (total number of reviews written, historical average star rating, number of fans).
df['user_type'] = list(
    df[['user_review_count', 'user_average_stars', 'user_fans']]
    .itertuples(index=False, name=None)
)
df['user_type_str'] = ['_'.join(str(part) for part in key) for key in df['user_type']]

action_encoder = LabelEncoder()
df['action_enc'] = action_encoder.fit_transform(df['user_type_str'])

# Reward: the rating the user gave in this interaction.
df['reward'] = df['user_rating']

# Initialize Q-table: one row per encoded merchant state, one column per
# encoded user type, all values starting at zero.
num_states = df['state_enc'].nunique()
num_actions = df['action_enc'].nunique()
Q = np.zeros((num_states, num_actions))

# Q-learning training hyper-parameters
alpha = 0.1    # learning rate: weight of the new estimate in each update
gamma = 0.9    # discount applied to the bootstrapped next-state value
epsilon = 0.1  # probability of bootstrapping from a random action
N_EPOCHS = 10  # number of full passes over the interaction log
SEED = 42      # fixed seed so repeated runs produce the same Q-table

# Dedicated, seeded generator: training becomes reproducible without
# touching the state of the global `random` module.
rng = random.Random(SEED)

states = df['state_enc'].values
actions = df['action_enc'].values
rewards = df['reward'].values

# NOTE(review): the row that follows in `df` is treated as the "next state",
# and the bootstrap action is chosen epsilon-greedily instead of always using
# the max over Q[s_next] as in textbook Q-learning. Both are kept as-is here;
# confirm they are intended.
for epoch in range(N_EPOCHS):
    # The last row has no successor, so it is never used as a source state.
    for i in range(len(states) - 1):
        s = states[i]
        a = actions[i]
        r = rewards[i]
        s_next = states[i + 1]

        if rng.random() < epsilon:
            a_next = rng.randint(0, num_actions - 1)
        else:
            a_next = np.argmax(Q[s_next])

        # Temporal-difference update towards r + gamma * Q[s_next, a_next].
        Q[s, a] += alpha * (r + gamma * Q[s_next, a_next] - Q[s, a])

print(" Q-learning training successful！")

def _decode_user_type(user_type_str):
    """Turn a 'reviews_stars_fans' label back into a readable profile dict.

    Returns None when the label does not consist of exactly three
    underscore-separated fields, when any field is the literal 'nan', or when
    a field cannot be converted to a finite number.
    """
    try:
        review_count, average_stars, fans = user_type_str.split('_')
        if 'nan' in (review_count, average_stars, fans):
            return None  # ignore invalid values
        return {
            'user_review_count': int(float(review_count)),
            'user_average_stars': float(average_stars),
            'user_fans': int(float(fans))
        }
    except (ValueError, OverflowError):
        # ValueError: wrong number of fields or a non-numeric field.
        # OverflowError: int() of an infinite float.
        # Anything else is a programming error and is allowed to propagate.
        return None


def recommend_user_profiles_by_threshold(merchant_state, threshold=4.0):
    """Return the user profiles whose Q-value for a merchant state reaches a threshold.

    Parameters
    ----------
    merchant_state : iterable
        The fields describing the merchant; they are joined with '_' and looked
        up through the module-level ``state_encoder``.
    threshold : float, default 4.0
        Minimum Q-value (inclusive) an action needs in order to be selected.

    Returns
    -------
    list of dict
        One dict per selected, decodable user type with the keys
        'user_review_count', 'user_average_stars' and 'user_fans'. Labels that
        cannot be decoded are skipped.

    Raises
    ------
    Whatever ``state_encoder.transform`` raises for a label it does not know.
    """
    state_str = '_'.join(map(str, merchant_state))
    state_enc = state_encoder.transform([state_str])[0]

    # Column indices (encoded actions) whose learned value clears the threshold.
    q_values = Q[state_enc]
    selected_indices = np.where(q_values >= threshold)[0]

    selected_user_type_strs = action_encoder.inverse_transform(selected_indices)

    # Decode every selected label and drop the entries that came back as None.
    decoded_profiles = (_decode_user_type(s) for s in selected_user_type_strs)
    return [p for p in decoded_profiles if p is not None]


# Demo: recommend user profiles for the merchant state of the first row.
sample_state = df['merchant_state'].iloc[0]
user_profiles = recommend_user_profiles_by_threshold(sample_state)

if not user_profiles:
    print("🧑‍💼No suitable user profiles found (or all were invalid).")
else:
    print("✅Recommended user profiles for this merchant:")
    # One numbered line per profile, numbering starts at 1.
    print("\n".join(
        f"{i}. Reviews: {profile['user_review_count']} | Avg Stars: {profile['user_average_stars']} | Fans: {profile['user_fans']}"
        for i, profile in enumerate(user_profiles, 1)
    ))

✅ Q-learning 训练完成！
🧑‍💼 Recommended user profiles for this merchant:
1. Reviews: 14 | Avg Stars: 4.4 | Fans: 0
2. Reviews: 141 | Avg Stars: 3.86 | Fans: 4
3. Reviews: 2 | Avg Stars: 5.0 | Fans: 0
4. Reviews: 28 | Avg Stars: 3.29 | Fans: 1
5. Reviews: 30 | Avg Stars: 4.3 | Fans: 2
6. Reviews: 32 | Avg Stars: 4.03 | Fans: 1
7. Reviews: 34 | Avg Stars: 4.38 | Fans: 2
8. Reviews: 36 | Avg Stars: 4.98 | Fans: 3
9. Reviews: 4 | Avg Stars: 4.0 | Fans: 0
10. Reviews: 425 | Avg Stars: 2.74 | Fans: 9
11. Reviews: 91 | Avg Stars: 2.98 | Fans: 1


In [62]:
#EVALUATION
def evaluate_precision_recall_f1_merchant_view(df, Q, top_k=5, rating_threshold=4.0):
    """Average precision/recall/F1 of the top-k user types recommended per merchant state.

    For every merchant state that has at least one interaction with
    ``reward >= rating_threshold``, the ``top_k`` actions with the highest
    Q-value are compared against the set of user types that actually gave
    such a reward in that state.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'merchant_state' (hashable tuples), 'reward' and
        'action_enc'.
    Q : array-like
        Indexed as ``Q[state_enc]`` to obtain the action values of one state.
    top_k : int, default 5
        Number of recommended actions per state; also the precision denominator.
    rating_threshold : float, default 4.0
        Minimum reward (inclusive) for an interaction to count as a "like".

    Returns
    -------
    tuple of float
        ``(avg_precision, avg_recall, avg_f1)`` over the evaluated states, or
        ``(0.0, 0.0, 0.0)`` when no state has any ground truth.

    Raises
    ------
    ValueError
        If ``top_k`` is not positive.
    """
    if top_k <= 0:
        raise ValueError("top_k must be a positive integer")

    # Build the label -> encoded-id lookup once instead of calling
    # state_encoder.transform for every state. Labels unknown to the encoder
    # are simply absent from the mapping and get skipped below.
    state_index = {label: idx for idx, label in enumerate(state_encoder.classes_)}

    # Single pass over the liked interactions: merchant state -> set of user
    # types that rated it at or above the threshold. This replaces a full
    # DataFrame scan per unique state.
    liked_rows = df[df['reward'] >= rating_threshold]
    liked_by_state = {}
    for state, action in zip(liked_rows['merchant_state'], liked_rows['action_enc']):
        liked_by_state.setdefault(state, set()).add(action)

    precision_list = []
    recall_list = []
    f1_list = []

    # States without any liked interaction never enter liked_by_state, so they
    # are skipped implicitly (no ground truth to compare against).
    for state, true_likes in liked_by_state.items():
        state_enc = state_index.get('_'.join(map(str, state)))
        if state_enc is None:
            continue  # state label cannot be encoded

        # Recommended user types (actions): indices of the top_k largest Q-values.
        top_actions = np.argsort(Q[state_enc])[::-1][:top_k]
        hits = set(top_actions.tolist()) & true_likes

        precision = len(hits) / top_k
        recall = len(hits) / len(true_likes)
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0

        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    if not precision_list:
        # Nothing could be evaluated; avoid np.mean([]) -> NaN + RuntimeWarning.
        return 0.0, 0.0, 0.0

    avg_precision = float(np.mean(precision_list))
    avg_recall = float(np.mean(recall_list))
    avg_f1 = float(np.mean(f1_list))

    return avg_precision, avg_recall, avg_f1


In [63]:
# One cut-off drives both the evaluation and the metric labels, so the printed
# "@k" can never disagree with the k that was actually evaluated.
EVAL_TOP_K = 5

precision, recall, f1 = evaluate_precision_recall_f1_merchant_view(df, Q, top_k=EVAL_TOP_K)

print("📊 Evaluation based on merchant-to-user-type recommendation:")
print(f"Precision@{EVAL_TOP_K}: {precision:.4f}")
print(f"Recall@{EVAL_TOP_K}: {recall:.4f}")
print(f"F1-Score@{EVAL_TOP_K}: {f1:.4f}")


📊 Evaluation based on merchant-to-user-type recommendation:
Precision@5: 0.5819
Recall@5: 0.8204
F1-Score@5: 0.5595
