# TFRS 推荐系统训练笔记本

本笔记本用于在 Google Colab 上训练 TensorFlow Recommenders 模型

## 使用步骤
1. 在 Colab 中打开此笔记本
2. 运行时 → 更改运行时类型 → 选择 GPU
3. 点击 "运行全部"
4. 等待训练完成（约 2-4 小时）
5. 下载训练好的模型

**注意**: 本笔记本仅用于学习和研究目的

## 1. 环境设置

In [None]:
# Check for a GPU: run nvidia-smi in the notebook's shell
!nvidia-smi

In [None]:
# Install dependencies (TensorFlow and TFRS are pinned to exact versions).
# NOTE(review): tensorflow_datasets and matplotlib are imported by later cells
# but are not installed here — presumably the Colab image provides them; confirm.
# NOTE(review): requests / beautifulsoup4 are not imported by any cell shown
# in this notebook — confirm whether they are still needed.
!pip install -q tensorflow==2.15.0
!pip install -q tensorflow-recommenders==0.7.3
!pip install -q pandas numpy scikit-learn
!pip install -q requests beautifulsoup4
!pip install -q tqdm

In [None]:
# 导入库
import tensorflow as tf
import tensorflow_recommenders as tfrs
import numpy as np
import pandas as pd
from typing import Dict, Text
import os
from tqdm import tqdm

# Report the library versions and the GPUs TensorFlow can see, so the
# runtime configuration can be verified before training.
print(f"TensorFlow version: {tf.__version__}")
print(f"TFRS version: {tfrs.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU available: {gpu_devices}")

## 2. 数据准备

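### 选项 A: 使用公开数据集（推荐）

> 注意：点击“运行全部”时，选项 B 会覆盖此处的 `ratings` 变量；后续单元格依赖选项 B 生成的 `df` 以及 `item_id` 字段。若要改用 MovieLens，请跳过选项 B，将 `movie_id` 重命名为 `item_id`，并相应修改划分训练集/测试集的代码。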

In [None]:
# Use the MovieLens 100k dataset as an example.
# Replace it with your own data for real use.

import tensorflow_datasets as tfds

# Load the "train" split of the ratings and of the movie catalogue.
ratings = tfds.load(name="movielens/100k-ratings", split="train")
movies = tfds.load(name="movielens/100k-movies", split="train")

# Peek at a single rating record to see which fields are available.
for sample in ratings.take(1):
    print(f"User: {sample['user_id']}")
    print(f"Movie: {sample['movie_id']}")
    print(f"Rating: {sample['user_rating']}")

### 选项 B: 生成合成数据

In [None]:
# Generate synthetic data for testing
def generate_synthetic_data(n_users=10000, n_items=1000, n_interactions=100000, seed=42):
    """Generate a synthetic user-item interaction table.

    User and item indices are drawn from Zipf distributions (exponents 1.5
    and 1.3) and folded into range with a modulo, so a few ids receive most
    of the interactions. Ratings are uniform integers in [1, 5]; timestamps
    are uniform integers in [1640000000, 1700000000) (presumably Unix epoch
    seconds).

    Args:
        n_users: Modulus applied to the sampled user indices; must be > 0.
        n_items: Modulus applied to the sampled item indices; must be > 0.
        n_interactions: Number of rows to generate.
        seed: Seed for the private random generator. The default (42)
            reproduces the data the previous global-seed version produced.

    Returns:
        pandas.DataFrame with columns ``user_id`` (``"user_<n>"`` strings),
        ``item_id`` (``"item_<n>"`` strings), ``rating`` and ``timestamp``.

    Raises:
        ValueError: If ``n_users`` or ``n_items`` is not positive.
    """
    if n_users <= 0 or n_items <= 0:
        raise ValueError("n_users and n_items must be positive")

    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, but leaves the global RNG untouched.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: users, items, ratings, timestamps.
    user_indices = rng.zipf(1.5, n_interactions) % n_users
    item_indices = rng.zipf(1.3, n_interactions) % n_items
    ratings = rng.randint(1, 6, n_interactions)
    timestamps = rng.randint(1640000000, 1700000000, n_interactions)

    return pd.DataFrame({
        'user_id': [f'user_{i}' for i in user_indices],
        'item_id': [f'item_{i}' for i in item_indices],
        'rating': ratings,
        'timestamp': timestamps,
    })

# Build the interaction table with the default settings and preview it.
df = generate_synthetic_data()
print(f"Generated {len(df)} interactions")
preview = df.head()
print(preview)

# Wrap the DataFrame as a tf.data.Dataset whose elements are per-row
# dictionaries keyed by column name.
ratings = tf.data.Dataset.from_tensor_slices(
    {column: df[column] for column in df.columns}
)

## 3. 数据预处理

In [None]:
# Project the interaction stream onto its user-id and item-id fields.
user_ids = ratings.map(lambda row: row['user_id'])
item_ids = ratings.map(lambda row: row['item_id'])

# Materialise each field in batches of 1000, then keep the distinct values
# (np.unique also sorts them). These arrays are used later as the lookup
# vocabularies of the two towers.
user_id_batches = list(user_ids.batch(1000))
item_id_batches = list(item_ids.batch(1000))
unique_user_ids = np.unique(np.concatenate(user_id_batches))
unique_item_ids = np.unique(np.concatenate(item_id_batches))

print(f"Unique users: {len(unique_user_ids)}")
print(f"Unique items: {len(unique_item_ids)}")

In [None]:
# Split into training and test sets (80% / 20%)
tf.random.set_seed(42)

# A shuffle is only uniform when the buffer covers the whole dataset, so the
# buffer is sized from the data instead of a hard-coded 100_000 (which would
# silently give a partial shuffle for larger datasets).
n_interactions = len(df)

# reshuffle_each_iteration=False keeps the order fixed across iterations, so
# the take/skip split below selects the same disjoint rows every epoch.
shuffled = ratings.shuffle(n_interactions, seed=42, reshuffle_each_iteration=False)

train_size = int(0.8 * n_interactions)
test_size = n_interactions - train_size
train = shuffled.take(train_size)
test = shuffled.skip(train_size).take(test_size)

print(f"Train size: {train_size}")
print(f"Test size: {test_size}")

## 4. 构建双塔模型

In [None]:
class UserModel(tf.keras.Model):
    """User tower: embeds a user id and refines it with a small dense network.

    Args:
        unique_user_ids: Vocabulary of known user ids for the string lookup.
        embedding_dim: Width of the id embedding (default 64).
    """

    def __init__(self, unique_user_ids, embedding_dim=64):
        super().__init__()

        # Raw id string -> integer index -> embedding vector.
        id_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids,
            mask_token=None,
        )
        # The table has one row more than the vocabulary; presumably this
        # covers StringLookup's out-of-vocabulary index.
        id_embedding = tf.keras.layers.Embedding(
            len(unique_user_ids) + 1,
            embedding_dim,
        )
        self.user_embedding = tf.keras.Sequential([id_lookup, id_embedding])

        # Dense stack applied to the id embedding; the output is 64-wide.
        hidden = tf.keras.layers.Dense(128, activation='relu')
        dropout = tf.keras.layers.Dropout(0.2)
        projection = tf.keras.layers.Dense(64, activation='relu')
        self.dense_layers = tf.keras.Sequential([hidden, dropout, projection])

    def call(self, inputs):
        """Return the user representation for ``inputs['user_id']``."""
        embedded = self.user_embedding(inputs['user_id'])
        return self.dense_layers(embedded)


class ItemModel(tf.keras.Model):
    """Item tower: embeds an item id and refines it with a small dense network.

    Args:
        unique_item_ids: Vocabulary of known item ids for the string lookup.
        embedding_dim: Width of the id embedding (default 64).
    """

    def __init__(self, unique_item_ids, embedding_dim=64):
        super().__init__()

        # Raw id string -> integer index -> embedding vector. The embedding
        # table has len(vocabulary) + 1 rows; presumably the extra row covers
        # StringLookup's out-of-vocabulary index.
        embedding_stack = [
            tf.keras.layers.StringLookup(vocabulary=unique_item_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_item_ids) + 1, embedding_dim),
        ]
        self.item_embedding = tf.keras.Sequential(embedding_stack)

        # Dense stack applied to the id embedding; the output is 64-wide.
        dense_stack = [
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(64, activation='relu'),
        ]
        self.dense_layers = tf.keras.Sequential(dense_stack)

    def call(self, inputs):
        """Return the item representation for ``inputs['item_id']``."""
        embedded = self.item_embedding(inputs['item_id'])
        return self.dense_layers(embedded)


class TwoTowerModel(tfrs.Model):
    """Two-tower retrieval model combining a user tower and an item tower.

    Args:
        user_model: Model mapping a feature dict to user embeddings.
        item_model: Model mapping a feature dict to item embeddings.
        items_dataset: Dataset of ``{'item_id': ...}`` dicts; its embeddings
            form the candidate set for the FactorizedTopK metrics.
    """

    def __init__(self, user_model, item_model, items_dataset):
        super().__init__()

        self.user_model = user_model
        self.item_model = item_model

        # Candidates are (item id, item embedding) pairs computed in batches
        # of 128 by the item tower.
        candidates = items_dataset.batch(128).map(
            lambda x: (x['item_id'], self.item_model(x))
        )

        # Retrieval task with top-K metrics over the candidate set.
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(candidates=candidates)
        )

    def compute_loss(self, features, training=False):
        """Return the retrieval loss for one batch of interactions.

        Args:
            features: Dict with at least ``'user_id'`` and ``'item_id'``.
            training: Whether the call happens inside a training step.
        """
        # Forward the training flag: without it the towers' Dropout layers
        # run in inference mode even while the model is being trained. Keras
        # uses the flag as the default mode for the towers' inner layers.
        user_embeddings = self.user_model(features, training=training)
        item_embeddings = self.item_model(features, training=training)
        return self.task(user_embeddings, item_embeddings)

print("Model classes defined")

In [None]:
# Create the two towers from the id vocabularies
user_model = UserModel(unique_user_ids)
item_model = ItemModel(unique_item_ids)

# Candidate set for retrieval: one entry per distinct item. Building it from
# the interaction log would repeat every item once per interaction, which
# skews the top-K metrics and multiplies the candidate-scoring work.
items_dataset = tf.data.Dataset.from_tensor_slices({'item_id': unique_item_ids})

# Assemble the two-tower model
model = TwoTowerModel(user_model, item_model, items_dataset)

print("Model created")

## 5. 训练模型

In [None]:
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

# Configure callbacks
callbacks = [
    # Stop once validation top-100 accuracy has not improved for 3 epochs
    # and roll back to the best weights. The metric is an accuracy, so
    # higher is better; the mode is stated explicitly rather than inferred
    # from the metric name.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_factorized_top_k/top_100_categorical_accuracy',
        mode='max',
        patience=3,
        restore_best_weights=True
    ),
    # Checkpoint weights only: the model defines compute_loss() but no
    # call(), so a full-model save has no forward pass or input spec to
    # serialise and fails at the end of the first epoch.
    tf.keras.callbacks.ModelCheckpoint(
        filepath='./checkpoints/model_{epoch:02d}',
        save_weights_only=True,
        save_freq='epoch'
    )
]

print("Model compiled")

In [None]:
# Train for up to 10 epochs on batches of 4096 interactions, validating on
# the held-out split after every epoch.
train_batches = train.batch(4096)
validation_batches = test.batch(4096)

history = model.fit(
    train_batches,
    validation_data=validation_batches,
    epochs=10,
    callbacks=callbacks,
    verbose=1,
)

print("Training completed!")

## 6. 评估模型

In [None]:
# Evaluate on the held-out split and collect the metrics as a name -> value dict.
test_batches = test.batch(4096)
metrics = model.evaluate(test_batches, return_dict=True)

print("\nEvaluation Metrics:")
for name, score in metrics.items():
    print(f"{name}: {score:.4f}")

In [None]:
# Visualise the training history: loss on the left, top-100 accuracy on the right.
import matplotlib.pyplot as plt

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))
curves = history.history

# Left panel: training vs. validation loss.
loss_ax.plot(curves['loss'], label='Train Loss')
loss_ax.plot(curves['val_loss'], label='Val Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('Training and Validation Loss')

# Right panel: training vs. validation top-100 accuracy.
acc_ax.plot(curves['factorized_top_k/top_100_categorical_accuracy'], label='Train Acc')
acc_ax.plot(curves['val_factorized_top_k/top_100_categorical_accuracy'], label='Val Acc')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Top-100 Accuracy')
acc_ax.legend()
acc_ax.set_title('Training and Validation Accuracy')

fig.tight_layout()
plt.show()

## 7. 测试推荐

In [None]:
# Build the retrieval index: queries go through the user tower, candidates
# are (item id, item embedding) pairs from the item tower.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Index each distinct item exactly once. Indexing the per-interaction dataset
# would add an item once per interaction, so the top-K results would be
# filled with repeated copies of the same item.
candidate_items = tf.data.Dataset.from_tensor_slices({'item_id': unique_item_ids})
index.index_from_dataset(
    candidate_items.batch(100).map(
        lambda x: (x['item_id'], model.item_model(x))
    )
)

print("Index created")

In [None]:
# Generate recommendations for one test user
test_user_id = unique_user_ids[0]

# The index forwards the query to the user tower, which reads
# inputs['user_id'], so the query must be a feature dict, not a bare tensor.
_, recommendations = index({'user_id': tf.constant([test_user_id])})

# Ids extracted from the dataset are bytes; decode for display, as is
# already done for the item ids below.
display_id = test_user_id.decode('utf-8') if isinstance(test_user_id, bytes) else test_user_id

print(f"\nTop 10 recommendations for {display_id}:")
for rank, item_id in enumerate(recommendations[0, :10].numpy(), start=1):
    print(f"{rank}. {item_id.decode('utf-8')}")

## 8. 导出模型

In [None]:
# Export locations for the trained model and the retrieval index.
model_path = './saved_models/two_tower'
index_path = './saved_models/index'

# Save the full model in SavedModel format.
tf.saved_model.save(obj=model, export_dir=model_path)
print(f"Model saved to {model_path}")

# Save the retrieval index in SavedModel format.
tf.saved_model.save(obj=index, export_dir=index_path)
print(f"Index saved to {index_path}")

In [None]:
# Compress the exported model directory into saved_models.zip
!zip -r saved_models.zip ./saved_models
print("Model compressed to saved_models.zip")
print("\n下载此文件并上传到 Railway 进行部署")

## 9. 下载模型

运行下面的代码下载训练好的模型：

In [None]:
from google.colab import files

# Download the compressed model archive from the Colab runtime.
files.download('saved_models.zip')

# Print the follow-up instructions in one call; the lines and blank-line
# spacing match printing each message separately.
next_steps = [
    "\n模型已下载！",
    "\n下一步:",
    "1. 解压 saved_models.zip",
    "2. 将 saved_models 文件夹上传到项目的 models/ 目录",
    "3. 部署到 Railway",
    "4. 测试 API: curl https://your-app.railway.app/api/recommend/user_1",
]
print("\n".join(next_steps))

## 完成！

恭喜！您已经成功训练了一个 TFRS 推荐模型。

### 下一步
1. 下载训练好的模型
2. 上传到 GitHub 仓库
3. 在 Railway 部署
4. 测试 API

### 改进建议
- 使用真实数据替换合成数据
- 添加更多特征（用户年龄、商品类别等）
- 尝试不同的模型架构
- 调整超参数
- 实现 A/B 测试