# 12. 模型部署与推理（Serving Demo）

这个 Notebook 的目标：把前面训练好的模型（召回 + 排序）串起来，做一个**可本地运行的“推荐服务”Demo**。

你可以把它当成面试/实习时解释“线上推理链路”的样板：

1. **召回（Retrieval）**：双塔 / SASRec → FAISS 检索 TopK 候选
2. **特征（Features）**：复用离线特征表 + 计算实时交互特征
3. **排序（Ranking）**：LGBM / DeepFM / DIN 预测分数并 rerank
4. **冷启动与兜底**：无历史用户回落到热门
5. **导出（Export）**：SavedModel（可选 ONNX，若环境有依赖）

> 注：本仓库默认依赖里没有 `onnxruntime/tf2onnx`，所以 ONNX 部分会做成可选跳过，不影响 notebook 运行。


In [None]:
import os
import time
import math
import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
import faiss
import lightgbm as lgb
from dotenv import find_dotenv, load_dotenv

tf.get_logger().setLevel('ERROR')


def find_repo_root(start: Path) -> Path:
    """Search ``start`` and its ancestors for a repository root.

    A directory is treated as the root when it contains ``pyproject.toml``
    or ``.git``. At most ten directories are inspected (``start`` itself
    plus up to nine ancestors).

    Returns:
        The first matching directory, or ``start`` unchanged when none of
        the inspected directories carries a marker.
    """
    markers = ('pyproject.toml', '.git')
    # ``start.parents`` ends at the filesystem root (or ``.`` for relative
    # paths), so the walk terminates there even before the depth limit.
    lineage = (start, *start.parents)[:10]
    for candidate in lineage:
        if any((candidate / marker).exists() for marker in markers):
            return candidate
    return start


# Locate the repository from the current working directory, then load a
# .env file; values from .env replace variables already in the environment.
REPO_ROOT = find_repo_root(Path.cwd())
load_dotenv(find_dotenv(), override=True)

# Both data roots can be redirected through environment variables and
# default to folders inside the repository.
RAW_DATA_PATH = Path(os.environ.get('FUNREC_RAW_DATA_PATH', REPO_ROOT / 'data'))
PROCESSED_DATA_PATH = Path(os.environ.get('FUNREC_PROCESSED_DATA_PATH', REPO_ROOT / 'tmp'))

# Prefer the nested "dataset/" layout; otherwise use the flat layout.
DATA_PATH = RAW_DATA_PATH / 'dataset' / 'news_recommendation'
DATA_PATH = DATA_PATH if DATA_PATH.exists() else RAW_DATA_PATH / 'news_recommendation'

# Working directory for every artifact this project reads or writes.
PROJECT_PATH = PROCESSED_DATA_PATH / 'projects' / 'news_recommendation_system'
PROJECT_PATH.mkdir(parents=True, exist_ok=True)

print(f'DATA_PATH   : {DATA_PATH}')
print(f'PROJECT_PATH: {PROJECT_PATH}')


## 1) 加载基础产物（历史、特征表、热门兜底）

Serving 里最关键的是 **offline/online 一致性**：
- 训练和推理要用**一致的特征定义**、**一致的 ID 编码**
- 召回与排序的候选构建、过滤（比如去掉已看）要一致

这里默认复用基础版（1-6）产物：
- `train_hist.pkl` / `valid_last.pkl`
- `user_features.pkl` / `item_features.pkl` / `user_last_click.pkl`
- `popular_items.pkl`（冷启动兜底）
- `feature_cols.pkl` / `lgb_ranker.txt`（排序阶段使用的特征列与 LGBM 模型）


In [None]:
# Artifacts written by the baseline notebooks that this demo expects to find
# under PROJECT_PATH. Missing files only trigger a warning: every loader
# below substitutes an empty value so the notebook keeps running.
required = [
    'train_hist.pkl',
    'valid_last.pkl',
    'user_features.pkl',
    'item_features.pkl',
    'user_last_click.pkl',
    'popular_items.pkl',
    'feature_cols.pkl',
    'lgb_ranker.txt',
]
missing = [f for f in required if not (PROJECT_PATH / f).exists()]
if missing:
    print('[warn] missing baseline artifacts:', missing)
    print('请先运行基础版 notebooks 1-6（尤其是 5.feature_engineering 与 6.ranking）')


def _read_frame_or_empty(name: str) -> pd.DataFrame:
    """Read a pickled DataFrame from PROJECT_PATH, or return an empty frame if the file is absent."""
    path = PROJECT_PATH / name
    return pd.read_pickle(path) if path.exists() else pd.DataFrame()


def _load_pickle_or_empty_list(name: str):
    """Unpickle a file from PROJECT_PATH, or return ``[]`` if the file is absent.

    The file is opened in a ``with`` block so the handle is closed right
    after loading instead of being left to the garbage collector.
    """
    path = PROJECT_PATH / name
    if not path.exists():
        return []
    # NOTE: pickle can execute arbitrary code while loading; only load
    # artifacts produced locally by the earlier notebooks.
    with path.open('rb') as fh:
        return pickle.load(fh)


train_hist = _read_frame_or_empty('train_hist.pkl')
valid_last = _read_frame_or_empty('valid_last.pkl')
user_features = _read_frame_or_empty('user_features.pkl')
item_features = _read_frame_or_empty('item_features.pkl')
user_last_click = _read_frame_or_empty('user_last_click.pkl')
popular_items = _load_pickle_or_empty_list('popular_items.pkl')
feature_cols = _load_pickle_or_empty_list('feature_cols.pkl')

# Per-user click history: article ids ordered by click timestamp.
if not train_hist.empty:
    train_hist = train_hist.sort_values(['user_id', 'click_timestamp'])
    user_hist = train_hist.groupby('user_id')['click_article_id'].apply(list).to_dict()
else:
    user_hist = {}

# Article metadata, merged back onto recommendations for display.
articles = pd.read_csv(DATA_PATH / 'articles.csv') if (DATA_PATH / 'articles.csv').exists() else pd.DataFrame()

print('train_hist:', train_hist.shape)
print('valid_last:', valid_last.shape)
print('user_features:', user_features.shape)
print('item_features:', item_features.shape)
print('popular_items:', len(popular_items))
print('articles:', articles.shape)


### （可选）加载文章向量计算 `emb_sim_last`

`articles_emb.csv` 很大（约 36 万 × 250），而 Serving/面试里的重点是链路与一致性，因此：
- 默认 `LOAD_EMB=False`，此时特征 `emb_sim_last` 直接置为 0
- 如果你想更真实地复用基础特征，可以改成 `True`，加载向量并计算相似度


In [None]:
# Toggle for loading the (large) article embedding table. When the table is
# not loaded, the three globals below stay None and compute_emb_sim_last
# returns zeros.
LOAD_EMB = False

emb_path = DATA_PATH / 'articles_emb.csv'
article_emb = None
id2idx = None
emb_matrix = None

if not LOAD_EMB:
    print('[info] skip loading articles_emb.csv (LOAD_EMB=False)')
elif not emb_path.exists():
    # Previously this case also reported "LOAD_EMB=False", which hid the
    # real reason the embeddings were unavailable.
    print('[warn] LOAD_EMB=True but articles_emb.csv not found:', emb_path)
else:
    t0 = time.time()
    article_emb = pd.read_csv(emb_path)
    emb_cols = [c for c in article_emb.columns if c.startswith('emb_')]
    emb_matrix = article_emb[emb_cols].values.astype('float32')
    # L2-normalise each row so a dot product equals cosine similarity; the
    # epsilon guards against division by zero for all-zero rows.
    emb_matrix /= np.linalg.norm(emb_matrix, axis=1, keepdims=True) + 1e-12
    article_ids = article_emb['article_id'].values.astype(int)
    # Map raw article id -> row position in emb_matrix.
    id2idx = {int(aid): int(i) for i, aid in enumerate(article_ids)}
    print('loaded articles_emb:', article_emb.shape, 'time:', round(time.time() - t0, 2), 's')


## 2) 召回 Serving（Two-Tower / SASRec / Popular）

召回阶段的“线上形态”通常是：
- **用户塔**：输入用户特征/序列 → 输出 user embedding
- **向量检索**：FAISS/ANN → TopK item_id
- **过滤规则**：去掉已看、黑名单、时间窗口等

下面我们提供 3 种候选生成：
- `two_tower`：Notebook 7 训练的双塔 + FAISS
- `sasrec`：Notebook 9 训练的 SASRec user encoder + FAISS
- `popular`：冷启动兜底


In [None]:
@dataclass(frozen=True)
class IdMap:
    """Immutable lookup from raw ids to contiguous integer codes.

    A raw id found at position ``p`` of ``classes_`` is encoded as
    ``p + offset``; ids not present in ``classes_`` are encoded as ``0``.
    """

    # Label for this mapping.
    name: str
    # Known raw ids; the position in this array determines the code.
    classes_: np.ndarray
    # Added to every position so that code 0 stays free for unknown ids.
    offset: int = 1

    @property
    def vocab_size(self) -> int:
        """Number of known ids plus ``offset``."""
        return int(self.offset + len(self.classes_))

    def transform(self, values: np.ndarray) -> np.ndarray:
        """Encode ``values`` as an int32 array, using 0 for unknown ids."""
        # Index.get_indexer reports ids that are not in the index as -1.
        positions = pd.Index(self.classes_).get_indexer(np.asarray(values))
        encoded = positions + self.offset
        encoded[positions < 0] = 0
        return encoded.astype('int32')


def pad_left(seqs: List[List[int]], max_len: int, pad: int = 0) -> np.ndarray:
    """Left-pad (and left-truncate) integer sequences into a 2-D int32 array.

    Each sequence keeps its last ``max_len`` elements, right-aligned in its
    row; the remaining leading cells hold ``pad``.

    Args:
        seqs: Sequences to pack. Empty or ``None`` entries give an all-``pad`` row.
        max_len: Number of columns in the result.
        pad: Fill value for unused cells.

    Returns:
        Array of shape ``(len(seqs), max_len)`` with dtype int32.
    """
    out = np.full((len(seqs), max_len), pad, dtype=np.int32)
    if max_len == 0:
        # ``s[-0:]`` is the whole sequence, so the loop below would try to
        # write it into a zero-width row and fail; nothing fits anyway.
        return out
    for i, s in enumerate(seqs):
        # ``len`` instead of truthiness so array-like sequences work too.
        if s is None or len(s) == 0:
            continue
        tail = s[-max_len:]
        out[i, -len(tail):] = np.asarray(tail, dtype=np.int32)
    return out


def load_two_tower_runtime() -> Optional[Tuple[Dict[str, IdMap], tf.keras.Model, faiss.Index]]:
    """Load the two-tower retrieval artifacts from disk.

    Reads ``id_maps.pkl``, ``user_tower.keras`` and ``faiss_index.bin`` from
    ``PROJECT_PATH/artifacts/two_tower/dssm_inbatch``.

    Returns:
        ``(id_maps, user_tower, index)``, or ``None`` if any of the three
        files is missing.
    """
    two_dir = PROJECT_PATH / 'artifacts' / 'two_tower' / 'dssm_inbatch'
    maps_path = two_dir / 'id_maps.pkl'
    user_path = two_dir / 'user_tower.keras'
    index_path = two_dir / 'faiss_index.bin'
    if not (maps_path.exists() and user_path.exists() and index_path.exists()):
        return None
    # NOTE: pickle can execute arbitrary code while loading; only load
    # locally produced artifacts. The ``with`` block closes the handle.
    with open(maps_path, 'rb') as fh:
        id_maps = pickle.load(fh)
    # compile=False: the model is loaded without compiling it.
    user_tower = tf.keras.models.load_model(user_path, compile=False)
    index = faiss.read_index(str(index_path))
    return id_maps, user_tower, index


def load_sasrec_runtime() -> Optional[Tuple[IdMap, tf.keras.Model, faiss.Index]]:
    """Load the SASRec retrieval artifacts from disk.

    Reads ``item_id_map.pkl``, ``user_tower.keras`` and ``faiss_index.bin``
    from ``PROJECT_PATH/artifacts/sequence/sasrec_inbatch``.

    Returns:
        ``(item_id_map, user_tower, index)``, or ``None`` if any of the
        three files is missing.
    """
    seq_dir = PROJECT_PATH / 'artifacts' / 'sequence' / 'sasrec_inbatch'
    map_path = seq_dir / 'item_id_map.pkl'
    user_path = seq_dir / 'user_tower.keras'
    index_path = seq_dir / 'faiss_index.bin'
    if not (map_path.exists() and user_path.exists() and index_path.exists()):
        return None
    # NOTE: pickle can execute arbitrary code while loading; only load
    # locally produced artifacts. The ``with`` block closes the handle.
    with open(map_path, 'rb') as fh:
        item_id_map = pickle.load(fh)
    # compile=False: the model is loaded without compiling it.
    user_tower = tf.keras.models.load_model(user_path, compile=False)
    index = faiss.read_index(str(index_path))
    return item_id_map, user_tower, index


def retrieve_two_tower(user_id: int, topk: int = 100, max_seq_len: int = 30) -> pd.DataFrame:
    """Retrieve up to ``topk`` unseen candidates with the two-tower model.

    The user id and click history are encoded with the saved id maps, fed
    through the user tower, and the L2-normalised user vector is searched
    against the FAISS index. Items already in the user's history are dropped.

    Args:
        user_id: Raw user id.
        topk: Maximum number of candidates to return.
        max_seq_len: Length the encoded history is padded/truncated to.

    Returns:
        Frame with columns ``user_id``, ``article_id``, ``recall_score`` and
        ``recall_rank`` (1-based). Empty when the artifacts are missing;
        popular items when the user has no history.
    """
    runtime = load_two_tower_runtime()
    if runtime is None:
        print('[skip] missing two_tower artifacts, run 7.two_tower_recall.ipynb first')
        return pd.DataFrame(columns=['user_id', 'article_id', 'recall_score', 'recall_rank'])
    id_maps, user_tower, index = runtime
    user_id_map: IdMap = id_maps['user']
    item_id_map: IdMap = id_maps['item']

    hist = user_hist.get(int(user_id), [])
    if not hist:
        # Cold start: no history to encode, fall back to popular items.
        return retrieve_popular(user_id=user_id, topk=topk)

    u_enc = user_id_map.transform(np.asarray([int(user_id)], dtype=np.int64))
    h_enc = item_id_map.transform(np.asarray(hist, dtype=np.int64)).tolist()
    X_hist = pad_left([h_enc], max_len=max_seq_len)

    feats = {'user_id': u_enc, 'hist_article_id': X_hist}
    u_vec = user_tower.predict(feats, verbose=0).astype('float32')
    faiss.normalize_L2(u_vec)

    hist_set = set(hist)
    # Over-fetch enough neighbours to survive the seen-item filter. The
    # filter uses the FULL history (not just the last ``max_seq_len`` items),
    # so the margin must grow with the history size; it never drops below
    # the previous ``max_seq_len`` margin. Asking for more than the index
    # holds only yields -1 labels, so cap at ``ntotal``.
    search_k = int(topk + max(len(hist_set), max_seq_len) + 10)
    search_k = max(1, min(search_k, int(getattr(index, 'ntotal', search_k))))
    D, I = index.search(u_vec, search_k)

    rows = []
    # NOTE(review): index labels are used directly as raw article ids —
    # confirm the index was built with article ids as labels.
    for score, aid in zip(D[0].tolist(), I[0].tolist()):
        aid = int(aid)
        if aid <= 0:
            # FAISS returns -1 for empty slots; label 0 is skipped as well.
            continue
        if aid in hist_set:
            continue
        rows.append((int(user_id), aid, float(score)))
        if len(rows) >= topk:
            break

    out = pd.DataFrame(rows, columns=['user_id', 'article_id', 'recall_score'])
    out['recall_rank'] = np.arange(1, len(out) + 1)
    return out


def retrieve_sasrec(user_id: int, topk: int = 100, max_seq_len: int = 50) -> pd.DataFrame:
    """Retrieve up to ``topk`` unseen candidates with the SASRec user encoder.

    The click history is encoded with the saved item id map, fed through the
    user tower, and the L2-normalised user vector is searched against the
    FAISS index. Items already in the user's history are dropped.

    Args:
        user_id: Raw user id.
        topk: Maximum number of candidates to return.
        max_seq_len: Length the encoded history is padded/truncated to.

    Returns:
        Frame with columns ``user_id``, ``article_id``, ``recall_score`` and
        ``recall_rank`` (1-based). Empty when the artifacts are missing;
        popular items when the user has no history.
    """
    runtime = load_sasrec_runtime()
    if runtime is None:
        print('[skip] missing sasrec artifacts, run 9.sequence_modeling.ipynb first')
        return pd.DataFrame(columns=['user_id', 'article_id', 'recall_score', 'recall_rank'])
    item_id_map, user_tower, index = runtime

    hist = user_hist.get(int(user_id), [])
    if not hist:
        # Cold start: no sequence to encode, fall back to popular items.
        return retrieve_popular(user_id=user_id, topk=topk)

    h_enc = item_id_map.transform(np.asarray(hist, dtype=np.int64)).tolist()
    X_seq = pad_left([h_enc], max_len=max_seq_len)

    u_vec = user_tower.predict({'seq_ids': X_seq}, verbose=0).astype('float32')
    faiss.normalize_L2(u_vec)

    hist_set = set(hist)
    # Over-fetch enough neighbours to survive the seen-item filter. The
    # filter uses the FULL history (not just the last ``max_seq_len`` items),
    # so the margin must grow with the history size; it never drops below
    # the previous ``max_seq_len`` margin. Asking for more than the index
    # holds only yields -1 labels, so cap at ``ntotal``.
    search_k = int(topk + max(len(hist_set), max_seq_len) + 10)
    search_k = max(1, min(search_k, int(getattr(index, 'ntotal', search_k))))
    D, I = index.search(u_vec, search_k)

    rows = []
    # NOTE(review): index labels are used directly as raw article ids —
    # confirm the index was built with article ids as labels.
    for score, aid in zip(D[0].tolist(), I[0].tolist()):
        aid = int(aid)
        if aid <= 0:
            # FAISS returns -1 for empty slots; label 0 is skipped as well.
            continue
        if aid in hist_set:
            continue
        rows.append((int(user_id), aid, float(score)))
        if len(rows) >= topk:
            break

    out = pd.DataFrame(rows, columns=['user_id', 'article_id', 'recall_score'])
    out['recall_rank'] = np.arange(1, len(out) + 1)
    return out


def retrieve_popular(user_id: int, topk: int = 100) -> pd.DataFrame:
    """Return up to ``topk`` popular items the user has not clicked yet.

    Walks ``popular_items`` in order, skipping anything in the user's
    history, until ``topk`` items are collected or the list is exhausted.
    The whole list is eligible (not just its first ``2 * topk`` entries), so
    users who already saw many popular items still get a full candidate set.

    Returns:
        Frame with columns ``user_id``, ``article_id``, ``recall_score`` and
        ``recall_rank`` (1-based). Empty when ``topk <= 0``.
    """
    seen = set(user_hist.get(int(user_id), []))
    rows = []
    if topk > 0:
        for i, aid in enumerate(popular_items):
            aid = int(aid)
            if aid in seen:
                continue
            # Popularity has no natural similarity score; use a placeholder
            # that decreases with the item's position in the popular list.
            rows.append((int(user_id), aid, float(1.0 / (i + 1))))
            if len(rows) >= topk:
                break
    out = pd.DataFrame(rows, columns=['user_id', 'article_id', 'recall_score'])
    out['recall_rank'] = np.arange(1, len(out) + 1)
    return out


def retrieve_candidates(user_id: int, method: str = 'two_tower', topk: int = 100) -> pd.DataFrame:
    """Dispatch candidate retrieval to the strategy named by ``method``.

    ``method`` is matched case-insensitively against ``'two_tower'``,
    ``'sasrec'`` and ``'popular'``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    key = method.lower()
    # Thunks keep the lookup lazy: only the chosen retriever is touched.
    strategies = {
        'two_tower': lambda: retrieve_two_tower(user_id=user_id, topk=topk),
        'sasrec': lambda: retrieve_sasrec(user_id=user_id, topk=topk),
        'popular': lambda: retrieve_popular(user_id=user_id, topk=topk),
    }
    run = strategies.get(key)
    if run is None:
        raise ValueError(f'unknown retrieval method: {key}')
    return run()


## 3) 构建排序特征（复用离线特征定义）

这里复用基础版 notebook 5 的特征逻辑，保证一致性：
- merge：`user_features` + `user_last_click` + `item_features`
- 计算：`is_same_category` / `item_age_hours` / `time_gap_hours` / `emb_sim_last`


In [None]:
def compute_emb_sim_last(df: pd.DataFrame) -> np.ndarray:
    """Dot product between each candidate's embedding and the user's last click.

    Reads the ``article_id`` and ``last_click_article_id`` columns of ``df``.
    Rows where either article has no entry in ``id2idx`` get 0, and so does
    every row when the embedding globals are not loaded.

    Returns:
        float32 array with one value per row of ``df``.
    """
    scores = np.zeros(len(df), dtype='float32')
    if emb_matrix is None or id2idx is None:
        return scores

    cand_pos = df['article_id'].map(id2idx)
    last_pos = df['last_click_article_id'].map(id2idx)
    # Positional boolean mask, so it is independent of the frame's index.
    usable = (cand_pos.notna() & last_pos.notna()).values
    if not usable.any():
        return scores

    cand_vecs = emb_matrix[cand_pos[usable].astype(int)]
    last_vecs = emb_matrix[last_pos[usable].astype(int)]
    scores[usable] = (cand_vecs * last_vecs).sum(axis=1)
    return scores


def build_ranking_features(cand_df: pd.DataFrame) -> pd.DataFrame:
    """Attach ranking features to a candidate frame.

    Left-joins ``user_features`` and ``user_last_click`` on ``user_id`` and
    ``item_features`` on ``article_id``, then derives ``is_same_category``,
    ``item_age_hours``, ``time_gap_hours`` and ``emb_sim_last``.

    Returns:
        ``cand_df`` itself when it is empty, otherwise a new merged frame.
    """
    if cand_df.empty:
        return cand_df

    joins = (
        (user_features, 'user_id'),
        (user_last_click, 'user_id'),
        (item_features, 'article_id'),
    )
    feats = cand_df
    for table, key in joins:
        feats = feats.merge(table, on=key, how='left')

    # Divisor that turns the timestamp differences into hours.
    per_hour = 3600_000
    last_ts = feats['last_click_timestamp']

    feats['is_same_category'] = (feats['category_id'] == feats['user_top_category']).astype(int)
    # Missing timestamps yield NaN differences, which are replaced by 0.
    feats['item_age_hours'] = ((last_ts - feats['created_at_ts']) / per_hour).fillna(0)
    feats['time_gap_hours'] = ((last_ts - feats['item_last_click_ts']) / per_hour).fillna(0)

    feats['emb_sim_last'] = compute_emb_sim_last(feats)
    return feats


## 4) 排序 Serving（LGBM / DeepFM / DIN）

- **LGBMRanker**：基础版 notebook 6 的模型（最稳的 fallback）
- **DeepFM / DIN**：Plus notebook 8 的深度排序模型（更贴近工业面试）

DeepFM/DIN 的推理需要：
- 和训练一致的 `scaler.pkl`（dense 标准化）
- DeepFM 的 `deepfm_factorizers.pkl`（稀疏特征编码表）
- DIN 的 `din_encoders.pkl`（user/item 的 id 编码）


In [None]:
def load_lgbm_ranker() -> Optional[lgb.Booster]:
    """Load the LightGBM booster from ``PROJECT_PATH/lgb_ranker.txt``.

    Returns ``None`` when the model file does not exist.
    """
    model_file = PROJECT_PATH / 'lgb_ranker.txt'
    return lgb.Booster(model_file=str(model_file)) if model_file.exists() else None


def rank_with_lgbm(df: pd.DataFrame, topn: int = 10) -> pd.DataFrame:
    """Score candidates with the LightGBM booster and keep the best ``topn``.

    Uses the columns listed in ``feature_cols`` (NaN replaced by 0) as model
    input. When the booster or ``feature_cols`` is unavailable, a message is
    printed and the first ``topn`` rows are returned without a ``rank_score``
    column.
    """
    model = load_lgbm_ranker()
    if not (model is not None and feature_cols):
        print('[skip] missing lgb_ranker.txt or feature_cols.pkl')
        return df.head(topn)

    scores = model.predict(df[feature_cols].fillna(0).values)
    scored = df.assign(rank_score=scores)
    return scored.sort_values('rank_score', ascending=False).head(topn)


def load_deep_ranking_artifacts():
    """Load the deep-ranking artifacts (scaler, encoders, DeepFM, DIN).

    Looks in ``PROJECT_PATH/artifacts/ranking/deep_models``. The three pickle
    files (``scaler.pkl``, ``deepfm_factorizers.pkl``, ``din_encoders.pkl``)
    are mandatory; each ``.keras`` model is optional.

    Returns:
        ``None`` when the directory or any mandatory pickle is missing.
        Otherwise a dict with keys ``dir``, ``scaler``,
        ``deepfm_factorizers``, ``din_encoders``, ``deepfm`` and ``din``,
        where a model entry is ``None`` if its file is absent.
    """

    def _unpickle(path):
        # NOTE: pickle can execute arbitrary code while loading; only load
        # locally produced artifacts. The ``with`` block closes the handle
        # instead of leaking it.
        with open(path, 'rb') as fh:
            return pickle.load(fh)

    art_dir = PROJECT_PATH / 'artifacts' / 'ranking' / 'deep_models'
    deepfm_path = art_dir / 'deepfm.keras'
    din_path = art_dir / 'din.keras'
    scaler_path = art_dir / 'scaler.pkl'
    deepfm_fac_path = art_dir / 'deepfm_factorizers.pkl'
    din_enc_path = art_dir / 'din_encoders.pkl'
    if not art_dir.exists():
        return None
    if not (scaler_path.exists() and deepfm_fac_path.exists() and din_enc_path.exists()):
        return None
    scaler = _unpickle(scaler_path)
    deepfm_fac = _unpickle(deepfm_fac_path)
    din_enc = _unpickle(din_enc_path)
    deepfm = tf.keras.models.load_model(deepfm_path) if deepfm_path.exists() else None
    din = tf.keras.models.load_model(din_path) if din_path.exists() else None
    return {'dir': art_dir, 'scaler': scaler, 'deepfm_factorizers': deepfm_fac, 'din_encoders': din_enc, 'deepfm': deepfm, 'din': din}


def encode_with_uniques(series: pd.Series, uniq_list: List[str]) -> np.ndarray:
    """Encode values by their 1-based position in ``uniq_list``.

    Each value is converted with ``str()`` before the lookup; values not
    found in ``uniq_list`` are encoded as 0.

    Returns:
        int32 array with one code per element of ``series``.
    """
    code_of = {value: pos for pos, value in enumerate(uniq_list, start=1)}
    codes = [code_of.get(str(raw), 0) for raw in series.values]
    return np.asarray(codes, dtype=np.int32)


def rank_with_deepfm(df: pd.DataFrame, topn: int = 10) -> pd.DataFrame:
    """Score candidates with the DeepFM model and keep the best ``topn``.

    Sparse columns are encoded with the saved unique-value lists and dense
    columns are passed through the saved scaler. When the DeepFM artifacts
    are unavailable, a message is printed and the first ``topn`` rows are
    returned without a ``rank_score`` column.
    """
    bundle = load_deep_ranking_artifacts()
    model = None if bundle is None else bundle['deepfm']
    if model is None:
        print('[skip] missing deepfm artifacts, run 8.deep_ranking.ipynb first')
        return df.head(topn)

    factorizers = bundle['deepfm_factorizers']
    vocab = factorizers['uniques']

    # One integer-coded input per sparse column; NaN is replaced by 0 and the
    # value stringified before the lookup.
    model_inputs = {}
    for col in factorizers['sparse_cols']:
        model_inputs[col] = encode_with_uniques(df[col].fillna(0).astype(str), vocab[col])

    dense_block = df[factorizers['dense_cols']].fillna(0).values.astype('float32')
    model_inputs['dense'] = bundle['scaler'].transform(dense_block)

    raw_pred = model.predict(model_inputs, batch_size=4096, verbose=0)
    scores = np.asarray(raw_pred).reshape(-1)
    scored = df.assign(rank_score=scores)
    return scored.sort_values('rank_score', ascending=False).head(topn)


def rank_with_din(df: pd.DataFrame, topn: int = 10, max_hist_len: int = 50) -> pd.DataFrame:
    """Score candidates with the DIN model and keep the best ``topn``.

    The user id is taken from the first row of ``df`` and that user's encoded
    history is repeated for every candidate row. When the DIN artifacts are
    unavailable, a message is printed and the first ``topn`` rows are
    returned without a ``rank_score`` column.
    """
    bundle = load_deep_ranking_artifacts()
    model = None if bundle is None else bundle['din']
    if model is None:
        print('[skip] missing din artifacts, run 8.deep_ranking.ipynb first')
        return df.head(topn)

    encoders = bundle['din_encoders']
    user_lookup = encoders['raw_to_user_enc']
    item_lookup = encoders['raw_to_item_enc']

    def encode_item(raw_id) -> int:
        # Ids missing from the lookup are encoded as 0.
        return item_lookup.get(int(raw_id), 0)

    n_rows = len(df)
    # Model inputs: user_id / hist_items / target_item / dense.
    uid = int(df['user_id'].iloc[0])
    recent_clicks = user_hist.get(uid, [])[-max_hist_len:]
    encoded_hist = [encode_item(a) for a in recent_clicks]
    hist_mat = pad_left([encoded_hist] * n_rows, max_len=max_hist_len)

    user_enc = np.full(n_rows, user_lookup.get(uid, 0), dtype=np.int32)
    item_enc = np.asarray([encode_item(a) for a in df['article_id'].values], dtype=np.int32)

    dense_cols = [
        'recall_score',
        'recall_rank',
        'user_click_count',
        'user_unique_items',
        'item_click_count',
        'words_count',
        'item_age_hours',
        'time_gap_hours',
        'emb_sim_last',
        'is_same_category',
    ]
    dense = bundle['scaler'].transform(df[dense_cols].fillna(0).values.astype('float32'))

    raw_pred = model.predict(
        {
            'user_id': user_enc,
            'hist_items': hist_mat,
            'target_item': item_enc,
            'dense': dense,
        },
        batch_size=4096,
        verbose=0,
    )
    scores = np.asarray(raw_pred).reshape(-1)
    scored = df.assign(rank_score=scores)
    return scored.sort_values('rank_score', ascending=False).head(topn)


def rerank(df: pd.DataFrame, ranker: str = 'lgbm', topn: int = 10) -> pd.DataFrame:
    """Dispatch ranking to the model named by ``ranker``.

    ``ranker`` is matched case-insensitively against ``'lgbm'``, ``'deepfm'``
    and ``'din'``. An empty ``df`` is returned unchanged without validating
    the ranker name.

    Raises:
        ValueError: If ``df`` is non-empty and ``ranker`` is not supported.
    """
    key = ranker.lower()
    if df.empty:
        return df
    # Thunks keep the lookup lazy: only the chosen ranker is touched.
    scorers = {
        'lgbm': lambda: rank_with_lgbm(df, topn=topn),
        'deepfm': lambda: rank_with_deepfm(df, topn=topn),
        'din': lambda: rank_with_din(df, topn=topn),
    }
    run = scorers.get(key)
    if run is None:
        raise ValueError(f'unknown ranker: {key}')
    return run()


## 5) End-to-End：`recommend(user_id)`

这个函数体现了典型线上链路：
1) 召回 TopK
2) 构建特征
3) 排序 TopN

你可以在面试里用它解释：
- 召回/排序分工
- Feature 的 offline/online 一致性
- 冷启动兜底策略


In [None]:
def recommend(user_id: int, recall: str = 'two_tower', ranker: str = 'lgbm', topk: int = 100, topn: int = 10) -> pd.DataFrame:
    """Run the full pipeline for one user: retrieve, featurise, rank.

    Args:
        user_id: Raw user id.
        recall: Retrieval method name passed to ``retrieve_candidates``.
        ranker: Ranker name passed to ``rerank``.
        topk: Number of candidates requested from retrieval.
        topn: Number of rows kept after ranking.

    Returns:
        The ranked frame, left-joined with ``articles`` on ``article_id``
        when article metadata is loaded.
    """
    candidates = retrieve_candidates(user_id=user_id, method=recall, topk=topk)
    ranked = rerank(build_ranking_features(candidates), ranker=ranker, topn=topn)
    if articles.empty:
        return ranked
    # Join article metadata back on for display; clashing column names from
    # the metadata side get a "_meta" suffix.
    return ranked.merge(articles, on='article_id', how='left', suffixes=('', '_meta'))


# Pick a demo user: prefer one sampled from the validation split, then any
# user with training history, and finally fall back to the literal id 1.
if not valid_last.empty:
    demo_user = int(valid_last['user_id'].sample(1, random_state=42).iloc[0])
elif user_hist:
    demo_user = int(next(iter(user_hist.keys())))
else:
    demo_user = 1

print('demo_user:', demo_user, 'hist_len:', len(user_hist.get(demo_user, [])))

# Supported options: recall in {'two_tower', 'sasrec', 'popular'},
# ranker in {'lgbm', 'deepfm', 'din'}.
res = recommend(demo_user, recall='two_tower', ranker='lgbm', topk=100, topn=10)

# Show only the columns that exist. Some are absent when a stage was skipped
# (e.g. no ``rank_score`` when a ranker fell back to ``df.head(topn)``, or no
# feature columns when retrieval returned an empty frame); selecting them
# unconditionally would raise KeyError.
display_cols = ['user_id', 'article_id', 'recall_score', 'recall_rank', 'rank_score', 'category_id', 'words_count']
res[[c for c in display_cols if c in res.columns]].head(10)


## 6) 导出（SavedModel / 可选 ONNX）与简要性能测试

工业里常见做法：
- 训练产物：`*.keras` / SavedModel
- 检索产物：item embedding + ANN index（FAISS/HNSW/ScaNN）
- 推理服务：TensorFlow Serving / 自研服务 + 特征服务

下面演示：
- 如何把 Keras 模型导出为 SavedModel
- 如果环境装了 `tf2onnx`，可选导出 ONNX


In [None]:
SERVING_DIR = PROJECT_PATH / 'artifacts' / 'serving'
SERVING_DIR.mkdir(parents=True, exist_ok=True)


def _export_savedmodel(model, export_path) -> None:
    """Write ``model`` to ``export_path`` as a SavedModel directory.

    Newer Keras versions reject ``model.save(<directory>)`` (only ``.keras``
    / ``.h5`` targets are accepted) and provide ``model.export`` for
    SavedModel output, so prefer ``export`` when the model has it and fall
    back to ``save`` otherwise.
    """
    target = str(export_path)
    if hasattr(model, 'export'):
        model.export(target)
    else:
        model.save(target)


# Example: export DeepFM / DIN when they have been trained.
art = load_deep_ranking_artifacts()
if art is None:
    print('[skip] deep ranking artifacts not found, run 8.deep_ranking.ipynb first')
else:
    if art['deepfm'] is not None:
        export_path = SERVING_DIR / 'deepfm_savedmodel'
        _export_savedmodel(art['deepfm'], export_path)
        print('saved:', export_path)
    if art['din'] is not None:
        export_path = SERVING_DIR / 'din_savedmodel'
        _export_savedmodel(art['din'], export_path)
        print('saved:', export_path)

# Optional ONNX export, skipped when the dependency cannot be imported.
# The broad ``except`` is deliberate: any import-time failure of the optional
# package should skip the export rather than stop the notebook.
try:
    import tf2onnx  # type: ignore
except Exception:
    tf2onnx = None

if tf2onnx is None:
    print('[optional] tf2onnx not installed, skip ONNX export')
else:
    if art is not None and art['din'] is not None:
        # NOTE(review): 50 and 10 mirror rank_with_din's default
        # ``max_hist_len`` and its ten dense columns — keep them in sync.
        spec = (
            tf.TensorSpec((None,), tf.int32, name='user_id'),
            tf.TensorSpec((None, 50), tf.int32, name='hist_items'),
            tf.TensorSpec((None,), tf.int32, name='target_item'),
            tf.TensorSpec((None, 10), tf.float32, name='dense'),
        )
        onnx_path = SERVING_DIR / 'din.onnx'
        _ = tf2onnx.convert.from_keras(art['din'], input_signature=spec, output_path=str(onnx_path))
        print('saved:', onnx_path)
