In [None]:
# 示例：电商CTR建模

import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, classification_report

# 1. Load the raw CTR sample from disk.
data = pd.read_csv("../data/sample_ctr.csv")

# 2. Basic feature selection and preprocessing.
# Columns used for modelling; 'is_click' is the label and is split off below.
use_cols = [
    'user_id',
    'item_id',
    'age',
    'user_level',
    'price',
    'dayofweek',
    'hour',
    'holiday',
    'temp_c',
    'prev_clicks',
    'is_click',
]

# Take a copy so the in-place edits below never touch `data`.
X = data.loc[:, use_cols].copy()

# Reduce the raw IDs modulo 1000 (mapping / masking of the original values).
X['user_id'] = X['user_id'].mod(1000)
X['item_id'] = X['item_id'].mod(1000)

# Detach the label column; afterwards X holds only the feature columns.
y = X.pop('is_click')

# 3. Split into train / test sets (80% / 20%, fixed seed for reproducibility).
# Stratify on the click label so both partitions keep the same positive rate:
# a purely random split can leave the hold-out set with a skewed share of
# clicks, which makes the AUC and the classification report unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 4. Train the model: LightGBM classifier with 60 boosting rounds,
# learning rate 0.12 and a fixed seed.
clf = lgb.LGBMClassifier(n_estimators=60, learning_rate=0.12, random_state=42)
clf.fit(X_train, y_train)

# 5. Evaluate on the hold-out set.
# Hard class predictions feed the classification report.
y_pred = clf.predict(X_test)

# Column 1 of predict_proba is used as the score for the AUC.
class_probs = clf.predict_proba(X_test)
y_proba = class_probs[:, 1]

auc = roc_auc_score(y_true=y_test, y_score=y_proba)
print("Test AUC: ", auc)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

# 6. Simple plot: ROC curve of the model against the chance diagonal.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

# Points of the ROC curve computed from the scores in y_proba.
fpr, tpr, thresholds = roc_curve(y_test, y_proba)

roc_label = 'LightGBM (AUC = %.3f)' % auc

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, label=roc_label)
# Dashed gray diagonal from (0, 0) to (1, 1) as a visual baseline.
ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.set_title('ROC Curve - CTR Prediction')
ax.legend()
plt.show()