# In [None]:
import pandas as pd
import xgboost as xgb

# ======================
# 1. Data loading (edit the file paths below)
# ======================
# NOTE(review): both paths currently point at the same CSV, so the "test" set
# is the training data itself — confirm and replace TEST_CSV_PATH if a
# separate hold-out file exists.
TRAIN_CSV_PATH = "/kaggle/input/houkong-moai/customer_churn.csv"
TEST_CSV_PATH = "/kaggle/input/houkong-moai/customer_churn.csv"

train_df = pd.read_csv(TRAIN_CSV_PATH)  # training set
test_df = pd.read_csv(TEST_CSV_PATH)    # test set

# ======================
# 2. Column configuration (adjust to match the actual data!)
# ======================
# Name of the target column; it is cast to int before training and is
# expected to hold binary 0/1 labels.
TARGET_COL = "churn"

# Name of the identifier column read from the test set.
ID_COL = "customer_id"

# Columns considered useless; removed from the feature matrices if present.
DROP_COLS = ["gender", "date"]

# Feature columns fed to the model.
FEATURE_COLS = ["age", "monthly_bill", "total_usage", "service_calls"]

# ======================
# 3. Preprocessing
# ======================
# Resolve the final feature list once: keep configured features that exist in
# the training data and are not listed in DROP_COLS. Doing the exclusion here
# (instead of an in-place drop afterwards) avoids mutating a slice of the
# source frames, which triggers pandas' SettingWithCopyWarning.
selected_features = [
    col
    for col in FEATURE_COLS
    if col in train_df.columns and col not in DROP_COLS
]
if not selected_features:
    raise ValueError(
        "None of the configured FEATURE_COLS are usable in the training data: "
        f"{FEATURE_COLS}"
    )

# The test set must expose exactly the features the model is trained on;
# fail early with an explicit message rather than an opaque indexing error.
missing_in_test = [col for col in selected_features if col not in test_df.columns]
if missing_in_test:
    raise KeyError(f"Test data is missing feature columns: {missing_in_test}")

# Training set: explicit copies so later changes never touch train_df.
X_train = train_df[selected_features].copy()
y_train = train_df[TARGET_COL].astype(int)  # force integer labels

# Test set: same columns, same order as the training matrix.
test_ids = test_df[ID_COL]
X_test = test_df[selected_features].copy()

# ======================
# 4. Train the XGBoost classifier
# ======================
# Hyper-parameters are gathered in one mapping so they can be inspected or
# logged before the estimator is built.
xgb_params = {
    "objective": "binary:logistic",  # binary classification task
    "n_estimators": 150,             # number of trees
    "max_depth": 4,                  # maximum depth of each tree
    "learning_rate": 0.1,            # learning rate
    "subsample": 0.8,                # row subsampling, to limit overfitting
    "random_state": 42,              # random seed
}
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# ======================
# 5. Predict and save the 0/1 results
# ======================
# model.predict returns the class labels directly (not probabilities).
predicted_labels = model.predict(X_test)

submission = pd.DataFrame(
    {
        ID_COL: test_ids,
        f"pred_{TARGET_COL}": predicted_labels,
    }
)
submission.to_csv("xgb_submission.csv", index=False)

# ======================
# 6. Print a run summary
# ======================
# A single print call with a newline separator emits the same five lines
# (rule, three messages, rule) as five separate print calls would.
separator = "=" * 50
print(
    separator,
    "XGBoost分类结果已保存至 xgb_submission.csv",
    f"使用的特征: {list(X_train.columns)}",
    f"样本数量: 训练集={len(X_train)}, 测试集={len(X_test)}",
    separator,
    sep="\n",
)
