<a href="https://colab.research.google.com/github/prime29haruno/Kaggle_Titanic/blob/main/Titanic15.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from xgboost import XGBClassifier
import os

# ===== Load data =====
train = pd.read_csv("/kaggle/input/titanic/train.csv")
test  = pd.read_csv("/kaggle/input/titanic/test.csv")

# ===== Missing-value imputation =====
# Every fill value is derived from the training set only and then applied to
# both frames, so train and test are imputed with the same values and no
# test-set statistic feeds into preprocessing.
fill_values = {
    "Age": train["Age"].median(),
    "Fare": train["Fare"].median(),
    "Embarked": "S",
}
for frame in (train, test):
    for column, fill_value in fill_values.items():
        frame[column] = frame[column].fillna(fill_value)

# ===== Family size & IsAlone =====
# FamilySize counts siblings/spouses + parents/children + the passenger;
# IsAlone is 1 when that count is exactly one, otherwise 0.
for frame in (train, test):
    relatives = frame["SibSp"] + frame["Parch"]
    frame["FamilySize"] = relatives + 1
    frame["IsAlone"] = frame["FamilySize"].eq(1).astype(int)

# ===== Title =====
# Spelling variants are folded into their common form and infrequent
# honorifics are collapsed into a single "Rare" bucket.
title_replacements = {
    "Mlle": "Miss",
    "Ms": "Miss",
    "Mme": "Mrs",
    **dict.fromkeys(
        ["Dr", "Rev", "Col", "Major", "Capt", "Sir", "Lady", "Don",
         "Countess", "Jonkheer", "Dona"],
        "Rare",
    ),
}
for df in [train, test]:
    # Raw string keeps `\.` a regex escape instead of an invalid Python
    # string escape; expand=False yields a Series rather than a one-column
    # DataFrame.
    df["Title"] = df["Name"].str.extract(r" ([A-Za-z]+)\.", expand=False)
    df["Title"] = df["Title"].replace(title_replacements)

# Integer codes are assigned in order of first appearance across train, then
# test, so both frames share one mapping.
all_titles = pd.concat([train["Title"], test["Title"]]).unique()
title_map = {title: idx for idx, title in enumerate(all_titles)}
train["Title"] = train["Title"].map(title_map)
test["Title"]  = test["Title"].map(title_map)

# ===== AgeGroup & FareGroup =====
# Quartile edges are learned from the training set and reused for the test
# set, so a given group code denotes the same value range in both frames.
# (Running qcut on each frame separately gives each its own cut points.)
for source_col, group_col in (("Age", "AgeGroup"), ("Fare", "FareGroup")):
    train_codes, edges = pd.qcut(
        train[source_col], 4, labels=False, retbins=True
    )
    train[group_col] = train_codes
    # Open the outer edges so test values outside the training range still
    # fall into the first/last group instead of becoming NaN.
    test_bins = [-float("inf"), *edges[1:-1], float("inf")]
    test[group_col] = pd.cut(test[source_col], bins=test_bins, labels=False)

# ===== Sex, Embarked =====
# Replace the string categories with integer codes; the same lookup tables
# are applied to both frames.
sex_codes = {"male": 0, "female": 1}
embarked_codes = {"S": 0, "C": 1, "Q": 2}
for frame in (train, test):
    frame["Sex"] = frame["Sex"].map(sex_codes)
    frame["Embarked"] = frame["Embarked"].map(embarked_codes)

# ===== Features =====
# Columns fed to the model, in the order they are passed to it.
features = [
    "Pclass",
    "Sex",
    "Age",
    "Fare",
    "FamilySize",
    "IsAlone",
    "Embarked",
    "Title",
    "AgeGroup",
    "FareGroup",
]

# Design matrices for fitting and prediction, plus the training target.
X = train[features]
X_test = test[features]
y = train["Survived"]

# ===== XGBoost model =====
# Hyper-parameters are collected in one mapping and unpacked into the
# classifier, which is then fitted on the full training set.
# NOTE(review): use_label_encoder appears to be deprecated in newer XGBoost
# releases — confirm against the installed version before removing it.
xgb_params = {
    "n_estimators": 500,
    "max_depth": 4,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
    "use_label_encoder": False,
    "eval_metric": "logloss",
}
xgb = XGBClassifier(**xgb_params)
xgb.fit(X, y)

# ===== Predict & write the submission file =====
output_dir = "/kaggle/working"
submission_path = "/kaggle/working/submission.csv"

pred = xgb.predict(X_test)

# One row per test passenger: its id and the predicted Survived label.
submission = pd.DataFrame(
    {"PassengerId": test["PassengerId"], "Survived": pred}
)
submission.to_csv(submission_path, index=False)

# ===== Sanity check =====
# Report whether the file exists, list the output directory, and preview the
# first rows of the submission.
print("作成OK?:", os.path.exists(submission_path))
print("working 内:", os.listdir(output_dir))
print(submission.head())