<a href="https://colab.research.google.com/github/prime29haruno/Kaggle_Titanic/blob/main/Titanic12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Kaggle Titanic training and test sets
train = pd.read_csv("/kaggle/input/titanic/train.csv")
test = pd.read_csv("/kaggle/input/titanic/test.csv")

# ===== Missing-value handling =====
# Numeric gaps are filled with the TRAINING-set median (also for the test
# frame); a missing embarkation port is filled with "S".
fill_values = {
    "Age": train["Age"].median(),
    "Fare": train["Fare"].median(),
    "Embarked": "S",
}
for column, value in fill_values.items():
    train[column] = train[column].fillna(value)
    test[column] = test[column].fillna(value)

# ===== Family size =====
# Siblings/spouses plus parents/children, plus one for the passenger.
for frame in (train, test):
    relatives = frame["SibSp"] + frame["Parch"]
    frame["FamilySize"] = relatives + 1

# Extract the title from each name: the run of letters that follows a space
# and is terminated by a period (illustratively, "Mr" in "Smith, Mr. John").
# The pattern is a raw string so that "\." reaches the regex engine without
# relying on an invalid string escape, and expand=False makes str.extract
# return a Series instead of a one-column DataFrame.
for df in [train, test]:
    df["Title"] = df["Name"].str.extract(r" ([A-Za-z]+)\.", expand=False)

# Consolidate titles: fold spelling variants into "Miss"/"Mrs" and group the
# remaining listed titles under a single "Rare" label.
title_aliases = {"Mlle": "Miss", "Ms": "Miss", "Mme": "Mrs"}
rare_titles = [
    "Dr", "Rev", "Col", "Major", "Capt", "Sir", "Lady", "Don",
    "Countess", "Jonkheer", "Dona",
]
for df in [train, test]:
    df["Title"] = df["Title"].replace(title_aliases).replace(rare_titles, "Rare")

# Build the title -> integer code map from the unique values of train AND
# test together, so both frames share one encoding and no test title is
# left unmapped.
all_titles = pd.concat([train["Title"], test["Title"]]).unique()
title_map = {title: idx for idx, title in enumerate(all_titles)}

# Convert titles to their integer codes
train["Title"] = train["Title"].map(title_map)
test["Title"] = test["Title"].map(title_map)



# ===== Encode Sex and Embarked as integers =====
category_codes = {
    "Sex": {"male": 0, "female": 1},
    "Embarked": {"S": 0, "C": 1, "Q": 2},
}
for frame in (train, test):
    for column, codes in category_codes.items():
        frame[column] = frame[column].map(codes)

# ===== Features =====
features = ["Pclass", "Sex", "Age", "Fare", "FamilySize", "Embarked", "Title"]

y = train["Survived"]
X = train[features]
X_test = test[features]

# ===== Split into training and validation data (20% held out) =====
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ===== Random forest =====
rf_params = {
    "n_estimators": 300,
    "max_depth": 6,
    "min_samples_leaf": 2,
    "random_state": 42,
}
# fit() returns the fitted estimator, so construction and training chain.
rf = RandomForestClassifier(**rf_params).fit(X_train, y_train)

# Validation accuracy on the held-out rows
y_pred = rf.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred)
print("Validation accuracy:", val_accuracy)

# Predictions for the submission file
pred = rf.predict(X_test)

# Two columns: the test passenger ids and the predicted Survived labels
submission = test[["PassengerId"]].assign(Survived=pred)
submission.to_csv("/kaggle/working/submission.csv", index=False)

print("submission.csv を作成しました！")