# 🛠️ Titanic Survival Prediction - Feature Engineering

This notebook is part of the Titanic Survival Prediction project. In this phase, We'll transform raw data into meaningful features to help with future analysis or model training.

In [1]:
import numpy as np
import pandas as pd

df = pd.read_csv("/kaggle/input/titanic-cleaned/titanic_cleaned.csv")

In [2]:
df.sample(5)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Embarked
326,0,3,"Nysveen, Mr. Johan Hansen",male,61.0,0,0,6.2375,S
522,0,3,"Lahoud, Mr. Sarkis",male,28.0,0,0,7.225,C
217,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42.0,1,0,27.0,S
722,0,2,"Gillespie, Mr. William Henry",male,34.0,0,0,13.0,S
285,0,3,"Stankovic, Mr. Ivan",male,33.0,0,0,8.6625,C


## 👨‍👩‍👧‍👦 Feature: Family Size and Alone Indicator

In [3]:
df["FamilySize"] = df["SibSp"] + df["Parch"] + 1
df["isAlone"] = (df["FamilySize"] > 1).astype(int)

## 🧑 Feature: Extract Title from Name

In [4]:
df["Title"] = df['Name'].str.split(", ", expand=True)[1].str.split(".", expand=True)[0]

## 🪙📊 Feature Binning: Fare and Age

In [5]:
df["FareBin"] = pd.qcut(df["Fare"], 4)
df["AgeBin"] = pd.cut(df["Age"].astype(int), 5)

## 🧑 Feature: Extract Title from Name

In [6]:
top_min = 4
top_title_names = df["Title"].value_counts().head(top_min).index

df["Title"] = df["Title"].apply(lambda x: "Misc" if x not in top_title_names else x)

In [7]:
df["Title"].value_counts()

Title
Mr        517
Miss      182
Mrs       125
Master     40
Misc       27
Name: count, dtype: int64

## 🔍 Preview Data

In [8]:
df.sample(5)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Embarked,FamilySize,isAlone,Title,FareBin,AgeBin
218,1,1,"Bazzani, Miss. Albina",female,32.0,0,0,76.2917,C,1,0,Miss,"(31.0, 512.329]","(16.0, 32.0]"
693,0,3,"Saad, Mr. Khalil",male,25.0,0,0,7.225,C,1,0,Mr,"(-0.001, 7.91]","(16.0, 32.0]"
778,0,3,"Kilgannon, Mr. Thomas J",male,28.0,0,0,7.7375,Q,1,0,Mr,"(-0.001, 7.91]","(16.0, 32.0]"
884,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,7.05,S,1,0,Mr,"(-0.001, 7.91]","(16.0, 32.0]"
222,0,3,"Green, Mr. George Henry",male,51.0,0,0,8.05,S,1,0,Mr,"(7.91, 14.454]","(48.0, 64.0]"


In [9]:
df.to_csv("titanic_fe.csv", index=False)