# Titanic Survival Prediction

## Using Classification


In [None]:
# 1. Import libraries

import pandas as pd#pandas: for loading and handling data
import numpy as np #numpy: for numerical operations
from sklearn.model_selection import train_test_split #train_test_split: to split data into training and test sets
from sklearn.preprocessing import LabelEncoder #LabelEncoder: to convert text labels (like "male") into numbers
from sklearn.ensemble import RandomForestClassifier#RandomForestClassifier: the ML model
from sklearn.metrics import accuracy_score #accuracy_score: to measure model performance


## Load the dataset




In [None]:
# 2. Load the dataset
# Read the Titanic CSV file into a pandas DataFrame.
csv_path = 'train.csv'
data = pd.read_csv(csv_path)


In [None]:
# 3. Look at the data
# head() returns the first five rows of the DataFrame by default.
first_rows = data.head()
print(first_rows)


In [None]:
# 4. Select features and target
# We choose 4 simple features:
#   Pclass: passenger class (1st, 2nd, 3rd)
#   Sex: male/female
#   Age, Fare: numeric features
features = ['Pclass', 'Sex', 'Age', 'Fare']

# X: input features.
# Take an explicit copy so that the column edits made in the following cells
# act on an independent DataFrame rather than on a selection derived from
# `data` (this avoids pandas' SettingWithCopyWarning).
X = data[features].copy()

# y: target (what we want to predict)
y = data['Survived']




In [None]:
# 5. Handle missing values
# The Age column has missing values (NaNs); we replace them with the median age.
#
# The filled column is written back explicitly instead of calling
# fillna(..., inplace=True) on X['Age']: that form is chained assignment,
# which pandas deprecates and which leaves X unchanged when Copy-on-Write
# is enabled.
age_median = X['Age'].median()
X = X.assign(Age=X['Age'].fillna(age_median))



In [None]:
# 6. Convert categorical to numbers
# Sex is a text column ("male", "female").
# The encoder learns the labels, then replaces them with integer codes:
#   female → 0
#   male → 1
le = LabelEncoder()
le.fit(X['Sex'])
X['Sex'] = le.transform(X['Sex'])



In [None]:
# 7. Split into training and test sets
# 80% of the rows are used for training and 20% are held out for testing.
# random_state=42 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 8. Train the model
# We create a Random Forest classifier.
# random_state is fixed so the forest's internal randomness is repeatable;
# together with the seeded split above, the reported accuracy is then the
# same on every run.
model = RandomForestClassifier(random_state=42)

# .fit() trains the model using the training data
model.fit(X_train, y_train)

In [None]:
# 9. Make predictions
# Ask the trained model for a survival prediction for each row of the test set.
y_pred = model.predict(X=X_test)

In [None]:
# 10. Evaluate the model
# Accuracy compares the predictions with the true test labels and reports
# the share that were correct.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Print it with two decimals (e.g., "Accuracy: 0.82")
print(f"Accuracy: {accuracy:.2f}")

In [None]:
# Example output of the evaluation cell above: Accuracy: 0.82


In [None]:
# Predict survival of a new passenger
# Describe the passenger as a one-row DataFrame with the same four columns
# that were used for training.
passenger_record = {'Pclass': 3, 'Sex': 'male', 'Age': 25, 'Fare': 7.25}
new_passenger = pd.DataFrame([passenger_record])

# Reuse the encoder fitted earlier so 'male' gets the same code as in training.
new_passenger['Sex'] = le.transform(new_passenger['Sex'])

prediction = model.predict(new_passenger)
print(f"Survived? {prediction[0]}")
