In [2]:
# Task 7: Titanic Survival Prediction using Random Forest

# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# Step 2: Load Titanic Dataset (Make sure train.csv is in your working directory)
data = pd.read_csv("train.csv")

# Step 3: Data Preprocessing
# Drop columns that are not used as model features.
data = data.drop(columns=['Name', 'Ticket', 'Cabin', 'PassengerId'])

# Convert categorical to numeric. These are fixed lookups that do not depend
# on the data distribution, so applying them before the split is safe.
# Missing 'Embarked' entries stay NaN here and are imputed after the split.
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Define features and target
X = data.drop(columns="Survived")
y = data["Survived"]

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fill missing values AFTER splitting: the imputation statistics are learned
# from the training rows only and then applied unchanged to the test rows, so
# no information about the held-out set leaks into preprocessing.
# NOTE(review): metrics may differ marginally from a run that imputed on the
# full dataset before splitting.
fill_values = {
    'Age': X_train['Age'].median(),
    'Embarked': X_train['Embarked'].mode()[0],
}
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)

# Step 5: Train Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 6: Predictions
y_pred = model.predict(X_test)

# Step 7: Evaluation Metrics (computed on the held-out test split)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)




Precision: 0.8115942028985508
Recall: 0.7567567567567568
F1 Score: 0.7832167832167832
