In [None]:
import pandas as pd
# LOAD
# Keep the untouched CSV contents under their own name so the engineered frame
# can be rebuilt or compared against the raw data without re-reading the file.
titanic_raw = pd.read_csv("data/Titanic-Dataset.csv")

# FEATURE ENGINEERING
# Binary encoding applied to the Sex column.
SEX_CODES = {'male': 0, 'female': 1}

# Columns removed from the engineered frame.
DROPPED_COLUMNS = ['PassengerId', 'Name', 'Cabin', 'Embarked', 'Ticket']

# Series.map turns every label missing from the mapping into NaN without any
# warning, so validate the labels up front and fail with a readable message.
# Genuinely missing Sex values are left alone: they stay NaN and show up in
# the null count at the end of the cell.
unknown_sex_labels = set(titanic_raw['Sex'].dropna().unique()) - set(SEX_CODES)
if unknown_sex_labels:
    raise ValueError(
        "Unexpected values in 'Sex' column: "
        + ", ".join(sorted(map(str, unknown_sex_labels)))
    )

# NOTE(review): the Age median below is computed over every row of the file.
# If this frame is later split into train/test sets, the imputed value has
# seen the held-out rows (data leakage) -- consider imputing after the split.
df = (
    titanic_raw
    .assign(
        # Handle missing Age values (use median)
        Age=lambda frame: frame['Age'].fillna(frame['Age'].median()),
        # Create FamilySize feature: siblings/spouses + parents/children + self
        FamilySize=lambda frame: frame['SibSp'] + frame['Parch'] + 1,
        # Convert Sex to binary
        Sex=lambda frame: frame['Sex'].map(SEX_CODES),
    )
    # Drop unnecessary columns (returns a new frame; titanic_raw is untouched)
    .drop(columns=DROPPED_COLUMNS)
)

# Final check: per-column count of remaining missing values
df.isnull().sum()


In [None]:
# DECISION TREE: TRAINING AND TESTING
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Separate the label column from the predictor columns
y = df['Survived']
X = df.drop(columns='Survived')

# Hold out 20% of the rows for testing, stratified on the label,
# with a fixed seed so the partition is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Build a depth-unrestricted decision tree and fit it on the training rows
# (fit returns the estimator itself, so construction and training chain)
dt_model = DecisionTreeClassifier(max_depth=None, random_state=42).fit(X_train, y_train)

# Score the held-out rows
y_pred = dt_model.predict(X_test)

# Report each metric under its heading, in the same order as before
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

In [None]:
# RANDOM FOREST: TRAINING AND TESTING
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a 100-tree random forest (seeded, n_jobs=-1) and fit it on the
# training rows; fit returns the estimator itself, so the calls chain
rf_model = RandomForestClassifier(
    n_jobs=-1,
    random_state=42,
    n_estimators=100,
).fit(X_train, y_train)

# Score the held-out rows
y_pred_rf = rf_model.predict(X_test)

# Report each metric under its heading, in the same order as before
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred_rf))

In [None]:
# SAVING MODEL
import joblib

# Persist both fitted estimators to the current working directory.
# The final call stays a bare expression so its return value remains the
# cell's displayed output.
joblib.dump(value=dt_model, filename="decision_tree_model.pkl")
joblib.dump(value=rf_model, filename="random_forest_model.pkl")