## Import the required libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import plot_tree

## Load and preview data

In [None]:
# Read the Titanic CSV (path is relative to the current working directory)
file_path = 'Titanic/Titanic-Dataset.csv'
data = pd.read_csv(file_path)

# Preview the first five rows
print(data.head(n=5))

# Summarise columns, dtypes and non-null counts
print("\n\nView the dataframe Info")
data.info()

## Data Preprocessing

In [None]:
# Predictor columns used by the model
features = ['Pclass', 'Sex', 'Age', 'Fare']
X = data[features]

# One-hot encode 'Sex'; drop_first=True drops the first category's indicator
# column so the encoding carries no redundant column
X = pd.get_dummies(X, columns=['Sex'], drop_first=True)

# Display list of missing values by column
print("\n\nMissing values per column")
print(X.isnull().sum())

# Impute missing ages with the median age. Only 'Age' is targeted: a bare
# X.fillna(<age median>) would also write the age median into gaps in every
# other column (e.g. 'Fare'). Gaps in other columns are left as-is so they
# remain visible in the confirmation printout below.
# NOTE: the median is computed over all rows, before any train/test split.
X = X.fillna({'Age': X['Age'].median()})

# Check that imputation was done
print("\n\nConfirm missing values have been imputed")
missing_count_per_column = X.isnull().sum()
print(missing_count_per_column)

# Target variable
y = data['Survived']

## Split the data into sets for training and testing

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Build the decision tree model

In [None]:
# Configure a depth-limited decision tree that splits on Gini impurity
# ('entropy' is an alternative criterion); random_state fixes tie-breaking
tree_model = DecisionTreeClassifier(criterion='gini', max_depth=4, random_state=42)

# Fit the tree on the training split
tree_model.fit(X_train, y_train)

## Make the predictions

In [None]:
# Predict a class label for every row of the held-out test features
y_pred = tree_model.predict(X_test)

## Evaluate the model

In [None]:
# Fraction of test rows whose predicted label matches the true label,
# reported as a percentage
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", 100 * accuracy, "%")

# Confusion matrix: rows are the true classes, columns the predicted classes
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

# Per-class precision, recall, F1 and support
print(classification_report(y_true=y_test, y_pred=y_pred))

## Visualize the model

In [None]:
# Draw the fitted tree on a 12x8 inch figure
plt.figure(figsize=(12, 8))
plot_tree(
    tree_model,
    # Pass a plain list rather than the pandas Index: some scikit-learn
    # releases validate feature_names strictly as a list and reject an Index.
    feature_names=list(X.columns),
    # Listed in ascending order of the target labels
    class_names=['Not Survived', 'Survived'],
    filled=True,  # colour each node by its majority class
)
plt.show()