In [None]:
# NOTE
# This notebook was executed and validated in Google Colab.
# Environment-specific commands (e.g., pip install, file download)
# are included for reproducibility and convenience.

# Automated EDA is performed with ydata-profiling.
# The generated HTML reports are written to /content in Colab; copies are attached in the reports/eda directory.

!pip -q install --upgrade pip
!pip -q install "numpy==2.0.2" "scipy==1.14.1" "scikit-learn==1.6.1" pandas matplotlib seaborn

In [None]:
# Hard-stop the current kernel process by sending it signal 9 (SIGKILL on
# Linux). Nothing after this line runs in the current process; continue with
# the next cell once the runtime is available again.
import os

kernel_pid = os.getpid()
os.kill(kernel_pid, 9)

In [None]:
import numpy as np
import scipy
import sklearn

# Print the version of each library that is actually importable in this
# kernel, one "<name>: <version>" line per library.
for _label, _module in (("numpy:", np), ("scipy:", scipy), ("sklearn:", sklearn)):
    print(_label, _module.__version__)

In [None]:
!pip -q install h2o autogluon tpot shap ydata-profiling
print("installed automl + xai + eda")

In [None]:
# Core data-handling libraries.
import pandas as pd
import numpy as np

# scikit-learn helpers: train/test splitting and evaluation metrics.
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc, precision_recall_curve

# SHAP library and the ydata-profiling report class (ProfileReport builds the EDA HTML reports).
import shap
from ydata_profiling import ProfileReport

# Marker printed only when every import above has succeeded.
print("imports OK")

In [None]:
# Locations of the raw train / test CSV files, relative to the working directory.
TRAIN_PATH, TEST_PATH = (
    "project-root/data/raw/train.csv",
    "project-root/data/raw/test.csv",
)

# Read both files into DataFrames (train first, then test).
train, test = (pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH))

# Quick sanity check: frame shapes, the train column index, and a preview
# of the first rows as the cell's displayed output.
print(train.shape, test.shape)
print(train.columns)
train.head()

In [None]:
def _write_profile(frame, title, html_path):
    """Build an explorative ProfileReport for `frame`, write it to `html_path`, and return it."""
    report = ProfileReport(frame, title=title, explorative=True)
    report.to_file(html_path)
    return report


# Report 1: the training data on its own.
profile_train = _write_profile(
    train,
    "Titanic Train EDA",
    "/content/titanic_train_eda.html",
)

# Report 2: train and test stacked into one frame; the `_dataset` column
# records which file each row came from.
combo = pd.concat(
    [
        train.assign(_dataset="train"),
        test.assign(_dataset="test"),
    ],
    ignore_index=True,
)
profile_combo = _write_profile(
    combo,
    "Titanic Train+Test EDA",
    "/content/titanic_train_test_eda.html",
)

print("Saved HTML reports")

In [None]:
# Colab-only helper for sending files from the runtime to the browser.
from google.colab import files

# Download both generated EDA reports, train-only first.
for _report_path in (
    "/content/titanic_train_eda.html",
    "/content/titanic_train_test_eda.html",
):
    files.download(_report_path)