In [112]:
# Add the project root to the Python path so we can import from sibling folders like `utils/`
import sys
import os
sys.path.append(os.path.abspath(".."))


# Imports
# data analysis and wrangling
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display


# machine learning
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# utils
import importlib
from utils import visualize, feature_engineering
importlib.reload(visualize)
importlib.reload(feature_engineering)

from utils.visualize import visualize_overview, plot_numerics, plot_categoricals, describe_custom, split_and_append_column, sum_columns, plot_violin, plot_count
from utils.feature_engineering import prepare_features, encode_features


In [113]:
# Load the training CSV and push it through the project's feature pipeline:
# `prepare_features` first, then `encode_features` for the listed categorical columns.
# (What each helper does internally lives in utils/feature_engineering.py.)
train_df = (
    pd.read_csv("../data/train.csv")
    .pipe(prepare_features)
    .pipe(encode_features, ['HomePlanet', 'Destination', 'CabinDeck', 'CabinSide'])
)

# Last expression of the cell, so the summary returned by describe_custom is displayed.
describe_custom(train_df)



  df['VIP'] = df['VIP'].fillna(False)


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max,null,null%,dtype
PassengerId,8693.0,8693.0,0001_01,1.0,,,,,,,,0,0.0,object
CryoSleep,8693.0,,,,0.360635,0.480212,0.0,0.0,0.0,1.0,1.0,0,0.0,int64
Cabin,8494.0,6560.0,G/734/S,8.0,,,,,,,,199,2.289198,object
Age,8693.0,,,,28.790291,14.341404,0.0,20.0,27.0,37.0,79.0,0,0.0,float64
VIP,8693.0,,,,0.022892,0.149568,0.0,0.0,0.0,0.0,1.0,0,0.0,int64
RoomService,8512.0,,,,224.687617,666.717663,0.0,0.0,0.0,47.0,14327.0,181,2.082135,float64
FoodCourt,8510.0,,,,458.077203,1611.48924,0.0,0.0,0.0,76.0,29813.0,183,2.105142,float64
ShoppingMall,8485.0,,,,173.729169,604.696458,0.0,0.0,0.0,27.0,23492.0,208,2.39273,float64
Spa,8510.0,,,,311.138778,1136.705535,0.0,0.0,0.0,59.0,22408.0,183,2.105142,float64
VRDeck,8505.0,,,,304.854791,1145.717189,0.0,0.0,0.0,46.0,24133.0,188,2.16266,float64


In [114]:

# Define target and features
target = 'Transported'

# Columns that are never fed to the model (the label is excluded separately below).
exclude_cols = ['Cabin', 'Name', 'PassengerId', 'CabinNum']

# Build the exclusion set once, and derive it from `target` instead of repeating the
# literal column name, so changing `target` can never leak the label into X.
non_feature_cols = set(exclude_cols) | {target}
features = [col for col in train_df.columns if col not in non_feature_cols]

# Extract X (one row per example, one column per feature) and y
X = train_df[features].to_numpy()
y = train_df[target].to_numpy().astype(int)  # Convert to 0/1

# Preview with the real feature names so each column of X can be identified.
pd.DataFrame(X, columns=features).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
0,0.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,24.0,0.0,109.0,9.0,25.0,549.0,44.0,736.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,0.0,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,10383.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,5176.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,16.0,0.0,303.0,70.0,151.0,565.0,2.0,1091.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [135]:
# Train/validation split.
# `train_test_split` is already imported in the first cell, so it is not re-imported here.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33, random_state=42)

# The network code below multiplies weights by X (np.dot(w1, X)), so examples must be
# stored as columns: X becomes (n_features, m) and y becomes a (1, m) row vector.
X_train = X_train.T
y_train = y_train.reshape(1, -1)

X_val = X_val.T
y_val = y_val.reshape(1, -1)

# Print shapes
print(f"X_train: {X_train.shape}, X_val: {X_val.shape}")
print(f"y_train: {y_train.shape}, y_val: {y_val.shape}")

# Sanity checks: a single numeric dtype, and whether any NaN / inf values are present.
print(pd.DataFrame(X_train).dtypes.unique())
print(np.isnan(X_train).any(), np.isinf(X_train).any())

# np.nan_to_num replaces NaN with 0.0 (and +/-inf with very large finite numbers),
# i.e. missing values are treated as zero for both the training and validation matrices.
print("NaNs before fixing:", np.isnan(X_train).sum())
X_train = np.nan_to_num(X_train)
X_val = np.nan_to_num(X_val)
print("NaNs after fixing:", np.isnan(X_train).sum())


X_train: (25, 5824), X_val: (25, 2869)
y_train: (1, 5824), y_val: (1, 2869)
[dtype('float64')]
True False
NaNs before fixing: 605
NaNs after fixing: 0


In [153]:
# Load the test CSV and apply the same two feature-pipeline steps, with the same
# categorical column list, that were used for the training data.
test_df = (
    pd.read_csv("../data/test.csv")
    .pipe(prepare_features)
    .pipe(encode_features, ['HomePlanet', 'Destination', 'CabinDeck', 'CabinSide'])
)

# Last expression of the cell, so the summary returned by describe_custom is displayed.
describe_custom(test_df)

  df['VIP'] = df['VIP'].fillna(False)


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max,null,null%,dtype
PassengerId,4277.0,4277.0,0013_01,1.0,,,,,,,,0,0.0,object
CryoSleep,4277.0,,,,0.369885,0.48283,0.0,0.0,0.0,1.0,1.0,0,0.0,int64
Cabin,4177.0,3265.0,G/160/P,8.0,,,,,,,,100,2.338087,object
Age,4277.0,,,,28.60159,14.032629,0.0,20.0,26.0,37.0,79.0,0,0.0,float64
VIP,4277.0,,,,0.017302,0.130409,0.0,0.0,0.0,0.0,1.0,0,0.0,int64
RoomService,4195.0,,,,219.266269,607.011289,0.0,0.0,0.0,53.0,11567.0,82,1.917232,float64
FoodCourt,4171.0,,,,439.484296,1527.663045,0.0,0.0,0.0,78.0,25273.0,106,2.478373,float64
ShoppingMall,4179.0,,,,177.295525,560.821123,0.0,0.0,0.0,33.0,8292.0,98,2.291326,float64
Spa,4176.0,,,,303.052443,1117.186015,0.0,0.0,0.0,50.0,19844.0,101,2.361468,float64
VRDeck,4197.0,,,,310.710031,1246.994742,0.0,0.0,0.0,36.0,22272.0,80,1.87047,float64


In [157]:
# Build the test matrix from the *training* feature list so that the rows of X_test are
# guaranteed to be the same features, in the same order, as the rows the weights were
# trained on. Selecting "every column not excluded" independently on test_df would
# silently misalign the model inputs if the encoded columns ever differ.
missing_cols = [col for col in features if col not in test_df.columns]
if missing_cols:
    raise ValueError(f"test data is missing feature columns used in training: {missing_cols}")
test_features = list(features)

# Extract X only (the test set has no labels), transposed to (n_features, m)
X_test = test_df[test_features].to_numpy().T

print(f"X_test: {X_test.shape}")

# Same missing-value treatment as the training data: NaN -> 0.0
print("NaNs before fixing:", np.isnan(X_test).sum())
X_test = np.nan_to_num(X_test)
print("NaNs after fixing:", np.isnan(X_test).sum())

X_test: (25, 4277)
NaNs before fixing: 467
NaNs after fixing: 0


In [116]:
# Define Sigmoid function and its derivative

def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    The input is clipped to [-500, 500] before exponentiating, which keeps
    np.exp within float64 range for arbitrarily large inputs.
    """
    clipped = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-clipped)
    return 1 / denominator

def sigmoid_derivative(a):
    """Return a * (1 - a).

    When `a` is already a sigmoid *output*, this is the derivative of the
    sigmoid with respect to its input.
    """
    complement = 1 - a
    return a * complement

In [117]:
# Initialize parameters 

def initialize_parameters(n_x, n_h, n_y, seed=None, initial_lr=0.1):
    """Create the starting weights, biases and learning rate for the two-layer network.

    Parameters
    ----------
    n_x : int
        Number of input features.
    n_h : int
        Number of hidden units.
    n_y : int
        Number of output units.
    seed : int or None, optional
        When given, the weights are drawn from a private ``RandomState(seed)`` so the
        initialisation is reproducible and does not touch NumPy's global random state.
        When None (the default) the global ``np.random`` state is used, exactly as before.
    initial_lr : float, optional
        Learning rate returned alongside the parameters (default 0.1).

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2, learning_rate)`` with shapes ``(n_h, n_x)``, ``(n_h, 1)``,
        ``(n_y, n_h)`` and ``(n_y, 1)``.
    """
    # Legacy RandomState keeps the same `randn` API as the global np.random module.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Small random weights break the symmetry between hidden units; biases start at zero.
    w1 = rng.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))

    w2 = rng.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    learning_rate = initial_lr

    return (w1, b1, w2, b2, learning_rate)

In [118]:
# Forward propagation 

def forward_propagation(X, w1, b1, w2, b2):
    """Run one forward pass through the two-layer sigmoid network.

    With parameters shaped as produced by initialize_parameters, X is expected to hold
    one example per column, i.e. shape (n_x, m).

    Returns
    -------
    tuple
        ``(a2, cache)`` where ``a2`` is the output-layer activation and ``cache`` is a
        dict with the intermediate values under the keys "z1", "a1", "z2" and "a2".
    """
    # Input -> hidden layer
    hidden_pre = np.dot(w1, X) + b1
    hidden_act = sigmoid(hidden_pre)

    # Hidden -> output layer
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    intermediates = dict(z1=hidden_pre, a1=hidden_act, z2=output_pre, a2=output_act)
    return output_act, intermediates

In [129]:
# Compute loss

def compute_loss(a2, y):
    """Mean binary cross-entropy between predictions `a2` and labels `y`.

    A small constant (1e-9) is added inside both logarithms so that predictions of
    exactly 0 or 1 do not produce log(0).
    """
    eps = 1e-9
    positive_term = y * np.log(a2 + eps)
    negative_term = (1 - y) * np.log(1 - a2 + eps)
    return -np.mean(positive_term + negative_term)

In [130]:
# Back propagation

def back_propagation(w1, w2, cache, X, y):
    """Compute the gradients of the loss with respect to all four parameters.

    Parameters
    ----------
    w1 : not used by the computation; kept in the signature for callers.
    w2 : output-layer weights, used to push the output error back to the hidden layer.
    cache : dict holding the forward-pass activations under "a1" and "a2".
    X : inputs with one example per column (the example count is ``X.shape[1]``).
    y : labels, broadcast-compatible with ``cache["a2"]``.

    Returns
    -------
    dict
        Gradients under the keys "dw1", "db1", "dw2" and "db2", each averaged over
        the examples.
    """
    hidden_act, output_act = cache["a1"], cache["a2"]
    n_examples = X.shape[1]

    # Output-layer error (prediction minus label).
    output_delta = output_act - y

    # Hidden-layer error: back-propagated output error times a1 * (1 - a1).
    hidden_delta = np.dot(w2.T, output_delta) * (hidden_act * (1 - hidden_act))

    return {
        "dw1": np.dot(hidden_delta, X.T) / n_examples,
        "db1": np.sum(hidden_delta, axis=1, keepdims=True) / n_examples,
        "dw2": np.dot(output_delta, hidden_act.T) / n_examples,
        "db2": np.sum(output_delta, axis=1, keepdims=True) / n_examples,
    }

In [131]:
# Update weights and biases

def update_parameters(w1, b1, w2, b2, grads, learning_rate):
    """Apply one gradient-descent step and return the updated parameters.

    The update uses augmented assignment, so NumPy arrays passed in are modified
    IN PLACE and the returned objects are the same arrays. Callers that need to keep
    an earlier version of the weights must copy them before calling this.
    """
    # Look up all four gradients first so a missing key fails before anything is changed.
    dw1, db1, dw2, db2 = (grads[key] for key in ("dw1", "db1", "dw2", "db2"))

    w1 -= learning_rate * dw1
    b1 -= learning_rate * db1
    w2 -= learning_rate * dw2
    b2 -= learning_rate * db2

    return w1, b1, w2, b2

In [132]:
# Training state shared with the training loop

# Per-epoch metric history (the loop appends one value per epoch to each list)
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Early-stopping bookkeeping
early_stopping_patience = 2000    # epochs to keep going without a validation-loss improvement
best_val_loss = float('inf')      # lowest validation loss observed so far
patience_counter = 0              # epochs elapsed since the last improvement

In [137]:
# Training loop
#
# Full-batch gradient descent with:
#   * early stopping on the validation loss (patience = early_stopping_patience epochs),
#   * learning-rate halving every 200 consecutive epochs without improvement,
#   * restoration of the best-validation-loss weights once training ends.

# Seed NumPy's global generator so the random weight initialisation (and therefore the
# whole run) is reproducible on Restart & Run All.
np.random.seed(42)

# Size the input layer from the data instead of hard-coding the feature count.
n_features = X_train.shape[0]
w1, b1, w2, b2, learning_rate = initialize_parameters(n_features, 32, 1)

X_train = X_train.astype(float)
X_val = X_val.astype(float)
y_train = y_train.astype(float)
y_val = y_val.astype(float)

# Reset all per-run state here so that re-running this cell starts a fresh run rather
# than appending to old histories or inheriting a stale best_val_loss / patience_counter.
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
early_stopping_patience = 2000
best_val_loss = float('inf')
patience_counter = 0
best_weights = (w1.copy(), b1.copy(), w2.copy(), b2.copy())

for epoch in range(15000):

    # --- Forward on training set ---
    a2_train, cache_train = forward_propagation(X_train, w1, b1, w2, b2)
    train_loss = compute_loss(a2_train, y_train)
    train_pred = (a2_train > 0.5).astype(int)
    train_acc = np.mean(train_pred == y_train)

    # --- Forward on validation set ---
    a2_val, _ = forward_propagation(X_val, w1, b1, w2, b2)
    val_loss = compute_loss(a2_val, y_val)
    val_pred = (a2_val > 0.5).astype(int)
    val_acc = np.mean(val_pred == y_val)

    # --- Log metrics ---
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    if epoch % 500 == 0:
        print(f"Epoch {epoch:4} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Train Acc: {train_acc:.2%} | Val Acc: {val_acc:.2%}")

    # --- Early stopping check ---
    # Must run BEFORE the parameter update: update_parameters modifies the arrays in
    # place, so snapshotting afterwards would store weights that are one step past the
    # ones that actually produced `val_loss`.
    if val_loss < best_val_loss - 1e-4:
        best_val_loss = val_loss
        patience_counter = 0
        best_weights = (w1.copy(), b1.copy(), w2.copy(), b2.copy())
    else:
        patience_counter += 1

    if patience_counter > early_stopping_patience:
        print(f"Early stopping triggered at epoch {epoch}")
        break

    # --- Backprop + update (uses the learning rate in effect for this epoch) ---
    grads = back_propagation(w1, w2, cache_train, X_train, y_train)
    w1, b1, w2, b2 = update_parameters(w1, b1, w2, b2, grads, learning_rate)

    # Learning rate decay: halve after every 200 consecutive epochs without improvement
    if patience_counter % 200 == 0 and patience_counter > 0:
        learning_rate *= 0.5
        print(f"Reducing learning rate to {learning_rate:.6f}")

# Roll the model back to the best validation checkpoint so that later cells using
# w1, b1, w2, b2 predict with the best weights rather than the last ones.
w1, b1, w2, b2 = (param.copy() for param in best_weights)
print(f"Restored best weights (val loss {best_val_loss:.4f})")

Epoch    0 | Train Loss: 0.6948 | Val Loss: 0.6950 | Train Acc: 50.36% | Val Acc: 50.37%
Epoch  500 | Train Loss: 0.4746 | Val Loss: 0.4927 | Train Acc: 79.67% | Val Acc: 78.70%
Epoch 1000 | Train Loss: 0.4576 | Val Loss: 0.4817 | Train Acc: 79.04% | Val Acc: 78.01%
Epoch 1500 | Train Loss: 0.4488 | Val Loss: 0.4714 | Train Acc: 79.52% | Val Acc: 78.46%
Epoch 2000 | Train Loss: 0.4425 | Val Loss: 0.4660 | Train Acc: 80.00% | Val Acc: 78.18%
Reducing learning rate to 0.050000
Epoch 2500 | Train Loss: 0.4385 | Val Loss: 0.4629 | Train Acc: 79.79% | Val Acc: 77.90%
Reducing learning rate to 0.025000
Reducing learning rate to 0.012500
Epoch 3000 | Train Loss: 0.4369 | Val Loss: 0.4626 | Train Acc: 79.93% | Val Acc: 78.39%
Reducing learning rate to 0.006250
Reducing learning rate to 0.003125
Reducing learning rate to 0.001563
Epoch 3500 | Train Loss: 0.4366 | Val Loss: 0.4623 | Train Acc: 79.89% | Val Acc: 78.39%
Reducing learning rate to 0.000781
Reducing learning rate to 0.000391
Epoch 40

In [161]:
# Predict on the test set with the best-validation-loss weights tracked by the training
# loop (`best_weights`), not with whatever the weights happened to be on the last epoch.
best_w1, best_b1, best_w2, best_b2 = best_weights
a2_test, _ = forward_propagation(X_test, best_w1, best_b1, best_w2, best_b2)

# Threshold the probabilities at 0.5; flatten() turns the (1, m) row into shape (m,)
predictions_test = (a2_test > 0.5).astype(int).flatten()

# Guard against a silently misaligned submission file.
if len(predictions_test) != len(test_df):
    raise ValueError(
        f"Got {len(predictions_test)} predictions for {len(test_df)} test rows"
    )

submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'].values,
    'Transported': predictions_test.astype(bool)
})

submission.to_csv('submission.csv', index=False)