In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler


In [2]:
df = pd.read_csv('train.csv')
y = df['Survived'].values.reshape(-1,1)
data = df.drop(["Name",'Ticket','Cabin','Embarked','PassengerId','Survived'], axis=1)
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['Sex'] = data['Sex'].map({'male': 1 ,'female':2})
m = data.shape[0]


In [3]:
sc = StandardScaler()
def sigmoid(z):
    return (1/(1+np.exp(-z)))
def sigmoidder(z):
    return sigmoid(z)*(1-sigmoid(z))
def leaky_relu(z):
    return np.where(z > 0, z, z * 0.01)
def leaky_reluder(z):
    dz = np.ones_like(z)
    dz[z <= 0] = 0.01
    return dz


In [11]:

def initialize_parameters(layer_dims):

    L = len(layer_dims)
    W = {}
    b = {}

    for l in range(1, L):
        W[l] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(2 / layer_dims[l-1])
        b[l] = np.zeros((1, layer_dims[l]))

    return W, b


layer_dims = [6, 4, 1]
W, b = initialize_parameters(layer_dims)


In [13]:
x = data
x = sc.fit_transform(x)
alpha = 0.01
z = {}
a={}
dz = {}
dw = {}
db = {} 
L = len(layer_dims)
a[0] = x
for j in range(1000):
    for i in range(1,L):
        z[i] = np.dot(a[i-1],W[i].T) + b[i]
        if i == L-1:    
            a[i] = sigmoid(z[i])
        else:
            a[i] =leaky_relu(z[i])


    dz[L-1] = a[L-1] - y 
    
    # 2. Loop backwards from the last layer to the first
    for i in range(L-1, 0, -1):
        # Calculate gradients for the current layer (i)
        dw[i] = (1/m) * np.dot(dz[i].T, a[i-1]) 
        db[i] = (1/m) * np.sum(dz[i], axis=0, keepdims=True)
        
        # CALCULATE DZ FOR THE NEXT ITERATION (Move this inside!)
        if i > 1: 
            # This creates dz[3] when i is 4, dz[2] when i is 3, etc.
            dz[i-1] = np.dot(dz[i], W[i]) * leaky_reluder(z[i-1])
    for i in range(1,L):
        W[i] = W[i] - alpha * dw[i]
        b[i] = b[i] - alpha * db[i]  
        # print(W[i])

    eps = 1e-8
    AL = np.clip(a[L-1], eps, 1-eps)

    J = -(1/m)*np.sum(y*np.log(AL) + (1-y)*np.log(1-AL))
    print("Cost after ",j+1," iteration:",J)



df_test = pd.read_csv('test.csv')
Xt = df_test.drop(["Name",'Ticket','Cabin','Embarked','PassengerId','Survived'], axis=1)
Xt['Age'] = Xt['Age'].fillna(Xt['Age'].mean())
Xt['Fare'] = Xt['Fare'].fillna(Xt['Fare'].mean())
Yt = df_test['Survived'].values.reshape(-1,1)
m1 = Xt.shape[0]

# 1. Complete Preprocessing (Don't forget the 'Sex' mapping!)
Xt['Sex'] = Xt['Sex'].map({'male': 1, 'female': 2}) 

# 2. Scale the test features using the training scaler
Xt_scaled = sc.transform(Xt)

# 3. Forward Pass through the trained network
# We initialize a_test[0] with our scaled test data
a_test = {0: Xt_scaled}
z_test = {}

for i in range(1, L):
    z_test[i] = np.dot(a_test[i-1], W[i].T) + b[i]
    if i == L-1: # Output layer
        a_test[i] = sigmoid(z_test[i])
    else:        # Hidden layers
        a_test[i] = leaky_relu(z_test[i])

# 4. Final Predictions
# a_test[L-1] contains probabilities between 0 and 1
predictions = (a_test[L-1] >= 0.5).astype(int)

# 5. Calculate Accuracy 
accuracy = np.mean(predictions == Yt) * 100

# 6. Calculate Test Loss (Jt)
eps = 1e-8
AL_t = np.clip(a_test[L-1], eps, 1-eps)
test_loss = -(1/m1) * np.sum(Yt * np.log(AL_t) + (1-Yt) * np.log(1-AL_t))

print(f"--- Test Results ---")
print(f"Test Accuracy: {accuracy:.2f}%")
print(f"Test Loss:     {test_loss:.4f}")

Cost after  1  iteration: 0.49776849037643467
Cost after  2  iteration: 0.4976766810866278
Cost after  3  iteration: 0.49758502311323266
Cost after  4  iteration: 0.497493512664105
Cost after  5  iteration: 0.49740219074182035
Cost after  6  iteration: 0.4973110570091634
Cost after  7  iteration: 0.4972201111289953
Cost after  8  iteration: 0.49712935276425896
Cost after  9  iteration: 0.49703877250247763
Cost after  10  iteration: 0.496948259752451
Cost after  11  iteration: 0.49685793358990704
Cost after  12  iteration: 0.49676779367857193
Cost after  13  iteration: 0.4966778396822766
Cost after  14  iteration: 0.4965880712649608
Cost after  15  iteration: 0.4964984880906777
Cost after  16  iteration: 0.4964090447028567
Cost after  17  iteration: 0.49631966191502414
Cost after  18  iteration: 0.49623091681623466
Cost after  19  iteration: 0.496142979086775
Cost after  20  iteration: 0.49605522298340654
Cost after  21  iteration: 0.49596764817196404
Cost after  22  iteration: 0.495880

In [20]:
print(W[1].shape)
print(x.shape)
print(b[1].shape)   

(4, 6)
(499, 1)
(4,)
