In [2]:
import pandas as pd
import numpy as np

train_data = pd.read_csv('../data/processed/train_data.csv')

train_data

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,31
0,0.0,0.096928,0.257694,0.103656,0.045387,0.487226,0.373965,0.733365,0.217445,0.530808,...,0.084667,0.283316,0.075153,0.034285,0.508684,0.397018,1.000000,0.601375,0.524936,0.409681
1,1.0,0.667755,0.570172,0.683505,0.495228,0.554934,0.809214,0.582709,0.743539,0.674242,...,0.667022,0.571962,0.627970,0.467902,0.514627,0.709327,0.541534,0.997595,0.499310,0.481175
2,0.0,0.103744,0.140345,0.106489,0.049799,0.221901,0.208975,0.140300,0.108350,0.646970,...,0.073995,0.192164,0.075601,0.030697,0.179555,0.136324,0.111581,0.174811,0.338459,0.195855
3,0.0,0.173648,0.524518,0.167369,0.086320,0.396678,0.162444,0.055740,0.080268,0.422727,...,0.153682,0.617537,0.137308,0.066482,0.519910,0.109158,0.089856,0.210859,0.363493,0.173357
4,0.0,0.150930,0.174839,0.143459,0.071432,0.548614,0.187811,0.025398,0.064115,0.850000,...,0.109925,0.144723,0.096867,0.045075,0.371987,0.069244,0.017316,0.088625,0.392667,0.165027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,0.0,0.090255,0.166723,0.103656,0.042630,0.408053,0.410159,0.201640,0.142744,0.425253,...,0.064141,0.097281,0.060511,0.024381,0.327082,0.209865,0.114537,0.164467,0.135817,0.349993
451,0.0,0.220503,0.291512,0.216847,0.114104,0.555836,0.252500,0.165651,0.173211,0.374242,...,0.185343,0.459488,0.174810,0.082703,0.644720,0.231598,0.229473,0.418557,0.244628,0.235668
452,0.0,0.345923,0.240446,0.321401,0.207466,0.105263,0.022606,0.016987,0.031064,0.226263,...,0.248310,0.230011,0.219284,0.122739,0.095754,0.022383,0.030879,0.114536,0.176030,0.040404
453,1.0,0.331251,0.335137,0.327068,0.193425,0.481809,0.288080,0.263824,0.321223,0.307576,...,0.324084,0.500533,0.316201,0.168133,0.595192,0.319692,0.325000,0.627835,0.318155,0.330972


## Initialization

In [30]:

def init(data, hidden_layer_nb=2, outputs_nb=2, weights_initializer='heUniform', hidden_nodes_nb=None):
    X_train = data.iloc[:, 1:]
    y_train = data.iloc[:, 0]
    # One-hot encoding with 1 and 0
    y_train = pd.get_dummies(y_train).values
    
    if hidden_nodes_nb is None:
        hidden_nodes_nb = int(((X_train.shape[1] + outputs_nb) / 2))

    weights = []
    biases = []
    
    for layer in range(hidden_layer_nb + 1):
        # Input Layer to Hidden Layer
        if layer == 0:
            nodes_in = X_train.shape[1]
            nodes_out = hidden_nodes_nb
        # Hidden Layer to Hidden Layer
        elif layer < hidden_layer_nb:
            nodes_in = hidden_nodes_nb
            nodes_out = hidden_nodes_nb
        # Hidden Layer to Output Layer
        else:
            nodes_in = hidden_nodes_nb
            nodes_out = outputs_nb
            
        if weights_initializer == 'xavier':
            limit = np.sqrt(6 / (nodes_in + nodes_out))
        else:
            limit = np.sqrt(6 / nodes_in)
        weights.append(np.random.uniform(-limit, limit, (nodes_out, nodes_in)))
        biases.append(np.zeros(nodes_out))
    
    return hidden_nodes_nb, weights, biases, X_train, y_train
    
hidden_nodes_nb, weights, biases, X_train, y_train = init(train_data)

for i in range(len(weights)):
    print(f'weights[{i}]: {weights[i].shape}')
    print(f'biases[{i}]: {biases[i].shape}\n')


weights[0]: (16, 30)
biases[0]: (16,)

weights[1]: (16, 16)
biases[1]: (16,)

weights[2]: (2, 16)
biases[2]: (2,)



## Activation function

In [31]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)


## Loss function

In [32]:
def evaluate(y_train, y_pred, loss='binary_cross_entropy'):
    if loss == 'binary_cross_entropy':
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        N = len(y_train)

        loss = -(1/N) * np.sum(
            y_train * np.log(y_pred) + 
            (1 - y_train) * np.log(1 - y_pred)
        )
    return loss

## Forwardpropagation

In [44]:
def forward_propagation(X, weights, biases, activation='sigmoid', output_activation='softmax'):
    layers = [X]
    Z = []  # Store pre-activation values
    for i in range(len(weights)):
        z = np.dot(layers[i], weights[i].T) + biases[i]
        Z.append(z)
        
        if i == len(weights) - 1:
            if output_activation == 'softmax':
                layers.append(pd.DataFrame(softmax(z)))
        else:
            if activation == 'sigmoid':
                layers.append(pd.DataFrame(sigmoid(z)))
    return layers, Z

## Backpropagation


In [45]:
def backward_propagation(y_true, activations, Z, weights):
    gradients = {"dW": [], "db": []}
    num_layers = len(weights)

    delta = (activations[-1] - y_true) / y_true.shape[0]

    for i in reversed(range(num_layers)):
        dW = np.dot(activations[i].T, delta)
        db = np.sum(delta, axis=0)
        
        gradients["dW"].insert(0, dW)
        gradients["db"].insert(0, db)

        if i > 0:
            delta = np.dot(delta, weights[i].T) * sigmoid_derivative(sigmoid(Z[i-1]))  # Apply to Z instead of A

    return gradients


In [46]:
def update_parameters(weights, biases, gradients, learning_rate):
    for i in range(len(weights)):
        weights[i] -= learning_rate * gradients["dW"][i]
        biases[i] -= learning_rate * gradients["db"][i]
    return weights, biases

## Train the model

In [57]:
def train(train_data, hidden_layer_nb=2, output_nb = 2,  epochs=34, learning_rate=0.01, batch_size=8):
    
    # Initialize weights/biases for variable hidden layers + output
    hidden_nodes_nb, weights, biases, X_train, y_train = init(train_data, hidden_layer_nb, output_nb)

    n_samples = X_train.shape[0]

    for epoch in range(epochs):
        # # Shuffle data
        # permutation = np.random.permutation(n_samples)
        # print('x', X_train)
        # X_shuffled = X_train[permutation]
        # y_shuffled = y_train[permutation]

        epoch_loss = 0

        for i in range(0, n_samples, batch_size):
            X_batch = X_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]

            # Forward pass
            activations, Z = forward_propagation(X_batch, weights, biases)

            # Compute loss
            loss = evaluate(activations[-1], y_batch)
            epoch_loss += loss

            # Backward pass
            gradients = backward_propagation(y_batch, activations, Z, weights)

            # Update parameters
            weights, biases = update_parameters(weights, biases, gradients, learning_rate)

        # Print loss every 100 epochs
        if epoch % 100 == 0:
            avg_loss = epoch_loss / (n_samples // batch_size)
            print(f"Epoch {epoch}, Loss: {avg_loss:.4f}")

    return weights, biases

In [58]:
train(train_data)

ValueError: the 'keepdims' parameter is not supported in the pandas implementation of sum()