In [None]:
%pip install numpy
%pip install pandas
%pip install matplotlib
%pip install itertools
%pip install sklearn

In [124]:
import pandas as pd
import numpy as np
from itertools import chain
from sklearn.neural_network import MLPRegressor
import matplotlib.pyplot as plt

In [125]:
# Suppress every Python warning for the remainder of the kernel session.
# NOTE(review): the 'ignore' action applies to all warning categories, so
# numerical and library warnings raised by later cells will not be shown —
# consider narrowing the filter to specific categories.
from warnings import filterwarnings

filterwarnings('ignore')

# Train Test Split Implementation

In [126]:
def get_val_at_indices(x, indices):
    """Return the entries of ``x`` selected by ``indices``.

    ``x`` is indexed directly with ``indices`` (``x[indices]``), so for a
    NumPy array an integer index array selects rows along the first axis.
    """
    selected = x[indices]
    return selected

def train_test_split(*arrays, test_size=0.25, shufffle=True, random_state=1, shuffle=None):
    """Split each array into a train part and a test part along its first axis.

    Parameters
    ----------
    *arrays : indexable arrays (e.g. ``np.ndarray``) that all share the same length.
    test_size : float
        Fraction of the samples placed in the test part. The test length is
        ``ceil(length * test_size)``.
    shufffle : bool
        Historical (misspelled) name of the shuffle switch; kept so existing
        callers continue to work.
    random_state : int
        Seed for the permutation used when shuffling.
    shuffle : bool or None
        Correctly spelled alias for ``shufffle``. When given (not ``None``) it
        takes precedence.

    Returns
    -------
    list
        ``[a_train, a_test, b_train, b_test, ...]`` in the order the arrays
        were passed.

    Raises
    ------
    ValueError
        If no array is given or the arrays differ in length.
    """
    if not arrays:
        raise ValueError("At least one array is required.")

    # every array is split with the indices derived from the first one
    length = len(arrays[0])
    if any(len(array) != length for array in arrays):
        raise ValueError("All arrays must contain the same number of samples.")

    if shuffle is not None:
        shufffle = shuffle

    # split lengths
    len_test = int(np.ceil(length * test_size))
    len_train = length - len_test

    if shufffle:
        # the first len_test entries of the permutation form the test part
        perm = np.random.RandomState(random_state).permutation(length)
        test_indices = perm[:len_test]
        train_indices = perm[len_test:]
    else:
        # without shuffling the test part is the tail of the data
        train_indices = np.arange(len_train)
        test_indices = np.arange(len_train, length)

    return list(chain.from_iterable((x[train_indices], x[test_indices]) for x in arrays))

# Custom Class Implementation

In [207]:
class CNN:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    Despite its name, the class only contains dense layers (a multilayer
    perceptron); there are no convolutions.

    Typical usage::

        model = CNN(hidden_layer_sizes=(32,), learning_rate=0.01)
        model.train_test_split(X, y, test_size=0.2, random_state=42)
        model.train()
        loss = model.score(model.X_test, model.y_test)

    Parameters
    ----------
    hidden_layer_sizes : tuple of int
        Number of units in each hidden layer.
    learning_rate : float
        Step size of the gradient update (stored as ``np.float64``).
    epochs : int
        Maximum number of passes over the training data.
    batch_size : int
        Number of samples per mini batch (must be positive).
    random_state : int
        Seed for the weight initialisation.
    tol : float
        Early-stopping threshold on the mean training loss of an epoch.
    hidden_layer_activation, output_layer_activation : str
        ``'logistic'``, ``'ReLU'`` or ``'linear'``; any other value falls back
        to ``'logistic'``.
    optim_algo : str
        Only ``'SGD'`` (mini-batch gradient descent) is available.
    loss_function : str
        Only ``'MSE'`` is available.
    early_stopping : bool
        Stop as soon as the mean epoch loss drops below ``tol``.

    Raises
    ------
    ValueError
        For an unknown ``optim_algo`` / ``loss_function`` or a non-positive
        ``batch_size``.
    """

    # When loss tracking is enabled, losses are recorded every this many epochs.
    LOSS_RECORD_INTERVAL = 10

    def __init__(self, hidden_layer_sizes=(100,), learning_rate=0.1, epochs=200, batch_size=32, random_state=1, tol=1e-4, hidden_layer_activation='logistic', output_layer_activation='linear', optim_algo='SGD', loss_function='MSE', early_stopping=False):
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer.')

        # learning rate is cast to np.float64 so updates are done in double precision
        self.learning_rate = np.float64(learning_rate)
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state
        self.tol = tol
        self.hidden_layer_sizes = hidden_layer_sizes
        # weights are created lazily on the first call to train()
        self.is_first_pass = True
        self.early_stopping = early_stopping

        # name -> (activation, derivative expressed in terms of the activation output)
        activation_table = {
            'logistic': (self.sigmoid, self.sigmoid_derivative),
            'ReLU': (self.ReLU, self.ReLU_derivative),
            'linear': (self.linear, self.linear_derivative),
        }
        # unknown names default to sigmoid
        fallback = activation_table['logistic']

        self.hidden_layer_activation, self.hidden_layer_activation_derivative = \
            activation_table.get(hidden_layer_activation, fallback)
        self.output_layer_activation, self.output_layer_activation_derivative = \
            activation_table.get(output_layer_activation, fallback)

        # Fail at construction time instead of leaving the attribute unset and
        # crashing later inside train()/score().
        optimisers = {'SGD': self.msgd}
        if optim_algo not in optimisers:
            raise ValueError(f"Unsupported optim_algo {optim_algo!r}; expected one of {sorted(optimisers)}.")
        self.optim_algo = optimisers[optim_algo]

        losses = {'MSE': self.MSE}
        if loss_function not in losses:
            raise ValueError(f"Unsupported loss_function {loss_function!r}; expected one of {sorted(losses)}.")
        self.loss_function = losses[loss_function]

    @staticmethod
    def _as_column(y):
        """Return ``y`` as a 2-D column when it is 1-D, otherwise unchanged.

        Predictions have shape ``(n, outputs)``; subtracting a 1-D target of
        shape ``(n,)`` would broadcast to an ``(n, n)`` matrix.
        """
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    # train test split to store the train and test data
    def train_test_split(self, *arrays, test_size=0.25, shufffle=True, random_state=1, shuffle=None):
        """Split ``X`` and ``y`` and store the parts on the instance.

        Sets ``self.X_train``, ``self.X_test``, ``self.y_train`` and
        ``self.y_test``.

        Parameters
        ----------
        *arrays : exactly two arrays, ``X`` then ``y``, with equal length.
        test_size : float
            Fraction of samples in the test part (``ceil(length * test_size)``).
        shufffle : bool
            Historical (misspelled) shuffle switch, kept for compatibility.
        random_state : int
            Seed of the permutation used when shuffling.
        shuffle : bool or None
            Correctly spelled alias; takes precedence when not ``None``.

        Raises
        ------
        ValueError
            If not exactly two arrays are given or their lengths differ.
        """
        if len(arrays) != 2:
            raise ValueError('train_test_split expects exactly two arrays: X and y.')

        X, y = arrays
        length = len(X)
        if len(y) != length:
            raise ValueError('X and y must contain the same number of samples.')

        if shuffle is not None:
            shufffle = shuffle

        # split lengths
        len_test = int(np.ceil(length * test_size))
        len_train = length - len_test

        if shufffle:
            perm = np.random.RandomState(random_state).permutation(length)
            test_indices = perm[:len_test]
            train_indices = perm[len_test:]
        else:
            train_indices = np.arange(len_train)
            test_indices = np.arange(len_train, length)

        self.X_train, self.X_test = X[train_indices], X[test_indices]
        self.y_train, self.y_test = y[train_indices], y[test_indices]

    def sigmoid(self, X):
        """Logistic function; the input is cast to float64 first (handles object arrays)."""
        return 1 / (1 + np.exp(-X.astype(np.float64)))

    def sigmoid_derivative(self, X):
        """Sigmoid derivative written in terms of the sigmoid *output* ``X``."""
        return X * (1-X)

    def ReLU(self, X):
        """Element-wise ``max(0, X)``."""
        return np.maximum(0, X)

    def ReLU_derivative(self, X):
        """1 where ``X > 0`` and 0 elsewhere."""
        return 1 * (X > 0)

    def linear(self, X):
        """Identity activation."""
        return X

    def linear_derivative(self, X):
        """Derivative of the identity: the scalar 1."""
        return 1

    def load_data(self, X, y):
        """Return the ``(start, end)`` row ranges of consecutive mini batches.

        ``end`` is exclusive and the last batch may be shorter than
        ``batch_size``. ``y`` is accepted for symmetry but not used.
        """
        n_samples = X.shape[0]
        return [
            (start, min(start + self.batch_size, n_samples))
            for start in range(0, n_samples, self.batch_size)
        ]

    # MSE loss
    def MSE(self, y, y_pred):
        """Mean of the squared differences between ``y`` and ``y_pred``."""
        return np.mean((y - y_pred)**2)

    def msgd(self, X, y):
        """Train with mini-batch gradient descent for up to ``self.epochs`` epochs.

        Stores ``self.train_losses`` and ``self.test_losses``. When loss
        tracking is on (``self.plot_losses``), the mean training loss is
        recorded every ``LOSS_RECORD_INTERVAL`` epochs, together with the loss
        on ``self.X_test`` / ``self.y_test`` if those are available.

        Raises
        ------
        ValueError
            If ``X`` contains no samples.
        """
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError('Cannot train on an empty dataset.')

        activations = [None] * self.num_layers
        deltas = [None] * (self.num_layers - 1)

        mini_batches = self.load_data(X, y)
        track_losses = getattr(self, 'plot_losses', False)
        has_test_data = hasattr(self, 'X_test') and hasattr(self, 'y_test')

        train_losses = []
        test_losses = []

        for epoch in range(self.epochs):
            epoch_loss = 0
            for start, end in mini_batches:
                # X[start:end] holds exactly end - start rows
                current_batch_size = end - start

                activations[0] = X[start:end]
                self.forward(activations)

                # weight the batch mean by its size so the epoch mean is exact
                batch_loss = self.backward(activations, deltas, y[start:end], current_batch_size)
                epoch_loss += batch_loss * current_batch_size

            mean_epoch_loss = epoch_loss / n_samples

            if track_losses and (epoch + 1) % self.LOSS_RECORD_INTERVAL == 0:
                train_losses.append(mean_epoch_loss)
                if has_test_data:
                    # score() already returns a mean and aligns a 1-D y_test
                    test_losses.append(self.score(self.X_test, self.y_test))

            # If early stopping is enabled, stop once the mean loss is below tolerance
            if self.early_stopping and mean_epoch_loss < self.tol:
                break

        self.train_losses = train_losses
        self.test_losses = test_losses

    def weight_initialization(self, layer_sizes):
        """Create ``self.weights`` and ``self.biases`` for consecutive layer pairs.

        Uses a private ``RandomState(self.random_state)``, which yields the
        same values as seeding the global NumPy generator would, without
        touching global state.
        """
        rng = np.random.RandomState(self.random_state)

        self.weights = []
        self.biases = []

        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            weight, bias = self.add_layer(fan_in, fan_out, rng)

            self.weights.append(weight)
            self.biases.append(bias)

    def add_layer(self, input_size, output_size, rng=None):
        """Return a ``(weight, bias)`` pair drawn uniformly from [-1, 1).

        ``weight`` has shape ``(input_size, output_size)`` and ``bias`` has
        shape ``(1, output_size)``. ``rng`` is an object with a ``rand``
        method; the global ``np.random`` module is used when it is ``None``.
        """
        source = np.random if rng is None else rng

        weight = np.array((source.rand(input_size, output_size) - 0.5) * 2, dtype=np.float64)
        bias = np.array((source.rand(1, output_size) - 0.5) * 2, dtype=np.float64)

        return weight, bias

    def first_pass(self, layer_sizes):
        """Record the number of layers and initialise all weights and biases."""
        self.num_layers = len(layer_sizes)

        self.weight_initialization(layer_sizes)

    def forward(self, activations):
        """Fill ``activations`` in place from the input in ``activations[0]``.

        Returns the same list; ``activations[-1]`` is the network output.
        """
        # NOTE(review): the hidden activation is also applied to the raw input
        # features before the first layer. predict() relies on the same
        # behaviour, so it is kept as is — confirm this is intended.
        activations[0] = self.hidden_layer_activation(activations[0])

        output_index = self.num_layers - 2
        for i in range(self.num_layers - 1):
            pre_activation = np.dot(activations[i], self.weights[i]) + self.biases[i]
            # the last weight matrix feeds the output layer
            if i != output_index:
                activations[i+1] = self.hidden_layer_activation(pre_activation)
            else:
                activations[i+1] = self.output_layer_activation(pre_activation)

        return activations

    def backward(self, activations, deltas, y, current_batch_size):
        """Back-propagate the error of one mini batch and update the parameters.

        All deltas are computed with the weights used in the forward pass;
        only afterwards are weights and biases updated. Gradients are averaged
        over ``current_batch_size``.

        Returns
        -------
        The loss of the batch before the update.
        """
        loss = self.loss_function(y, activations[-1])

        last = self.num_layers - 2
        deltas[last] = (activations[-1] - y) * self.output_layer_activation_derivative(activations[-1])

        # propagate through the *pre-update* weights
        for i in range(last, 0, -1):
            deltas[i-1] = np.dot(deltas[i], self.weights[i].T) * self.hidden_layer_activation_derivative(activations[i])

        for i in range(last, -1, -1):
            # astype keeps the update in float64 even for object-typed inputs
            self.weights[i] -= (np.dot(activations[i].T, deltas[i]).astype(np.float64) / current_batch_size) * self.learning_rate
            self.biases[i] -= (np.sum(deltas[i], axis=0).astype(np.float64) / current_batch_size) * self.learning_rate

        return loss

    def train(self, plot_losses=False):
        """Train on the data stored by ``train_test_split``.

        Parameters
        ----------
        plot_losses : bool
            Record train/test losses during training and plot them afterwards.

        Raises
        ------
        AttributeError
            If ``train_test_split`` has not been called yet.
        """
        if not hasattr(self, 'X_train') or not hasattr(self, 'y_train'):
            raise AttributeError('X_train and y_train not found. Please run train test split first.')

        self.plot_losses = plot_losses

        # if y is 1D, convert to 2D
        if len(self.y_train.shape) == 1:
            self.y_train = self.y_train.reshape(-1, 1)

        # Initialize weights and biases if it's the first pass
        if self.is_first_pass:
            self.first_pass([self.X_train.shape[1]] + list(self.hidden_layer_sizes) + [self.y_train.shape[1]])
            self.is_first_pass = False

        # Train using the optimization algorithm set while initializing the model
        self.optim_algo(self.X_train, self.y_train)

        if plot_losses:
            # x positions are the epochs at which the losses were recorded
            recorded_epochs = [self.LOSS_RECORD_INTERVAL * (k + 1) for k in range(len(self.train_losses))]
            plt.plot(recorded_epochs, self.train_losses, label='Train Loss')
            if self.test_losses:
                plt.plot(recorded_epochs[:len(self.test_losses)], self.test_losses, label='Test Loss')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            plt.show()

    def predict(self, X):
        """Return the network output for ``X`` (same computation as ``forward``)."""
        activations = [X] + [None] * (self.num_layers - 1)
        return self.forward(activations)[-1]

    def score(self, X, y):
        """Return the configured loss of the predictions for ``X`` against ``y``.

        This is a loss (lower is better), not an accuracy. A 1-D ``y`` is
        reshaped to a column so it lines up with the predictions.
        """
        y_pred = self.predict(X)

        return self.loss_function(self._as_column(y), y_pred)
    

# Main

## Reading data

In [128]:
# Load the taxi fares dataset from the notebook's working directory.
csv_path = './taxi.csv'
data = pd.read_csv(csv_path)

# Show which columns are available before any preprocessing.
print(data.columns)

Index(['Unnamed: 0', 'key', 'fare_amount', 'pickup_datetime',
       'pickup_longitude', 'pickup_latitude', 'dropoff_longitude',
       'dropoff_latitude', 'passenger_count'],
      dtype='object')


## Pre Processing

In [1]:
# from the df remove first 2 columns ('Unnamed: 0' and 'key')
# NOTE: `data` is rebound from a DataFrame to a NumPy array below, so this cell
# cannot be re-run without re-running the cell that reads the CSV first.
data = data.iloc[:, 2:]

# convert to numpy array
data = data.values

print(data.shape)

print(data[0])


def _seconds_since_midnight(timestamp):
    """Convert 'YYYY-MM-DD HH:MM:SS UTC' to hours*3600 + mins*60 + seconds (np.float64)."""
    # split once: second whitespace-separated token is the clock time
    hours, minutes, seconds = (np.float64(part) for part in timestamp.split(" ")[1].split(":")[:3])
    return hours*3600 + minutes*60 + seconds


# for each data point, the second column is a date time string; replace it by
# the time of day in seconds (each timestamp is parsed exactly once)
data[:, 1] = [_seconds_since_midnight(timestamp) for timestamp in data[:, 1]]

print(data[0])

NameError: name 'data' is not defined

In [130]:
# Column 0 holds the fares (target); every remaining column is a feature.
y, X = data[:, 0], data[:, 1:]

print(f"X: {X.shape}, y: {y.shape}")

# Hold out 20% of the rows, shuffled with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Turn both targets into column vectors so that np computations can be done without issues
y_train, y_test = (target.reshape(-1, 1) for target in (y_train, y_test))

print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")

X: (200000, 6), y: (200000,)
X_train: (160000, 6), y_train: (160000, 1)


## Case 1

* No. of hidden layers: 1
* No. of neurons in hidden layer: 32
* Activation function in the hidden layer: Sigmoid
* 1 neuron in the output layer.
* Activation function in the output layer: Linear
* Optimisation algorithm: Mini Batch Stochastic Gradient Descent (SGD)
* Loss function: Mean Squared Error (MSE)
* Learning rate: 0.01
* No. of epochs = 200

### Custom Class Implementation

In [208]:
# Case 1 model: one sigmoid hidden layer of 32 units and a linear output,
# trained with mini-batch SGD on the MSE loss.
CustomNN = CNN(
    hidden_layer_sizes=(32,),
    hidden_layer_activation='logistic',
    output_layer_activation='linear',
    learning_rate=0.01,
    epochs=200,
    optim_algo='SGD',
    loss_function='MSE',
    random_state=42,
)

Training the model using X_train and y_train

In [209]:
# Use the same test_size and random_state as the module-level split so that the
# model's internal train/test rows are exactly X_train/X_test. With the
# defaults (0.25 / seed 1) the rows later used as "test" data would overlap
# the rows the model was trained on.
CustomNN.train_test_split(X, y, test_size=0.2, random_state=42)

In [210]:
CustomNN.train(plot_losses=True)

# Report the loss on the rows the model was actually trained on (score returns
# the configured loss, MSE here) instead of printing the raw prediction array.
print(f"train score for the custom model: {CustomNN.score(CustomNN.X_train, CustomNN.y_train)}")

Testing the model using X_test

In [None]:
# CNN.score returns the model's loss on the given data (lower is better).
test_score = CustomNN.score(X=X_test, y=y_test)

print(f"test score for the custom model: {test_score}")

### Sklearn Implementation

In [None]:
# Reference model from scikit-learn configured like the Case 1 custom network.
MLP = MLPRegressor(
    solver='sgd',
    activation='logistic',
    hidden_layer_sizes=(32,),
    learning_rate_init=0.01,
    max_iter=200,
    random_state=42,
)

Training the model using X_train and y_train

In [None]:
# y_train was reshaped to a column vector for the custom model; pass a flat
# 1-D target to scikit-learn, which is what it expects for a single output.
MLP.fit(X_train, y_train.ravel())

# NOTE(review): per the scikit-learn documentation a regressor's `score` is the
# R^2 coefficient, whereas CNN.score returns the MSE loss — the two printed
# "scores" are therefore not directly comparable.
print(f"train score for the MLP model: {MLP.score(X_train, y_train)}")

Testing the model using X_test

In [None]:
# Evaluate the scikit-learn model on the held-out rows.
MLP_test_score = MLP.score(X=X_test, y=y_test)

print(f"test score for the MLP model: {MLP_test_score}")

## Case 2
* No. of hidden layers: 2
* No. of neurons in the 1st hidden layer: 64
* No. of neurons in the 2nd hidden layer: 32
* Activation function in both the hidden layers: ReLU
* 1 neuron in the output layer.
* Activation function in the output layer: Linear
* Optimisation algorithm: Mini Batch Stochastic Gradient Descent (SGD)
* Loss function: Mean Squared Error (MSE)
* Learning rate: 0.01
* No. of epochs = 200

### Custom Class Implementation

In [None]:
# Case 2 model: two ReLU hidden layers (64 and 32 units) and a linear output,
# trained with mini-batch SGD on the MSE loss.
CustomNN = CNN(
    hidden_layer_sizes=(64, 32),
    hidden_layer_activation='ReLU',
    output_layer_activation='linear',
    learning_rate=0.01,
    epochs=200,
    optim_algo='SGD',
    loss_function='MSE',
    random_state=42,
)

Training the model using X_train and y_train

In [None]:
# CNN.train() takes no data arguments: it trains on the split stored on the
# instance. This is a fresh model, so store the split first, using the same
# test_size and random_state that produced X_train/X_test so both refer to the
# same rows.
CustomNN.train_test_split(X, y, test_size=0.2, random_state=42)
CustomNN.train()

print(f"train score for the custom model: {CustomNN.score(X_train, y_train)}")

Testing the model using X_test

In [None]:
# CNN.score returns the model's loss on the given data (lower is better).
CustomNN_test_score = CustomNN.score(X=X_test, y=y_test)

print(f"test score for the custom model: {CustomNN_test_score}")