In [12]:
import numpy as np

In [13]:
#activation functions 

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

def relu(x):
    return np.maximum(x, 0)

def relu_prime(x):
    return (x > 0).astype(int)

def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x)**2

In [14]:
#loss functions
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size

def hinge(y_true, y_pred):
    return np.mean(np.maximum(1 - y_true * y_pred, 0))

def hinge_prime(y_true, y_pred):
    temp = 1 - y_true * y_pred
    return -y_true * (temp > 0) / y_true.size

In [15]:
def get_activation(activation):
    if activation == "sigmoid":
        return sigmoid
    if activation == "relu":
        return relu
    if activation == "tanh":
        return tanh

def get_activation_derivative(activation):
    if activation == "sigmoid":
        return sigmoid_prime
    if activation == "relu":
        return relu_prime
    if activation == "tanh":
        return tanh_prime

def get_loss(loss):
    if loss == "mse":
        return mse
    if loss =="hinge":
        return hinge

def get_loss_derivative(loss):
    if loss == "mse":
        return mse_prime
    if loss =="hinge":
        return hinge_prime
    

In [16]:

class Layer: 
    def __init__(self):
        pass
    
    def forward_propagation(self, input_data):
        raise NotImplementedError
    
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError

In [17]:
class FCLayer(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
        
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights.T) + self.bias
        return self.output
    
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        
        return input_error
    

In [18]:
    
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        return output_error * self.activation_prime(self.input)

In [19]:
class Network:
    def __init__(self, epochs=1000, learning_rate=1, loss="mse", activation="sigmoid", hidden_layer_sizes=(10,2)):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.loss = loss
        self.activation = activation
        self.hidden_layer_sizes = hidden_layer_sizes
        
        self.layers = []
        self.layers.append(FCLayer(784, hidden_layer_sizes[0]))
        self.layers.append(ActivationLayer(get_activation(activation), get_activation_derivative(activation)))
        for i in range(1, len(hidden_layer_sizes)):
            self.layers.append(FCLayer(hidden_layer_sizes[i-1], hidden_layer_sizes[i]))
            self.layers.append(ActivationLayer(get_activation(activation), get_activation_derivative(activation)))
        self.layers.append(FCLayer(hidden_layer_sizes[-1], 1))
        self.layers.append(ActivationLayer(get_activation(activation), get_activation_derivative(activation)))
   
    def fit(self, X, y):
        for epoch in range(self.epochs):
            for i in range(len(X)):
                # forward propagation
                output = np.array(X[i])
                for layer in self.layers:
                    output = layer.forward_propagation(output)
                
                # compute error and loss
                loss = get_loss(self.loss)(y[i], output)
                
                # backward propagation
                error = get_loss_derivative(self.loss)(y[i], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, self.learning_rate)
                    
           
            print("Epoch:", epoch, "Loss:", loss)
                
    def predict(self, X):
        predictions = []
        for i in range(len(X)):
            output = np.array(X[i])

            for layer in self.layers:
                output = layer.forward_propagation(output)
            predictions.append(output)
        return np.array(predictions)



In [20]:
import gzip
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
train_set, valid_set, test_set = None , None, None 
with gzip.open('./mnist-1.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set  = pickle.load(f, encoding='latin1')

# Check that the datasets are loaded correctly
print("Train set:", train_set)
print("Valid set:", valid_set)
print("Test set:", test_set)

# Access the train_set variable
X_train, y_train = train_set
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

X_valid, y_valid = valid_set
print("X_valid shape:", X_valid.shape)
print("y_valid shape:", y_valid.shape)

X_test, y_test = test_set
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

# Preprocess validation and test data sets
from sklearn.preprocessing import StandardScaler,MinMaxScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.fit_transform(X_valid)
X_test = scaler.fit_transform(X_test)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.fit_transform(X_valid)
X_test = scaler.fit_transform(X_test)

Train set: (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([5, 0, 4, ..., 8, 4, 8], dtype=int64))
Valid set: (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([3, 8, 6, ..., 5, 6, 8], dtype=int64))
Test set: (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([7, 2, 1, ..., 4, 5, 6], dtype=int64))
X_train shape: (50000, 784)
y_train shape: (50000,)
X_valid

In [21]:
# this is under the assumption that the validation data which we loaded earlier is not there in the training data and needs to be appended to the end 
# we append validation set to the end of the training set so validation fraction can take it as validation data when we do grid search
X_train= np.concatenate((X_train,X_valid))
y_train= np.concatenate((y_train,y_valid))
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)



X_train shape: (60000, 784)
y_train shape: (60000,)


In [22]:


# network
net = Network()

# train

net.fit(X_train,y_train.reshape(-1, 1))

# test
out = net.predict(X_train)
print(out)

ValueError: shapes (784,) and (10,784) not aligned: 784 (dim 0) != 10 (dim 0)

In [None]:
print(out)