In [47]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [116]:
class NeuralNetwork:
    def __init__(self, x, y, num_hidden, epochs, learning_rate, num_nodes_layers, activation_function, batch_size):
        self.x = x
        self.y = y
        

        self.num_data = np.shape(x)[1]  # no. of data points    # no. of rows
        self.k = np.shape(x)[0]  # no. of features   # no. of cols
        self.n_out = np.shape(y)[0]

        self.batch_size = batch_size
        self.activation_function = activation_function
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.num_hidden = num_hidden
        self.num_layers = num_hidden + 1  # +1 for output layer

        self.num_nodes_layers = num_nodes_layers

        # inserting input and output nodes to the list
        self.num_nodes_layers.insert(0, self.k)
        self.num_nodes_layers.append(self.n_out)

        self.leaky_slope = 0.01
        self.weights = []
        
        # parameters: weight and bias
        # weight[l] : (num_layers * num_layers-1 ) * num_layers : (no. of nodes in layer l * no. of nodes in layer (l-1)) * no. of layers
    def initialize_parameters_random(self):

        for l in range(1, self.num_layers + 1):
            self.weights.append(
                np.random.rand(self.num_nodes_layers[l], self.num_nodes_layers[l - 1]))

    # Use this when activation function is tanh or sigmoid
    def initialize_parameters_xavier(self):

        for l in range(1, self.num_layers + 1):
            self.weights.append(np.random.randn(self.num_nodes_layers[l], self.num_nodes_layers[l - 1]) * np.sqrt(
                1 / self.num_nodes_layers[l - 1]))

    # Use this when activation function is ReLU or Leaky ReLu
    def initialize_parameters_he(self):
        for l in range(1, self.num_layers + 1):
            self.weights.append(np.random.randn(self.num_nodes_layers[l], self.num_nodes_layers[l - 1]) * np.sqrt(
                2 / self.num_nodes_layers[l - 1]))

    # Activation Functions
    def activation(self, x):
        if self.activation_function == "linear":
            return x
        if self.activation_function == "sigmoid":
            return 1.0 / (1.0 + np.exp(-x))
        if self.activation_function == "tanh":
            return np.tanh(x)
        if self.activation_function == "relu":
            a = np.zeros_like(x)
            return np.maximum(a, x)
        if self.activation_function == "leaky_relu":
            a = self.leaky_slope * x
            return np.maximum(a, x)

    def softmax(self, x):
        exp_x = np.exp(x)
        return exp_x / exp_x.sum(axis=0, keepdims=True)

    def gradient_activation(self, X):
        if self.activation_function == "linear":
            return np.ones_like(X)
        elif self.activation_function == "sigmoid":
            return self.activation(X) * (1 - self.activation(X))
        elif self.activation_function == "tanh":
            return (1 - np.square(X))
        elif self.activation_function == "relu":
            grad = np.zeros_like(X)
            grad[X > 0] = 1.0
            return grad
        elif self.activation_function == "leaky_relu":
            grad = np.ones_like(X)
            grad[X <= 0] = self.leaky_slope
            return grad

    def forward_propogation(self, x):
        # dim of A vector: (no. of hidden nodes * num_data) *(no. of layers)
        A = []
        Z = []
        A.append(x)
        A_prev = x

        for l in range(0, self.num_layers-1):
            z = np.matmul(self.weights[l], A_prev)
            a = self.activation(z)
            A_prev = a
            A.append(a)
            Z.append(z)
        z = np.matmul(self.weights[-1], A_prev)
        # ******* Can apply different activation to differnt nodes in last layer?****
        a = self.activation(z)
        A.append(a)
        Z.append(z)
        return (A, Z)

    def back_propogation(self, A, Z, y):

        delta_z = [None for i in range(self.num_layers)]
        delta_weight = [None for i in range(self.num_layers)]

        delta_z[-1] = (y - A[-1])
        delta_weight[-1] = np.matmul(delta_z[-1], A[-2].T)

        for l in range(self.num_layers - 2, -1, -1):
            delta_z[l] = np.multiply(np.matmul(self.weights[l + 1].T, delta_z[l + 1]), self.gradient_activation(Z[l]) )
            delta_weight[l] = np.matmul( delta_z[l], A[l].T )

        return delta_weight


    def update_weight(self, A, delta_weight):
        # weight = weight + learning_rate * error * input
        m = A[-1].shape[1]
        for l in range(self.num_layers):
            self.weights[l] = self.weights[l] + (self.learning_rate * delta_weight[l])/m

    def predict(self, x_test, isMissing):
        A,Z = self.forward_propogation(x_test)
        prediction = A[-1]
        predFinal = np.where(isMissing < 1, prediction, x_test)
        return predFinal

    def loss_function(self, y, out):
#             return (0.5 * np.mean((y - out) ** 2))
        return (np.mean(np.sum((y - out) ** 2, axis = 1)))

    def model(self):
        mini_batch = int((self.num_data) / (self.batch_size))
        
        self.initialize_parameters_he()
        
#         if self.activation_function == "linear":
#             self.initialize_parameters_random()
#         elif self.activation_function == "sigmoid" or self.activation_function == "tanh":
#             self.initialize_parameters_xavier()
#         else:
#             self.initialize_parameters_he()

        for e in range(self.epochs):

            print("Epoch =", e)
            end = 0
            for n in range(mini_batch + 1):

                if (n != mini_batch):
                    start = n * self.batch_size
                    end = (n + 1) * self.batch_size
                    x_ = self.x[:, start:end]
                    y_ = self.y[:, start:end]

                else:
                    if ((self.num_data % self.batch_size) != 0):
                        x_ = self.x[:, end:]
                        y_ = self.y[:, end:]
                    else:
                        break

                A,Z = self.forward_propogation(x_)
                delta_weight = self.back_propogation(A, Z, y_)
                self.update_weight(A, delta_weight)

                loss = self.loss_function(A[-1], y_)

            print("loss = ", loss)
        return

In [117]:
X_train = np.load('Data/Train/X.npy')
X_prime_train = np.load('Data/Train/X_prime.npy')
feature_info_train = np.load('Data/Train/feature_information.npy')

In [118]:
X_test = np.load('Data/Test/X.npy')
X_prime_test = np.load('Data/Test/X_prime.npy')
feature_info_test = np.load('Data/Test/feature_information.npy')

In [119]:
trainX = X_prime_train.T
trainY = X_train.T
testX = X_prime_test.T
testY = X_test.T

In [120]:
print(trainX.shape)
print(testX.shape)

(106, 30162)
(106, 15060)


In [121]:
nn = NeuralNetwork(trainX, trainY, num_hidden= 1, epochs= 5, learning_rate=0.01, num_nodes_layers=[10],
                       activation_function="tanh", batch_size = 1)
nn.model()

Epoch = 0
loss =  0.03026098204572844
Epoch = 1
loss =  0.031515579216232485
Epoch = 2
loss =  0.031891478778654093
Epoch = 3
loss =  0.03206930559648685
Epoch = 4
loss =  0.031721469834786326


In [122]:
nn.weights[0]

array([[-1.68455750e-02, -1.50436359e-02, -1.61787211e-02, ...,
         4.19256436e-02,  1.01633094e-01, -4.52208378e-02],
       [-1.39532279e-02,  7.56963579e-03, -6.91640883e-02, ...,
        -1.97108010e-02, -1.10134761e-01, -6.09950407e-02],
       [ 1.65501999e-02, -1.34529383e-02,  2.17425447e-05, ...,
         1.43382728e-02,  1.03637190e-02, -2.09316627e-02],
       ...,
       [-2.01002139e-02, -4.64195135e-03, -7.80183340e-03, ...,
         1.23410790e-01, -7.35299057e-02, -8.55047242e-03],
       [ 4.00907446e-03,  3.36051736e-03,  4.21666089e-02, ...,
         8.62435832e-03,  1.79551590e-01, -1.74776994e-03],
       [ 1.35841362e-03, -1.83141620e-03,  2.05965573e-03, ...,
        -1.09133804e-02,  1.28614162e-01,  2.42154018e-02]])

In [123]:
pred = nn.predict(testX, feature_info_test.T)

In [124]:
testErr = np.sqrt(np.mean(np.sum((pred - testY) ** 2, axis = 1)))
# testErr = nn.loss_function(pred, testY)
print("Testing error:", testErr)

Testing error: 6.465487038866718


In [46]:
pred_df = pd.DataFrame(pred)
pred_df.iloc[:25,2]

0     0.150685
1     0.219011
2     0.733333
3     0.000000
4     0.000000
5     0.397959
6     0.000000
7     1.000000
8     0.000000
9     0.000000
10    0.000000
11    0.000000
12    0.000000
13    0.000000
14    0.000000
15    0.000000
16    0.000000
17    0.000000
18    0.000000
19    0.000000
20    1.000000
21    0.000000
22    0.000000
23    0.000000
24    0.000000
Name: 2, dtype: float64

In [45]:
testX_df = pd.DataFrame(testX)
testX_df.iloc[:25,2]

0     0.150685
1     0.219011
2     0.733333
3     0.000000
4     0.000000
5     0.397959
6     0.000000
7     1.000000
8     0.000000
9     0.000000
10    0.000000
11    0.000000
12    0.000000
13    0.000000
14    0.000000
15    0.000000
16    0.000000
17    0.000000
18    0.000000
19    0.000000
20    1.000000
21    0.000000
22    0.000000
23    0.000000
24    0.000000
Name: 2, dtype: float64

In [43]:
testY_df = pd.DataFrame(testY)
testY_df.iloc[:25,2]

0     0.150685
1     0.219011
2     0.733333
3     0.000000
4     0.000000
5     0.397959
6     0.000000
7     1.000000
8     0.000000
9     0.000000
10    0.000000
11    0.000000
12    0.000000
13    0.000000
14    0.000000
15    0.000000
16    0.000000
17    0.000000
18    0.000000
19    0.000000
20    1.000000
21    0.000000
22    0.000000
23    0.000000
24    0.000000
Name: 2, dtype: float64

In [42]:
feature_info_test_df = pd.DataFrame(feature_info_test)
feature_info_test_df.iloc[:25,2]

0     1
1     1
2     1
3     0
4     0
5     1
6     1
7     1
8     1
9     1
10    1
11    1
12    1
13    1
14    1
15    1
16    0
17    1
18    1
19    1
20    1
21    0
22    1
23    1
24    0
Name: 2, dtype: int64