finalized the examples for mnist and mnist_orig and added some functions for calculation of confusion matrix as well as its visualization
manassharma07 committed Jan 16, 2022
1 parent c0fedc7 commit 7b4f10c
Showing 20 changed files with 1,299 additions and 487 deletions.
1 change: 1 addition & 0 deletions crysx_nn.egg-info/requires.txt
@@ -9,3 +9,4 @@ numexpr
tomli
Pillow
opencv-python
seaborn
7 changes: 4 additions & 3 deletions crysx_nn/activation.py
@@ -27,7 +27,7 @@
try:
import cupy as cp
except ImportError:
print('CuPy could not be imported!')
print('Warning: CuPy could not be imported! You can only use CPU for computations.')



@@ -46,7 +46,7 @@ def Softmax(x):
# The following is wrong! We should be taking the maximum of each row of data and then subtracting that
# e_x = np.exp(x - np.max(x)) # For stability as it is prone to overflow and underflow
# Alternative 1 (Doesn't work with NUMBA)
# max_x = np.amax(x, 1.).reshape(x.shape[0],1)
# max_x = np.amax(x, 1).reshape(x.shape[0],1)
# e_x = np.exp(x - max_x)
# return e_x / e_x.sum(axis=1, keepdims=True) # only difference
# Alternative 2
@@ -296,7 +296,8 @@ def Softmax_cupy(x):
Returns:
Softmax activated (N,k) ndarray (N: no. of samples, k: no. of nodes)
'''
e_x = cp.exp(x - cp.max(x)) # For stability as it is prone to overflow and underflow
max_x = cp.amax(x, 1).reshape(x.shape[0],1)
e_x = cp.exp(x - max_x) # For stability as it is prone to overflow and underflow
# return e_x / e_x.sum(axis=1, keepdims=True) # only difference
return e_x / e_x.sum(axis=1).reshape((-1, 1)) # Alternative of keepdims=True for Numba compatibility
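(Aside: a minimal NumPy sketch of the row-wise stabilization this commit applies to Softmax_cupy, subtracting each row's maximum before exponentiating so that exp never sees large positive arguments; the function name and test data below are illustrative, not part of the repository.)

import numpy as np

def softmax_rowwise(x):
    # x: (N, k) ndarray; subtract the per-row maximum for numerical stability
    max_x = np.amax(x, axis=1).reshape(x.shape[0], 1)
    e_x = np.exp(x - max_x)
    # reshape instead of keepdims=True mirrors the Numba-friendly form used in activation.py
    return e_x / e_x.sum(axis=1).reshape(-1, 1)

x = np.array([[1000.0, 1001.0], [3.0, 4.0]])
print(softmax_rowwise(x))  # finite probabilities even for the large-valued first row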

4 changes: 2 additions & 2 deletions crysx_nn/loss.py
@@ -10,7 +10,7 @@
try:
import cupy as cp
except ImportError:
print('CuPy could not be imported!')
print('Warning: CuPy could not be imported! You can only use CPU for computations.')

@njit(cache=True,fastmath=False, parallel=True)
def MSE_loss(outi, out0):
@@ -171,7 +171,7 @@ def BCE_loss_grad(predictions, targets):
# grad[i,j]=0.
# return grad

@njit(cache=True,fastmath=False)
@njit(cache=True,fastmath=True)
def CCE_loss(predictions, targets, epsilon=1e-7):
"""
Computes categorical cross entropy between targets (encoded as one-hot vectors)
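(Aside: a self-contained NumPy sketch of categorical cross entropy over one-hot targets with the same epsilon clipping idea, averaged over samples; this is illustrative and not the exact body of loss.CCE_loss.)

import numpy as np

def cce_loss(predictions, targets, epsilon=1e-7):
    # Clip to avoid log(0), then average the negative log-likelihood of the true class
    predictions = np.clip(predictions, epsilon, 1.0 - epsilon)
    return -np.sum(targets * np.log(predictions)) / predictions.shape[0]

preds = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
one_hot = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
print(cce_loss(preds, one_hot))  # ~0.29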
95 changes: 78 additions & 17 deletions crysx_nn/network.py
@@ -25,7 +25,7 @@
try:
import cupy as cp
except ImportError:
print('CuPy could not be imported!')
print('Warning: CuPy could not be imported! You can only use CPU for computations.')



@@ -432,7 +432,7 @@ def nn_optimize(inputs, outputs, activationFunc, nLayers, nEpochs=10, batchSize=
return weights, biases, errors

# @njit(cache=False,fastmath=True)
def nn_optimize_fast(inputs, outputs, activationFunc, nLayers, nEpochs=10, batchSize=None, eeta=0.5, weights=None, biases=None, errorFunc=loss.MSE_loss, gradErrorFunc=loss.MSE_loss_grad,miniterEpoch=1,batchProgressBar=False,miniterBatch=100):
def nn_optimize_fast(inputs, outputs, activationFunc, nLayers, nEpochs=10, batchSize=None, eeta=0.5, weights=None, biases=None, errorFunc=loss.MSE_loss, gradErrorFunc=loss.MSE_loss_grad,miniterEpoch=1,batchProgressBar=False,miniterBatch=100, get_accuracy=False):
'''
Performs the optimization of neural network weights and biases using Stochastic gradient descent.
Parameters:
@@ -472,8 +472,12 @@ def nn_optimize_fast(inputs, outputs, activationFunc, nLayers, nEpochs=10, batch
biases = []
errors=[]
nBatches = int(inputs.shape[0]/batchSize)
if get_accuracy:
accuracies = []
for iEpoch in tqdm(range(nEpochs),leave=True,miniters=miniterEpoch):
errorEpoch = 0.0
if get_accuracy:
accuracy_epoch = 0.0
# for iBatch in range(nBatches):
for iBatch in tqdm(range(nBatches),leave=False,miniters=miniterBatch,disable=not(batchProgressBar)):
offset = iBatch*batchSize
@@ -492,6 +496,10 @@ def nn_optimize_fast(inputs, outputs, activationFunc, nLayers, nEpochs=10, batch
# print(z[nLayers-1].dtype)
# print(iEpoch)
# print(iBatch)
# Accuracy
if get_accuracy:
bool_mask = np.argmax(outExpected,axis=1)==np.argmax(a[nLayers],axis=1)
accuracy_epoch += np.sum(bool_mask)/batchSize
# Error
errorBatch = errorFunc(a[nLayers],outExpected)
# if np.isnan(errorBatch):
@@ -521,13 +529,17 @@ def nn_optimize_fast(inputs, outputs, activationFunc, nLayers, nEpochs=10, batch

# Average over the batches
errors.append(errorEpoch/nBatches)
if get_accuracy:
accuracies.append(accuracy_epoch/nBatches)

if(iEpoch==0):
print('Average Error with initial weights and biases:', errorEpoch/nBatches)



return weights, biases, errors
if get_accuracy:
return weights, biases, errors, accuracies
else:
return weights, biases, errors


# act_func_dict = {'Sigmoid':activation.Sigmoid,'ReLU':activation.ReLU,'Softmax':activation.Softmax}
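(Aside: the accuracy bookkeeping added above compares the argmax of the expected one-hot outputs with the argmax of the network outputs for each batch and averages over batches; a self-contained NumPy sketch with made-up arrays follows.)

import numpy as np

# One-hot targets and raw network outputs for a batch of 4 samples (illustrative)
outExpected = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]])
outPredicted = np.array([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.3, 0.3, 0.4], [0.1, 0.6, 0.3]])

bool_mask = np.argmax(outExpected, axis=1) == np.argmax(outPredicted, axis=1)
batch_accuracy = np.sum(bool_mask) / outExpected.shape[0]
print(batch_accuracy)  # 0.75: three of four samples classified correctly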
@@ -971,6 +983,7 @@ def __init__(self, nInputs=None, neurons_per_layer=None, activation_func_names=N
self.weights = self.init_weights
self.biases = self.init_biases
self.errors = []
self.accuracy = []
self.opt_method = 'SGD'
self.lr = 0.5

@@ -1003,7 +1016,7 @@ def details(self):
print('Learning rate: ', self.lr)
print('----------------------------------------------------------------------------------')

def optimize(self, inputs, outputs, method=None, lr=None, nEpochs=100,loss_func_name=None, miniterEpoch=1,batchProgressBar=False,miniterBatch=100):
def optimize(self, inputs, outputs, method=None, lr=None, nEpochs=100,loss_func_name=None, miniterEpoch=1,batchProgressBar=False,miniterBatch=100, get_accuracy=False):
if method is None:
method = self.opt_method
if lr is None:
@@ -1015,7 +1028,10 @@ def optimize(self, inputs, outputs, method=None, lr=None, nEpochs=100,loss_func_
else:
loss_func = utils.loss_func_dict[loss_func_name]
loss_func_grad = utils.loss_func_grad_dict[loss_func_name]
self.weights, self.biases, self.errors = nn_optimize_fast(inputs, outputs, self.activation_func_names, self.nLayers, nEpochs=nEpochs, batchSize=self.batch_size, eeta=lr, weights=self.weights, biases=self.biases, errorFunc=loss_func, gradErrorFunc=loss_func_grad,miniterEpoch=miniterEpoch,batchProgressBar=batchProgressBar,miniterBatch=miniterBatch)
if get_accuracy:
self.weights, self.biases, self.errors, self.accuracy = nn_optimize_fast(inputs, outputs, self.activation_func_names, self.nLayers, nEpochs=nEpochs, batchSize=self.batch_size, eeta=lr, weights=self.weights, biases=self.biases, errorFunc=loss_func, gradErrorFunc=loss_func_grad,miniterEpoch=miniterEpoch,batchProgressBar=batchProgressBar,miniterBatch=miniterBatch, get_accuracy=get_accuracy)
else:
self.weights, self.biases, self.errors = nn_optimize_fast(inputs, outputs, self.activation_func_names, self.nLayers, nEpochs=nEpochs, batchSize=self.batch_size, eeta=lr, weights=self.weights, biases=self.biases, errorFunc=loss_func, gradErrorFunc=loss_func_grad,miniterEpoch=miniterEpoch,batchProgressBar=batchProgressBar,miniterBatch=miniterBatch)
if self.device=='GPU':
if loss_func_name is None:
loss_func = loss.MSE_loss_cupy
@@ -1025,49 +1041,94 @@ def optimize(self, inputs, outputs, method=None, lr=None, nEpochs=100,loss_func_
loss_func_grad = utils.loss_func_grad_dict_cupy[loss_func_name]
self.weights, self.biases, self.errors = nn_optimize_fast_cupy(inputs, outputs, self.activation_func_names, self.nLayers, nEpochs=nEpochs, batchSize=self.batch_size, eeta=lr, weights=self.weights, biases=self.biases, errorFunc=loss_func, gradErrorFunc=loss_func_grad,miniterEpoch=miniterEpoch,batchProgressBar=batchProgressBar,miniterBatch=miniterBatch)

def predict(self, inputs, outputs=None, loss_func_name=None):
def save_model_weights(self, filename):
np.savez(filename, *self.weights)

def save_model_biases(self, filename):
np.savez(filename, *self.biases)

def load_model_weights(self, filename):
outfile = np.load(filename+'.npz')
for i in range(len(outfile.files)):
self.weights[i] = outfile[outfile.files[i]]


def load_model_biases(self, filename):
outfile = np.load(filename+'.npz')
for i in range(len(outfile.files)):
self.biases[i] = outfile[outfile.files[i]]

def predict(self, inputs, outputs=None, loss_func_name=None, get_accuracy=False):
error = 0.0
accuracy = 0.0
nBatches = np.maximum(int(inputs.shape[0]/self.batch_size),1)

if inputs.shape[0]/self.batch_size<1:
nBatches= 1
batch_size = inputs.shape[0]
else:
batch_size = self.batch_size
if self.device=='CPU':
predictions = np.zeros([inputs.shape[0], self.neurons_per_layer[-1]],dtype=inputs.dtype)
if loss_func_name is None:
loss_func = loss.MSE_loss
loss_func_grad = loss.MSE_loss_grad
else:
loss_func = utils.loss_func_dict[loss_func_name]
loss_func_grad = utils.loss_func_grad_dict[loss_func_name]
for iBatch in range(nBatches):
offset = iBatch*self.batch_size
x = inputs[offset:offset + self.batch_size,:]# Input vector
# offset = iBatch*self.batch_size
offset = iBatch*batch_size
# x = inputs[offset:offset + self.batch_size,:]# Input vector
x = inputs[offset:offset + batch_size,:]# Input vector

# Forward feed with optimized weights
# Perform Forward feed and get the outputs at each layers and the inputs at each layer
a, z = forward_feed(x, self.nLayers, self.weights, self.biases, self.activation_func_names)
new_outputs = a[self.nLayers]
predictions[offset:offset + batch_size,:] = new_outputs
if outputs is not None:
outExpected = outputs[offset:offset + self.batch_size,:] # Expected output
# outExpected = outputs[offset:offset + self.batch_size,:] # Expected output
outExpected = outputs[offset:offset + batch_size,:] # Expected output
# New Error
error += loss_func(new_outputs, outExpected)/self.batch_size
# error += loss_func(new_outputs, outExpected)/self.batch_size
error += loss_func(new_outputs, outExpected)/batch_size
if get_accuracy:
bool_mask = np.argmax(new_outputs,axis=1)==np.argmax(outExpected,axis=1)
accuracy += np.sum(bool_mask)
if self.device=='GPU':
predictions = cp.zeros([inputs.shape[0], self.neurons_per_layer[-1]],dtype=inputs.dtype)
if loss_func_name is None:
loss_func = loss.MSE_loss_cupy
loss_func_grad = loss.MSE_loss_grad_cupy
else:
loss_func = utils.loss_func_dict_cupy[loss_func_name]
loss_func_grad = utils.loss_func_grad_dict_cupy[loss_func_name]
for iBatch in range(nBatches):
offset = iBatch*self.batch_size
x = inputs[offset:offset + self.batch_size,:]# Input vector
# offset = iBatch*self.batch_size
offset = iBatch*batch_size
# x = inputs[offset:offset + self.batch_size,:]# Input vector
x = inputs[offset:offset + batch_size,:]# Input vector

# Forward feed with optimized weights
# Perform Forward feed and get the outputs at each layers and the inputs at each layer
a, z = forward_feed_cupy(x, self.nLayers, self.weights, self.biases, self.activation_func_names)
new_outputs = a[self.nLayers]
predictions[offset:offset + batch_size,:] = new_outputs
if outputs is not None:
outExpected = outputs[offset:offset + self.batch_size,:] # Expected output
# outExpected = outputs[offset:offset + self.batch_size,:] # Expected output
outExpected = outputs[offset:offset + batch_size,:] # Expected output
# New Error
# New Error
error += loss_func(new_outputs, outExpected)/self.batch_size
# error += loss_func(new_outputs, outExpected)/self.batch_size
error += loss_func(new_outputs, outExpected)/batch_size
if get_accuracy:
bool_mask = cp.argmax(new_outputs,axis=1)==cp.argmax(outExpected,axis=1)
accuracy += cp.sum(bool_mask)
if outputs is None:
return new_outputs
return predictions
else:
return new_outputs, error/nBatches
if not get_accuracy:
return predictions, error/nBatches
else:
return predictions, error/nBatches, accuracy/outputs.shape[0]
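(Aside: save_model_weights/load_model_weights round-trip the per-layer arrays through np.savez; below is a minimal sketch of the same pattern outside the class, with illustrative filenames and shapes. With get_accuracy=True, predict additionally returns the fraction of samples whose argmax matches the one-hot targets.)

import numpy as np

# Per-layer weight arrays, as stored in self.weights (shapes are illustrative)
weights = [np.random.rand(4, 5), np.random.rand(3, 4)]

# save_model_weights: np.savez(filename, *self.weights) writes arr_0, arr_1, ...
np.savez('model_weights', *weights)

# load_model_weights: reopen filename + '.npz' and restore the arrays in order
outfile = np.load('model_weights.npz')
restored = [outfile[name] for name in outfile.files]
assert all(np.array_equal(a, b) for a, b in zip(weights, restored))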
57 changes: 56 additions & 1 deletion crysx_nn/utils.py
@@ -1,5 +1,7 @@
import crysx_nn.activation as activation
import crysx_nn.loss as loss
import seaborn as sn
import matplotlib.pyplot as plt

act_func_dict = {'Sigmoid':activation.Sigmoid,'ReLU':activation.ReLU,'Softmax':activation.Softmax, \
'Tanh':activation.Tanh, 'Tanh_offset':activation.Tanh_offset, 'Identity':activation.Identity, 'Softplus':activation.Softplus}
@@ -13,6 +15,36 @@
loss_func_grad_dict = {'MAE':loss.MAE_loss_grad,'MSE':loss.MSE_loss_grad,'BCE':loss.BCE_loss_grad, \
'CCE':loss.CCE_loss_grad}

import numpy as np

def compute_confusion_matrix(predictions, targets):
'''Computes a confusion matrix using numpy for two arrays
targets and predictions.
Results are identical (and similar in computation time) to:
"from sklearn.metrics import confusion_matrix"
However, this function avoids the dependency on sklearn.'''

K = len(np.unique(targets)) # Number of classes
result = np.zeros((K, K))

for i in range(len(targets)):
result[targets[i]][predictions[i]] += 1

return result

def plot_confusion_matrix(confusion_matrix, tick_labels=None, x_label='predicted type',y_label='true type'):
plt.rcParams["figure.figsize"] = (14,10)
if tick_labels is None:
tick_labels = range(confusion_matrix.shape[0])
sn.heatmap(confusion_matrix, square=True, annot=True, cbar=True, cmap = sn.cm.rocket_r, xticklabels=tick_labels, yticklabels=tick_labels)
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.title('Confusion matrix')
plt.show()


##------------------CUPY----------------------

act_func_dict_cupy = {'Sigmoid':activation.Sigmoid_cupy,'ReLU':activation.ReLU_cupy,'Softmax':activation.Softmax_cupy, \
@@ -25,4 +57,27 @@
'CCE':loss.CCE_loss_cupy}

loss_func_grad_dict_cupy = {'MAE':loss.MAE_loss_grad_cupy,'MSE':loss.MSE_loss_grad_cupy,'BCE':loss.BCE_loss_grad_cupy, \
'CCE':loss.CCE_loss_grad_cupy}
'CCE':loss.CCE_loss_grad_cupy}


try:
import cupy as cp
except ImportError:
print('Warning: CuPy could not be imported! You can only use CPU for computations.')

def compute_confusion_matrix_cupy(predictions, targets):
'''Computes a confusion matrix using CuPy for two arrays
targets and predictions.
Results are identical (and similar in computation time) to:
"from sklearn.metrics import confusion_matrix"
However, this function avoids the dependency on sklearn.'''

K = len(cp.unique(targets)) # Number of classes
result = cp.zeros((K, K))

for i in range(len(targets)):
result[targets[i]][predictions[i]] += 1

return result
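(Aside: a short usage sketch of the new confusion-matrix helpers, assuming integer class labels as the indexing in compute_confusion_matrix requires; the label arrays are made up, and compute_confusion_matrix_cupy works the same way for CuPy arrays.)

import numpy as np
from crysx_nn import utils

# Integer class labels for 8 samples (illustrative)
targets = np.array([0, 0, 1, 1, 2, 2, 2, 1])
predictions = np.array([0, 1, 1, 1, 2, 0, 2, 1])

cm = utils.compute_confusion_matrix(predictions, targets)
print(cm)  # rows: true class, columns: predicted class

# Heatmap visualization via seaborn/matplotlib
utils.plot_confusion_matrix(cm, tick_labels=['class 0', 'class 1', 'class 2'])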
