<a href="https://colab.research.google.com/github/kropotin4/Notebooks/blob/master/NeuralNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Нейронная сеть

In [3]:
#!pip install pycuda
#!pip install scikit-cuda
 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, log_loss
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from tqdm.notebook import tqdm_notebook
import copy
import pickle
from numba import njit
 
#import pycuda.gpuarray as gpuarray
#import skcuda.linalg as linalg
#import pycuda.autoinit
 
%matplotlib inline

In [4]:
class FFSNNetwork():
    """Fully connected feed-forward network trained with per-sample backpropagation.

    Hidden layers use a leaky ReLU (slope 0.01 for non-positive inputs).  The
    output layer is linear when ``n_outputs == 1`` (regression) and softmax
    otherwise (classification).

    Layers are indexed from 1 to ``nh + 1``; ``W[k]`` has shape
    ``(sizes[k - 1], sizes[k])`` and ``B[k]`` has shape ``(1, sizes[k])``.

    NOTE: the ``'gpu'`` code path relies on the module-level names ``gpuarray``
    (pycuda) and ``linalg`` (skcuda).  Their imports are commented out in the
    import cell, so they must be enabled before ``calc_type='gpu'`` is used.
    """

    def init_WB(self):
        """Randomly initialise the weights and zero the biases of every layer."""
        for i in range(self.nh + 1):
            fan_in, fan_out = self.sizes[i], self.sizes[i + 1]
            # Scale by sqrt(2 / fan_in); the original author notes that this
            # scaling reduces the chance of the weights collapsing to NaN.
            self.W[i + 1] = np.random.randn(fan_in, fan_out) * np.sqrt(2 / fan_in)
            self.B[i + 1] = np.zeros((1, fan_out))

        if self.calc_type == 'gpu':  # move the parameters to GPUArray
            for i in range(self.nh + 1):
                self.W[i + 1] = gpuarray.to_gpu(self.W[i + 1])
                self.B[i + 1] = gpuarray.to_gpu(self.B[i + 1])

    def get_wb(self):
        """Return deep copies of the parameters as ``{'w': W, 'b': B}``."""
        return {'w': copy.deepcopy(self.W), 'b': copy.deepcopy(self.B)}

    def set_wb(self, w, b):
        """Replace the parameters with deep copies of ``w`` and ``b``."""
        self.W = copy.deepcopy(w)
        self.B = copy.deepcopy(b)

    def write_weight_bias(self, filepath):
        """Pickle the current weights and biases to ``filepath``."""
        with open(filepath, 'wb') as pfile:
            pickle.dump(self.get_wb(), pfile)

    def read_weight_bias(self, filepath):
        """Load weights and biases previously stored by ``write_weight_bias``.

        SECURITY: ``pickle.load`` can execute arbitrary code while unpickling.
        Only read files that come from a trusted source.
        """
        with open(filepath, 'rb') as pfile:
            rdata = pickle.load(pfile)
        self.W = rdata['w']
        self.B = rdata['b']

    def __init__(self, n_inputs, n_outputs, hidden_sizes=[2]):
        """Create the network and randomly initialise its parameters.

        n_inputs     -- number of inputs going into the network.
        n_outputs    -- number of neurons in the last layer:
                        1 - regression, otherwise - classification.
        hidden_sizes -- sequence of integers, the number of neurons in each
                        hidden layer.  It is copied and never mutated, so the
                        shared default list is safe.
        """
        self.nx = n_inputs
        self.ny = n_outputs
        self.nh = len(hidden_sizes)
        # list(...) copies the argument and also allows tuples to be passed.
        self.sizes = [self.nx] + list(hidden_sizes) + [self.ny]

        self.calc_type = 'cpu'

        self.W = {}
        self.B = {}
        self.init_WB()

    @staticmethod
    @njit
    def h_cpu(x):
        """Activation function (leaky ReLU, negative slope 0.01)."""
        return np.where(x > 0, x, 0.01 * x)

    @staticmethod
    def h_gpu(x):
        """Activation function (leaky ReLU, negative slope 0.01) for GPU arrays."""
        return gpuarray.maximum(x, 0.01*x)

    @staticmethod
    @njit
    def grad_h_cpu(x):
        """Derivative of the activation; expects the pre-activation values A."""
        return np.where(x > 0, 1, 0.01)

    @staticmethod
    def grad_h_gpu(x):
        """Derivative of the activation for GPU arrays; expects pre-activations A."""
        return gpuarray.maximum(x > 0, 0.01)

    @staticmethod
    @njit
    def softmax(x):
        """Softmax over all elements of ``x``.

        The maximum is subtracted before exponentiating.  This leaves the
        result mathematically unchanged but prevents ``exp`` from overflowing
        to ``inf`` (and the quotient from becoming NaN) for large logits.
        """
        exps = np.exp(x - np.max(x))
        return exps / np.sum(exps)

    def forward_pass_cpu(self, x):
        """Run one sample through the network with NumPy.

        Stores the pre-activations in ``self.A`` and the activations in
        ``self.H`` (both keyed by layer index, 0 being the input reshaped to a
        row vector) and returns the output activation ``H[nh + 1]``.
        """
        self.A = {}
        self.H = {}
        self.A[0] = x.reshape(1, -1)
        self.H[0] = x.reshape(1, -1)
        for i in range(self.nh):
            self.A[i + 1] = np.matmul(self.H[i], self.W[i + 1]) + self.B[i + 1]
            self.H[i + 1] = self.h_cpu(self.A[i + 1])

        L = self.nh + 1
        self.A[L] = np.matmul(self.H[L - 1], self.W[L]) + self.B[L]
        # Regression keeps the linear output; classification applies softmax.
        self.H[L] = self.A[L] if self.ny == 1 else self.softmax(self.A[L])

        return self.H[L]

    def forward_pass_gpu(self, x):
        """Run one sample through the network with skcuda / pycuda.

        Same bookkeeping as ``forward_pass_cpu``.
        """
        self.A = {}
        self.H = {}
        self.A[0] = x.reshape(1, -1)
        self.H[0] = x.reshape(1, -1)

        for i in range(self.nh):
            self.A[i + 1] = linalg.dot(self.H[i], self.W[i + 1]) + self.B[i + 1]
            self.H[i + 1] = self.h_gpu(self.A[i + 1])

        L = self.nh + 1
        self.A[L] = linalg.dot(self.H[L - 1], self.W[L]) + self.B[L]
        # NOTE(review): softmax is compiled with numba for NumPy arrays; it
        # presumably cannot accept a GPUArray -- confirm before using the GPU
        # path for classification.
        self.H[L] = self.A[L] if self.ny == 1 else self.softmax(self.A[L])

        return self.H[L]

    def grad_cpu(self, x, y):
        """Back-propagate one sample with NumPy.

        Fills ``self.dW`` and ``self.dB`` (keyed by layer index) with the
        gradients for the sample ``(x, y)``.
        """
        self.forward_pass_cpu(x)
        # Row-vector target, consistent with grad_gpu; avoids accidental
        # (ny, ny) broadcasting when y arrives as a column.
        y = np.asarray(y).reshape(1, -1)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        self.dA[L] = (self.H[L] - y)
        for k in range(L, 1, -1):
            self.dW[k] = np.matmul(self.H[k - 1].T, self.dA[k])
            self.dB[k] = self.dA[k]
            self.dH[k - 1] = np.matmul(self.dA[k], self.W[k].T)
            # The activation derivative is evaluated on the pre-activation A[k - 1].
            self.dA[k - 1] = np.multiply(self.dH[k - 1], self.grad_h_cpu(self.A[k - 1]))
        # First layer (dH[0] and dA[0] are not needed).
        self.dW[1] = np.matmul(self.H[0].T, self.dA[1])
        self.dB[1] = self.dA[1]

    def grad_gpu(self, x, y):
        """Back-propagate one sample with skcuda / pycuda.

        Fills ``self.dW`` and ``self.dB`` (keyed by layer index) with the
        gradients for the sample ``(x, y)``.
        """
        self.forward_pass_gpu(x)
        y = y.reshape(1, -1)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        self.dA[L] = (self.H[L] - y)
        for k in range(L, 1, -1):
            self.dW[k] = linalg.dot(linalg.transpose(self.H[k - 1]), self.dA[k])
            self.dB[k] = self.dA[k]
            self.dH[k - 1] = linalg.dot(self.dA[k], linalg.transpose(self.W[k]))
            # The activation derivative is evaluated on the pre-activation A[k - 1].
            self.dA[k - 1] = linalg.multiply(self.dH[k - 1], self.grad_h_gpu(self.A[k - 1]))
        # First layer (dH[0] and dA[0] are not needed).
        self.dW[1] = linalg.dot(linalg.transpose(self.H[0]), self.dA[1])
        self.dB[1] = self.dA[1]

    def _zero_grads(self):
        """Return fresh zero-filled ``(dW, dB)`` accumulators, one entry per layer."""
        dW = {}
        dB = {}
        for i in range(self.nh + 1):
            dW[i + 1] = np.zeros((self.sizes[i], self.sizes[i + 1]))
            dB[i + 1] = np.zeros((1, self.sizes[i + 1]))
            if self.calc_type == 'gpu':
                dW[i + 1] = gpuarray.to_gpu(dW[i + 1])
                dB[i + 1] = gpuarray.to_gpu(dB[i + 1])
        return dW, dB

    def fit(self, features, labels, epochs=1, learning_rate=1, initialise=True, display_loss=False,
            adv_train=False, adv_epochs=100, score='mse', calc_type='cpu'):
        """Train the network with full-batch gradient descent.

        features      -- samples along the first axis (one row per sample).
        labels        -- targets, aligned with ``features`` along the first axis.
        epochs        -- number of passes over the training data.
        learning_rate -- step size applied to the sample-averaged gradient.
        initialise    -- re-initialise weights and biases before training.
        display_loss  -- record the loss after every epoch and plot it at the end.
        adv_train     -- hold out 10% of the shuffled data for the loss and
                         train on the remaining 90%.
        adv_epochs    -- with ``adv_train``, draw a new shuffle/split every
                         ``adv_epochs`` epochs (starting at the first epoch).
        score         -- ``'mse'`` or ``'log'``; the metric used for the loss curve.
        calc_type     -- ``'cpu'`` or ``'gpu'``.

        Raises ValueError for an unknown ``score`` (when ``display_loss`` is
        set) and Exception when a weight or bias becomes NaN.
        """
        # Fail before training instead of after the first full epoch.
        if display_loss and score not in ('mse', 'log'):
            raise ValueError("Available score: 'mse' or 'log'")

        self.calc_type = calc_type

        if self.calc_type == 'gpu':
            linalg.init()
            features = gpuarray.to_gpu(features)
            labels = gpuarray.to_gpu(labels)

        grad = self.grad_cpu if self.calc_type == 'cpu' else self.grad_gpu

        # initialise w, b
        if initialise:
            self.init_WB()

        # Without adv_train both training and evaluation use the full data.
        X, Y = features, labels
        X_test, Y_test = features, labels
        loss = []
        resplit_every = max(adv_epochs, 1)  # guard against a zero/negative period

        for e in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            if adv_train and e % resplit_every == 0:
                features_sh, labels_sh = shuffle(features, labels)  # Shuffle data
                # Keep 90% for training.  The split stays in effect until the
                # next re-split so the hold-out samples are never trained on
                # while they are being used to measure the loss.
                X, X_test, Y, Y_test = train_test_split(
                    features_sh, labels_sh, test_size=0.1)

            # Fresh accumulators every epoch: the sums below are updated in
            # place, so sharing arrays between epochs would leak gradients
            # from earlier epochs into later ones.
            dW, dB = self._zero_grads()
            for x, y in zip(X, Y):
                grad(x, y)
                for i in range(self.nh + 1):
                    dW[i + 1] += self.dW[i + 1]
                    dB[i + 1] += self.dB[i + 1]

            # Average over the number of samples (first axis); max(..., 1)
            # keeps an empty data set a harmless no-op.
            m = max(X.shape[0], 1)
            for i in range(self.nh + 1):
                self.W[i + 1] -= learning_rate * dW[i + 1] / m
                self.B[i + 1] -= learning_rate * dB[i + 1] / m

            for i in range(self.nh + 1):
                if (np.any(np.isnan(self.W[i + 1])) or np.any(np.isnan(self.B[i + 1]))):
                    raise Exception("NaN vaule in B/W")

            if display_loss:
                Y_pred = self.predict(X_test)
                # scikit-learn metrics take (y_true, y_pred); log_loss is not
                # symmetric, so the order matters.
                if score == 'mse':
                    loss.append(mean_squared_error(Y_test, Y_pred))
                else:
                    loss.append(log_loss(Y_test, Y_pred))

        if display_loss:
            plt.plot(loss)
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error' if score == 'mse' else 'Cross Entropy Loss')
            plt.show()

    def predict(self, X):
        """Return the network output for every sample in ``X``.

        The per-sample outputs are stacked into a NumPy array and squeezed.
        On the GPU path every output is copied back to the host before
        stacking.
        """
        on_cpu = self.calc_type == 'cpu'
        forward_pass = self.forward_pass_cpu if on_cpu else self.forward_pass_gpu
        Y_pred = []
        for x in X:
            y_pred = forward_pass(x)
            Y_pred.append(y_pred if on_cpu else y_pred.get())

        return np.array(Y_pred).squeeze()

# Тестирование CPU/GPU

In [5]:
%%script false --no-raise-error

import numpy as np

import time

# Generate two 300 x 300 matrices of random integers in [0, 256), stored as
# float64.  The seed is fixed so the inputs are reproducible.
np.random.seed(42)

x = np.random.randint(0, 256, size=(300, 300)).astype(np.float64)

y = np.random.randint(0, 256, size=(300, 300)).astype(np.float64)


# Time a single matrix multiplication on the CPU.
# perf_counter() is a monotonic high-resolution clock meant for measuring short
# intervals; time.time() can jump when the system clock is adjusted.
tic = time.perf_counter()

z = np.matmul(x, y)

toc = time.perf_counter()

time_taken = toc - tic  # elapsed time in seconds

print("Time taken on CPU (in ms) = {}".format(time_taken*1000))

In [6]:
%%script false --no-raise-error

#!pip install pycuda
#!pip install scikit-cuda

import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg
import pycuda.autoinit

# Time the same matrix multiplication on the GPU.
# Relies on `x`, `y` and the `time` module from the CPU timing cell.

linalg.init()

# Copy the input arrays to the GPU (not included in the measured time).
x_gpu = gpuarray.to_gpu(x)

y_gpu = gpuarray.to_gpu(y)

tic = time.perf_counter()

# Perform the multiplication.
z_gpu = linalg.dot(x_gpu, y_gpu)

# GPU work is launched asynchronously, so the call above may return before the
# product has been computed.  Wait for the device to finish; otherwise only
# the launch overhead would be measured.
pycuda.autoinit.context.synchronize()

toc = time.perf_counter()

time_taken = toc - tic  # elapsed time in seconds

print("Time taken on a GPU (in ms) = {}".format(time_taken*1000))