###code source
https://blog.csdn.net/clayanddev/article/details/53955544


In [0]:
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt

class NNModel:
    Ws = [] # params W of the whole network
    bs = [] # params b of the whole network
    layers = [] # number of nodes in each layer
    epsilon = 0.01 # learning rate epsilon

    def __init__(self, layers, epsilon, activation = "relu"):
        self.layers = layers
        self.epsilon = epsilon
        self.process_name = None
        self.activation_name = activation
        
        self.activation=Activation(activation).f
        self.activation_deriv=Activation(activation).f_deriv
        
        self.init_params()

    # Initialize the parameters (W and b) to random values. We need to learn these.
    def init_params(self):
        np.random.seed(0)
        layers = self.layers
        hidden_layer_num = len(layers) - 1
        Ws = [1] * hidden_layer_num
        bs = [1] * hidden_layer_num
        for i in range(0, hidden_layer_num):
            if self.activation_name == "relu":
              Ws[i] = np.random.randn(layers[i], layers[i + 1]) / np.sqrt(layers[i]/2) 
            elif self.activation_name == "tanh":
              Ws[i] = np.random.randn(layers[i], layers[i + 1]) / np.sqrt(layers[i]/2)

            bs[i] = np.zeros((1, layers[i + 1]))
        self.Ws = Ws
        self.bs = bs

    # This function learns parameters for the neural network from training dataset
    # - num_passes: Number of passes through the training data for gradient descent
    # - print_loss: If True, print the loss every 1000 iterations
    def train(self, X, y, num_passes=20000, keep_prob = 1, beta = 0, reg_lambda = 0.01, process = None, print_loss=False, X_test = None, y_test = None):
        num_examples = len(X)
        expected_output = y
        losses = []
        accuracys = []
        
        losses_test = []
        accuracys_test = []
        
        self.process_name = process
        
        if (X_test is not None) and (y_test is not None):         
            test = True
        else:
            test = False
            
        if process is not None:
            process_function = Process(process).p
            
            X = process_function(X)
        

        # Gradient descent. For each batch...
        for i in range(0, num_passes+1):

            # Forward propagation
            a_output = self.forward(X, keep_prob)

            # Backpropagation
            v_dWs, v_dbs = self.backward(X, expected_output, a_output, keep_prob, beta)

            # Update parameters of the model
            self.update_model_params(v_dWs, v_dbs, num_examples, reg_lambda)
            
            #record cost
            loss = self.calculate_loss(X, expected_output, 1, reg_lambda)
            
            losses.append(loss)
            
            accuracy = 1-(np.count_nonzero(self.predict(X) - y))/len(X)
            
            
            accuracys.append(accuracy)
            
            # test case
            if test:   
                if process is not None:
                    X_test = process_function(X_test)
                loss_test = self.calculate_loss(X_test, y_test, 1, reg_lambda)
                losses_test.append(loss_test)
                accuracy_test = 1-(np.count_nonzero(self.predict(X_test) - y_test))/len(X_test)
                accuracys_test.append(accuracy_test)
            
            # Optionally print the loss.
            # This is expensive because it uses the whole dataset, so we don't want to do it too often.
            if print_loss and i % 100 == 0:
                print("Loss after iteration %i: %f" % (i, loss))
        if test:
            return losses, accuracys, losses_test, accuracys_test
        else:
            return losses, accuracys

    # Helper function to evaluate the total loss on the dataset
    def calculate_loss(self, X, expected_output, keep_prob, reg_lambda):
        num_examples = len(X)  # training set size

        # Forward propagation to calculate our predictions
        a_output = self.forward(X, keep_prob)
        probs = a_output[-1]

        # Calculating the loss
        corect_logprobs = -np.log(probs[range(num_examples), expected_output])
        data_loss = np.sum(corect_logprobs)
        # Add regulatization term to loss (optional)
        for W in self.Ws:
            data_loss += reg_lambda / 2 * np.sum(np.square(W))
 
        return 1. / num_examples * data_loss

    # Forward propagation
    def forward(self, X, keep_prob):
        Ws = self.Ws
        bs = self.bs
        hidden_layer_num = len(Ws)
        a_output = [1] * hidden_layer_num
        current_input = X

        for i in range(0, hidden_layer_num - 1):
            w_current = Ws[i]
            b_current = bs[i]
            z_current = current_input.dot(w_current) + b_current
            a_current = self.activation(z_current)
            
            d = np.random.rand(a_current.shape[0],a_current.shape[1]) < keep_prob
            
            a_current = np.multiply(d, a_current)
            
            a_current /= keep_prob
            
            a_output[i] = a_current
            current_input = a_current
            
        keep_prob = 1

        #output layer(softmax)
        z_current = current_input.dot(Ws[hidden_layer_num - 1]) + bs[hidden_layer_num - 1]
        a_current = softmax(z_current)
        a_output[hidden_layer_num - 1] = a_current
        return a_output

    
    # Predict the result of classification of input x
    def predict(self, x):
        a_output = self.forward(x, keep_prob = 1)
        return np.argmax(a_output[-1], axis=1)

    # Backpropagation
    def backward(self, X, expected_output, a_output, keep_prob, beta):
        Ws = self.Ws
        bs = self.bs
        hidden_layer_num = len(Ws)
        
        num_examples = len(X)
        ds = [1] * hidden_layer_num
        

        # output layer
        d_current = a_output[hidden_layer_num - 1]
        

        d_current[range(num_examples), expected_output] -= 1
        

        ds[hidden_layer_num - 1] = d_current
        

        #other hidden layer
        for l in range(hidden_layer_num - 2, -1, -1):
            w_current = Ws[l + 1]
            a_current = a_output[l]
            d_current = np.dot(d_current, w_current.T) * (self.activation_deriv(a_current))
            ds[l] = d_current / keep_prob

        #calc dW && db
        dWs = [1] * hidden_layer_num
        dbs = [1] * hidden_layer_num
        
        v_dWs = [0] * hidden_layer_num
        v_dbs = [0] * hidden_layer_num
        
        
        a_last = X
        num_output = len(X)
        for l in range(0, hidden_layer_num):
            d_current = ds[l]
            dWs[l] = np.dot(a_last.T, d_current)
            dbs[l] = np.sum(d_current, axis=0, keepdims=True)
            
            v_dWs[l] = beta * v_dWs[l] + (1-beta) * dWs[l]
            v_dbs[l] = beta * v_dbs[l] + (1-beta) * dbs[l]
            
            a_last = a_output[l]
        return v_dWs, v_dbs

    # Update the params (Ws and bs) of the netword during Backpropagation
    def update_model_params(self, v_dWs, v_dbs, num_examples, reg_lambda):
        Ws = self.Ws
        bs = self.bs
        hidden_layer_num = len(Ws)
        for l in range(0, hidden_layer_num):
            Ws[l] = Ws[l] - self.epsilon * (v_dWs[l] + reg_lambda* Ws[l])/ num_examples
            bs[l] = bs[l] - self.epsilon * (v_dbs[l])/ num_examples

        self.Ws = Ws
        self.bs = bs

def softmax(x):
        exp_scores = np.exp(x)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs
      
class Activation():
   
    def __tanh(self, x):
        return np.tanh(x)

    def __tanh_deriv(self, a):
        return 1-a**2

    def __relu(self, x):       
        return np.maximum(0,x)

    def __relu_deriv(self, a):
        a[a>0]=1
        a[a<=0]=0
        return a  
      
    def __init__(self,activation='relu'):
        if activation == 'relu':
            self.f = self.__relu
            self.f_deriv = self.__relu_deriv
        elif activation == 'tanh':
            self.f = self.__tanh
            self.f_deriv = self.__tanh_deriv

class Process():
    def __z_score(self, x):
        x = np.array(x).astype(float)
        xr = np.rollaxis(x, axis=1)
        xr -= np.mean(x, axis=1)
        xr /= np.std(x, axis=1)
        return x

    def __norm(self, x):
        x = np.array(x).astype(float)
        xr = np.rollaxis(x, axis=1)
        x_mean = np.mean(x, axis = 1)
        x_max = np.max(x, axis = 1)
        x_min = np.min(x, axis = 1)
        xr -= x_mean
        xr /=(x_max-x_min)
        return x
      
    def __init__(self, process='norm'):
        if process == 'norm':
            self.p = self.__norm
        elif process == 'z_score':
            self.p = self.__z_score
 

###import data

In [0]:
X_test= X[-10000:]
y_test= labels[-10000:]
X_train = X[:5000]
y_train = labels[:5000]

##current optimal config for tanh 50000

In [0]:
class Config_tanh:
    # Gradient descent parameters (I picked these by hand)
    epsilon = 0.1  # learning rate for gradient descent
    layers = [128,20,40,10] # number of nodes in each layer
    activation  = "tanh"

## current optimal config for relu 50000

In [0]:
class Config_relu:
    # Gradient descent parameters (I picked these by hand)
    epsilon = 0.075  # learning rate for gradient descent
    layers = [128,20,40,10] # number of nodes in each layer
    activation = "relu"

In [0]:
def plot_result():
    plt.title('losses_train, accuracys_train, losses_test, accuracys_test')
    
    plt.plot(losses, label = 'losses = %f' %(losses[-1]))
      
    
    plt.plot(losses_test, label = 'losses_test = %f' %(losses_test[-1]))
    
    plt.plot(accuracys, label = 'accuracys = %f' %(accuracys[-1]))
    
    plt.plot(accuracys_test, label = 'accuracys_test = %f' %(accuracys_test[-1]))
    
    plt.legend()
    plt.xlabel('epochs')

## main train process

In [164]:
import time
start_time = time.time()
losses, accuracys, losses_test, accuracys_test = [],[],[],[]
M1 = NNModel(Config_tanh.layers, Config_tanh.epsilon)
""", losses_test, accuracys_test"""
losses, accuracys  = (
    M1.train(X_train, y_train, num_passes=200, keep_prob = .8, beta = 0.8, reg_lambda = 0.01, process = "z_score", 
                print_loss=True, X_test = None, y_test = None))

print("--- %s seconds ---" % (time.time() - start_time))

#plot_result()


Loss after iteration 0: 2.976268
Loss after iteration 100: 1.481803
Loss after iteration 200: 1.121479
--- 10.890503168106079 seconds ---


### accuracy 

train accuracy

In [61]:
1-(np.count_nonzero(model.predict(X_train) - labels_train))/len(X_train)

0.5498000000000001

test accuracy

In [60]:
1-(np.count_nonzero(model.predict(X_predict) - labels_predict))/len(X_predict)

0.5375

## check the impact of activation

## check the impact of beta in sgd momentum

In [0]:
betas = np.linspace(0, 1, 20, endpoint=True)

losses_prob = []
accuracys_pred_prob= []
for k in betas:
    
    model = NNModel(Config.layers, Config.epsilon)
    
    losses_train, accuracys_train = model.train(X_train, labels_train, num_passes=500, keep_prob = 1, beta = k, print_loss=False)
    
    print("Train_Loss for keep_prob = %f: %f" % (k, losses_train[-1]))
    
    losses_prob.append(losses_train[-1])
    
    accuracys = 1- (np.count_nonzero(model.predict(X_predict) - labels_predict))/len(X_predict)
    
    print("Predict_accuracy for keep_prob = %f: %f" % (k, accuracys))
    
    accuracys_pred_prob.append(accuracys)
    

    

##check the impact of keep_prob

In [2]:
keep_probs = np.linspace(0.1, 1, 19, endpoint=True)

NameError: ignored

In [74]:
losses_prob = []
accuracys_pred_prob= []
for k in keep_probs:
    
    model = NNModel(Config.layers, Config.epsilon)
    
    losses_train, accuracys_train = model.train(X_train, labels_train, num_passes=5000, keep_prob =k, beta = 0.8, print_loss=False)
    
    print("Train_Loss for keep_prob = %f: %f" % (k, losses_train[-1]))
    
    losses_prob.append(losses_train[-1])
    
    accuracys = 1- (np.count_nonzero(model.predict(X_predict) - labels_predict))/len(X_predict)
    
    print("Predict_accuracy for keep_prob = %f: %f" % (k, accuracys))
    
    accuracys_pred_prob.append(accuracys)
    
plt.title('Train loss/ test accuracy for different beta')
plt.plot(betas, losses_prob, label = 'trian_loss')
plt.plot(betas, accuracys_pred_prob, label = 'test_accuracy')
plt.xlabel('keep_probs')    
plt.legend()   
    

Train_Loss for keep_prob = 0.100000: 2.005353
Predict_accuracy for keep_prob = 0.100000: 0.196900
Train_Loss for keep_prob = 0.150000: 1.807452
Predict_accuracy for keep_prob = 0.150000: 0.316600
Train_Loss for keep_prob = 0.200000: 1.680609
Predict_accuracy for keep_prob = 0.200000: 0.460800
Train_Loss for keep_prob = 0.250000: 1.573054
Predict_accuracy for keep_prob = 0.250000: 0.436500
Train_Loss for keep_prob = 0.300000: 1.466566
Predict_accuracy for keep_prob = 0.300000: 0.463900
Train_Loss for keep_prob = 0.350000: 1.343462
Predict_accuracy for keep_prob = 0.350000: 0.580800
Train_Loss for keep_prob = 0.400000: 1.269820
Predict_accuracy for keep_prob = 0.400000: 0.632200
Train_Loss for keep_prob = 0.450000: 1.174522
Predict_accuracy for keep_prob = 0.450000: 0.662900
Train_Loss for keep_prob = 0.500000: 1.098272
Predict_accuracy for keep_prob = 0.500000: 0.674000
Train_Loss for keep_prob = 0.550000: 1.018894
Predict_accuracy for keep_prob = 0.550000: 0.685200
Train_Loss for keep_

In [1]:
plt.title('Train loss/ test accuracy for different keep_prob')
plt.plot(keep_probs, losses_prob, label = 'trian_loss')
plt.plot(keep_probs, accuracys_pred_prob, label = 'test_accuracy')
plt.xlabel('keep_probs')
plt.legend()

NameError: ignored

### upload from google drive

In [0]:
# Install a Drive FUSE wrapper.

# https://github.com/astrada/google-drive-ocamlfuse

!apt-get install -y -qq software-properties-common python-software-properties module-init-tools

!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null

!apt-get update -qq 2>&1 > /dev/null

!apt-get -y install -qq google-drive-ocamlfuse fuse

In [0]:
# Generate auth tokens for Colab

from google.colab import auth 
auth.authenticate_user()

In [0]:
# Generate creds for the Drive FUSE library.

from oauth2client.client import GoogleCredentials 
creds = GoogleCredentials.get_application_default()

import getpass

!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL

vcode = getpass.getpass()

!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
# Create a directory and mount Google Drive using that directory.

!mkdir -p drive

!google-drive-ocamlfuse drive

In [0]:
with h5py.File('drive/Colab Notebooks/deep_data_ass_1/train_128.h5','r') as H:
    X = np.copy(H['data'])

with h5py.File('drive/Colab Notebooks/deep_data_ass_1/train_label.h5','r') as H:
    labels = np.copy(H['label'])
