In [None]:
# Note: this notebook is a visualized walkthrough of a small library for neural networks with multiple layers.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# This is an implementation of, and a small library for, binary classification (the same task logistic regression solves).

In [None]:
# Load scikit-learn's breast cancer dataset for debugging.
from sklearn import datasets
dataset = datasets.load_breast_cancer()
# Uncomment any of the lines below to inspect what was loaded:
#print(dataset)
#print(dataset['data'])    # input features of the dataset
#print(dataset['target'])   # true labels of the dataset

In [None]:
'''
A neural network is built from multiple computing units (e.g. sigmoid or relu).
To implement one, the weights w must be initialized randomly, while the biases b can all be set to zero.
'''

# nn_structure is a list giving the number of units in each layer, starting with the input layer.
# For example, nn_structure = [4,4,3,1] describes a neural network with 1 input layer (4 features), 2 hidden layers (4 and 3 units) and one output unit.
def nn_parameter_initialize(nn_structure):
    """Create the initial weights and biases of a fully connected network.

    nn_structure lists the number of units per layer, input layer first
    (e.g. [4, 4, 3, 1]). For every layer i >= 1 the returned dict holds
      'w<i>': shape (units_i, units_{i-1}), standard-normal draws scaled by 0.01
      'b<i>': shape (units_i, 1), all zeros
    """
    parameter = {}
    # Pair every layer with its predecessor: (units_{i-1}, units_i).
    layer_pairs = zip(nn_structure[:-1], nn_structure[1:])
    for layer, (fan_in, fan_out) in enumerate(layer_pairs, start=1):
        parameter['w%d' % layer] = 0.01 * np.random.randn(fan_out, fan_in)
        parameter['b%d' % layer] = np.zeros((fan_out, 1))
    return parameter

# Smoke test: build the parameters of a 4-3-2-1 network and print them for inspection.
test = nn_parameter_initialize([4,3,2,1])
print(test)

In [None]:
#implementation of sigmoid function
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise to z."""
    return 1 / (np.exp(-z) + 1)

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and z."""
    return np.maximum(0, z)

def derivative_of_activation(a,activation='sigmoid'):
    """Element-wise derivative of an activation function, evaluated at ``a``.

    Parameters
    ----------
    a : array-like
        Points at which the derivative is evaluated. The back-propagation
        code in this notebook passes the pre-activation values z here.
    activation : {'sigmoid', 'relu'}
        Which activation function to differentiate.

    Returns
    -------
    numpy.ndarray
        Same shape as ``a``. For 'sigmoid' this is sigmoid(a)*(1-sigmoid(a));
        for 'relu' it is an int32 array holding 1 where a > 0 and 0 elsewhere.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    a = np.asarray(a)
    if activation == 'sigmoid':
        s = sigmoid(a)  # evaluate the sigmoid once instead of twice
        return (s*(1-s)).reshape(a.shape)
    if activation == 'relu':
        return (a > 0).astype(np.int32).reshape(a.shape)
    # Fail loudly instead of falling through to `0 .reshape(...)`.
    raise ValueError("unsupported activation: {!r} (expected 'sigmoid' or 'relu')".format(activation))

# Plot each activation function and its derivative on a 2x2 grid.
# The four panels differ only in title and curve, so they are described as data
# and drawn by one loop instead of four copy-pasted stanzas.
z = np.linspace(-10,10)
panels = [
    ("Sigmoid function's shape", sigmoid(z)),
    ("ReLu function's shape", relu(z)),
    ("Sigmoid function's derivative 's shape", derivative_of_activation(z)),
    ("ReLu's derivative 's shape", derivative_of_activation(z,activation='relu')),
]

fig, axes = plt.subplots(2, 2, figsize=(8,8))
# axes.ravel() walks the grid row by row, i.e. in subplot order 221, 222, 223, 224.
for ax, (title, values) in zip(axes.ravel(), panels):
    ax.set_title(title)
    ax.set_xlabel('z')
    ax.plot(z, values)
plt.show()  # also keeps the cell from echoing a Line2D repr

In [None]:
# note the dimensions of vectors:
# w - (number_of_units_in_current_layer,number_of_units_in_previous_layer)
# b - (number_of_units_in_current_layer,1) *broadcasted*
# X - (x_dims,m)                           *m=number of samples

'''
Note that in a neural network with multiple layers, forward prop needs to cache some values for the back prop step.
For example:
    during forward prop we calculate z = np.dot(w,X)+b and then output a = g(z), where the function g can be relu or sigmoid.
    during back prop we calculate dz = da*g'(z), where g'(z) is the derivative of g(z).
    Therefore we cache z so that it does not have to be calculated again.
    
In a nutshell, what we cache in forward prop is: z, a, w
'''

# Forward propagation step: compute the predicted y's label
def linear_forward_prop(w,b,X):
    """Affine part of one layer.

    Returns the pair (np.dot(w, X) + b, w); the weight matrix is handed back
    unchanged so it can serve as the linear cache during back propagation.
    """
    return np.dot(w, X) + b, w

def single_layer_forward_prop(z,activation='relu'):
    """Apply a layer's activation function to its pre-activation values.

    Parameters
    ----------
    z : numpy.ndarray
        Pre-activation values of the layer.
    activation : {'relu', 'sigmoid'}
        Activation function to apply.

    Returns
    -------
    (a, activation_cache)
        ``a`` is the activated output and ``activation_cache`` is the tuple
        ``(a, z)`` kept for back propagation.

    Raises
    ------
    ValueError
        If ``activation`` is not supported (previously this surfaced as an
        UnboundLocalError because ``a`` was never assigned).
    """
    if activation == 'relu':
        a = relu(z)
    elif activation == 'sigmoid':
        a = sigmoid(z)
    else:
        raise ValueError("unsupported activation: {!r} (expected 'relu' or 'sigmoid')".format(activation))
    return a, (a, z)

def L_layer_forward_prop(X,parameters,L):
    """Run a full forward pass through an L-layer network.

    Layers 1 .. L-1 use relu, layer L uses sigmoid. ``parameters`` must hold
    'w<i>' and 'b<i>' for i = 1 .. L.

    Returns (yhat, cache), where yhat is the sigmoid output of layer L and
    cache['layer_<i>'] is (linear_cache, activation_cache) for every layer.
    """
    cache = {}
    current = X  # activation fed into the next layer; the input for layer 1

    # Hidden layers: linear step followed by relu.
    for layer in range(1, L):
        pre_activation, linear_cache = linear_forward_prop(
            parameters['w' + str(layer)], parameters['b' + str(layer)], current)
        current, activation_cache = single_layer_forward_prop(pre_activation, activation='relu')
        cache['layer_' + str(layer)] = (linear_cache, activation_cache)

    # Output layer: linear step followed by sigmoid.
    pre_activation, linear_cache = linear_forward_prop(
        parameters['w' + str(L)], parameters['b' + str(L)], current)
    yhat, activation_cache = single_layer_forward_prop(pre_activation, activation='sigmoid')
    cache['layer_' + str(L)] = (linear_cache, activation_cache)
    return yhat, cache

In [None]:
# Compute cost function: used to check convergence
def compute_cost(yhat,y,eps=1e-15):
    """Mean binary cross-entropy between predictions and labels.

    Parameters
    ----------
    yhat : numpy.ndarray, shape (1, m)
        Predicted probabilities.
    y : numpy.ndarray, broadcastable to yhat
        True labels (0 or 1).
    eps : float, optional
        Predictions are clipped to [eps, 1 - eps] before taking logarithms.
        Without this a saturated prediction (exactly 0 or 1) yields log(0),
        turning the cost into inf or nan.

    Returns
    -------
    float
        -sum(y*log(yhat) + (1-y)*log(1-yhat)) / m
    """
    m = yhat.shape[1]
    p = np.clip(yhat, eps, 1 - eps)
    return -np.sum(y*np.log(p) + (1-y)*np.log(1-p))/m

In [None]:
# Back propagation step: compute partial derivatives of each parameter respectively
def linear_back_prop(m,w,a_previous,dz_l):
    """Gradients of one linear layer, given dz of that layer.

    Returns (dw, db, da_previous):
      dw          = (a_previous . dz^T)^T / m, transposed back so that it has
                    the same shape as w
      db          = row sums of dz divided by m, kept as a column vector
      da_previous = w^T . dz
    The assertions guard against shape mismatches.
    """
    grad_w = np.dot(a_previous, dz_l.T).T / m
    assert grad_w.shape == w.shape

    grad_b = np.sum(dz_l, axis=1, keepdims=True) / m
    grad_a_previous = np.dot(w.T, dz_l)
    assert grad_a_previous.shape == a_previous.shape
    return grad_w, grad_b, grad_a_previous
    
def single_layer_back_prop(da_l,z_l,activation):
    """Chain rule through a layer's activation: dz = da * g'(z).

    The result must have the same shape as z_l, which is asserted.
    """
    grad_z = da_l * derivative_of_activation(z_l, activation)
    assert grad_z.shape == z_l.shape
    return grad_z

def L_layer_back_prop(m,X,y,cache_from_forward):
    """Back-propagate through the whole network.

    Parameters
    ----------
    m : int
        Number of training examples (divisor of the averaged gradients).
    X : numpy.ndarray
        Network input, used as the activation of "layer 0".
    y : numpy.ndarray
        True labels, broadcastable to the network output.
    cache_from_forward : dict
        Maps 'layer_<i>' to (w_i, (a_i, z_i)) for i = 1 .. L, as produced by
        the forward pass.

    Returns
    -------
    (dW, db)
        Dicts keyed by layer number 1 .. L with the gradients of the weights
        and biases.
    """
    L = len(cache_from_forward)

    # Unpack the forward cache into per-layer lookups.
    W, A, Z = {}, {0: X}, {}
    for layer in range(1, L+1):
        W[layer], (A[layer], Z[layer]) = cache_from_forward['layer_'+str(layer)]

    dW, db, dA = {}, {}, {}

    # Output layer (sigmoid + cross-entropy). The textbook chain
    #   dA = -y/yhat + (1-y)/(1-yhat),  dz = dA * yhat*(1-yhat)
    # simplifies algebraically to dz = yhat - y. Using the simplified form
    # avoids dividing by zero (inf * 0 = nan) once the sigmoid saturates to
    # exactly 0 or 1.
    yhat = A[L]
    dz_L = yhat - y
    assert dz_L.shape == Z[L].shape
    dW[L], db[L], dA[L-1] = linear_back_prop(m, W[L], A[L-1], dz_L)

    # Hidden layers (relu), walked from the last hidden layer back to the first.
    for i in reversed(range(1, L)):
        dz_l = single_layer_back_prop(dA[i], Z[i], activation='relu')
        dW[i], db[i], dA[i-1] = linear_back_prop(m, W[i], A[i-1], dz_l)
    return dW, db

In [None]:
# The overall implementation of training the neural network
# Note: net_structure gives the number of units in each hidden layer and in the output layer. The input layer must not be included.

def train_neural_network(X,y,net_structure,number_of_iteration = 1000,learning_rate = 0.03,print_cost = True,plot_cost = True):
    """Train the network with full-batch gradient descent.

    Parameters
    ----------
    X : array-like
        Input features. Accepted layouts, with m = number of labels in ``y``:
        * (m, n_features), one row per sample (scikit-learn layout): it is
          transposed to (n_features, m);
        * (n_features, m), one column per sample: used as is;
        * anything else is reshaped to (-1, m).
        A square (m, m) matrix is ambiguous and is taken to be already
        (n_features, m).
    y : array-like
        Labels (0 or 1); flattened to a (1, m) row vector.
    net_structure : sequence of int
        Units in each hidden layer followed by the output layer; the input
        layer is not included.
    number_of_iteration : int
        Number of gradient-descent steps.
    learning_rate : float
        Step size of each update.
    print_cost : bool
        Print the cost every 100 iterations.
    plot_cost : bool
        Scatter-plot the cost recorded every 100 iterations.

    Returns
    -------
    dict
        Trained parameters 'w<i>' / 'b<i>' for i = 1 .. len(net_structure).
    """
    report_every = 100  # iterations between cost reports

    # Dimension convert: make sure all vectors are in proper shapes.
    y = np.asarray(y).reshape(1,-1)   # y is a row vector
    m = y.shape[1]                    # m = total number of training examples
    X = np.asarray(X)
    if X.ndim == 2 and X.shape[0] == m and X.shape[1] != m:
        # One row per sample: transpose. A plain reshape(-1, m) would keep the
        # memory order and so mix values of different samples into one column.
        X = X.T
    else:
        X = X.reshape(-1,m)
    print('*******Dimension Check*******')
    print('Input feature\'s dimension: ',X.shape)
    print('Output\'s dimension: ',y.shape)
    print('*****************************')
    x_dim = X.shape[0]
    L = len(net_structure) # number of layers

    # Initialize parameters; list() lets callers pass a tuple as well.
    params = nn_parameter_initialize([x_dim] + list(net_structure))

    i_curve = []
    cost_curve = []
    for i in range(1,number_of_iteration+1):
        # 1: forward propagation, 2: backward propagation, 3: parameter update
        yhat,cache = L_layer_forward_prop(X,params,L)
        dW,dB = L_layer_back_prop(m,X,y,cache)
        for j in range(1,L+1):
            params['w'+str(j)] = params['w'+str(j)] - learning_rate*dW[j]
            params['b'+str(j)] = params['b'+str(j)] - learning_rate*dB[j]

        # The cost is only needed for reporting, so compute it only then.
        # yhat stems from the parameters before this iteration's update.
        if i % report_every == 0 and (print_cost or plot_cost):
            cost = compute_cost(yhat,y)
            if print_cost:
                print('number of iterations:{}, cost = {}'.format(i,cost))
            if plot_cost:
                i_curve.append(i)
                cost_curve.append(cost)

    # Visualize the process of training
    if plot_cost:
        plt.figure(figsize=(5,5))
        plt.title('Cross entrophy of regression')
        plt.xlabel('iteration')
        plt.ylabel('cost')
        plt.scatter(i_curve,cost_curve)

    return params

In [None]:
#After training the unit, we can now use it to make predictions.
def nn_predict(parameters,X,y=0,evaluate = True):
    """Predict binary labels with a trained network.

    Parameters
    ----------
    parameters : dict
        Trained parameters 'w<i>' / 'b<i>'; the number of layers is
        len(parameters) // 2 and the input width is taken from 'w1'.
    X : array-like
        Input features. Accepted layouts, with n = parameters['w1'].shape[1]:
        * (k, n), one row per sample (scikit-learn layout): transposed;
        * (n, k), one column per sample: used as is;
        * anything else is reshaped to (n, -1).
        A square (n, n) matrix is ambiguous and is taken to be already
        (n_features, k).
    y : array-like, optional
        True labels, only used for evaluation. The scalar default means
        "no labels given".
    evaluate : bool
        When True and labels were given, print the accuracy on (X, y).

    Returns
    -------
    numpy.ndarray of bool, shape (1, k)
        True where the predicted probability exceeds 0.5.

    Raises
    ------
    ValueError
        If labels are evaluated but their count differs from the number of
        samples in X.
    """
    # Use the argument, not a global that happens to be called `params`.
    L = len(parameters) // 2
    x_dim = parameters['w1'].shape[1]

    X = np.asarray(X)
    if X.ndim == 2 and X.shape[1] == x_dim and X.shape[0] != x_dim:
        # One row per sample: transpose. A plain reshape would keep the memory
        # order and so mix values of different samples into one column.
        X = X.T
    else:
        X = X.reshape(x_dim,-1)

    yhat,_ = L_layer_forward_prop(X,parameters,L)
    yhat = yhat>0.5

    # Optional evaluation of the predictions against the given labels.
    has_labels = np.ndim(y) > 0
    if evaluate and has_labels:
        y = np.asarray(y).reshape(1,-1)   # y is a row vector
        if y.shape[1] != yhat.shape[1]:
            raise ValueError('got {} labels for {} samples'.format(y.shape[1], yhat.shape[1]))
        accuracy = np.sum(yhat==y)/y.shape[1]
        print('accuracy = %.2f\n'%accuracy)
    return yhat

In [None]:
print("Goal:Wanna classify whether our patient's breast cancer is {}(0) or {}(1)".format(dataset.target_names[0],dataset.target_names[1]))
y = dataset['target']
X_raw = dataset['data']

# Split first, so that the scaling statistics come from the training set only
# and the test set stays unseen until evaluation.
X_train,X_test,y_train,y_test = train_test_split(X_raw,y,test_size=0.3,random_state=1)

# Standardize every feature (column) separately: zero mean and unit variance on
# the training set. The former formula subtracted each column's L2 norm and
# divided by the range of the whole matrix, which left the features on very
# different scales.
feature_mean = X_train.mean(axis=0,keepdims=True)
feature_std = X_train.std(axis=0,keepdims=True)
feature_std[feature_std == 0] = 1.0   # constant column: avoid division by zero

X_train = (X_train-feature_mean)/feature_std
X_test = (X_test-feature_mean)/feature_std
X = (X_raw-feature_mean)/feature_std   # whole data set on the same scale

In [None]:
# Train the network: [3,5,1] means hidden layers of 3 and 5 units and one output unit.
# The weights are drawn from NumPy's global random generator, so seed it first;
# otherwise every "Restart & Run All" starts from different weights.
np.random.seed(1)
params = train_neural_network(X_train,y_train,[3,5,1],number_of_iteration = 100000,learning_rate = 0.1,print_cost = False,plot_cost = True)

In [None]:
# Evaluate the performance of the trained network on the training set and on the test set.
# With evaluate=True, nn_predict prints the accuracy; it returns the predictions thresholded at 0.5.
print('Training accuracy:')
Yhat = nn_predict(params,X_train,y_train,evaluate = True)
print('Accuracy in test sets:')
Ypredict = nn_predict(params,X_test,y_test,evaluate = True)

'''
#Okay, we have built our own logistic regression unit. Let's compare our unit with sklearn's! 
model=LogisticRegression(solver='liblinear')#Build a logistic regression model
model=model.fit(X_train,y_train)#Train the model
train_score=model.score(X_train,y_train)#How many samples can the model predict right? 
print('sklearn\'s logistic regression training accuracy:')
print('%.2f'%train_score)
'''