In [1]:
# import necessary modules
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# import csv data
df = pd.read_csv('./data.csv')
# reconstruct the 'diagnosis' columns
df['diagnosis'] = df['diagnosis'].replace({'M':1, 'B':-1})
# drop unncecessary columns, and rows having NaN values
df = df.drop(columns = ['id', 'Unnamed: 32'])
df = df.dropna()
df

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,1,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,1,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,1,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,1,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,1,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,1,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,1,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [3]:
# creating X(parameters for datasets) and Y(target variable)
req_cols = list(df.columns)
req_cols.remove('diagnosis')
X = df[req_cols]
Y = df['diagnosis']

In [4]:
# data normalization
means = X.mean()
stds = X.std()

for col in X.columns:
    X[col] = (X[col] - means[col]) / (stds[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [5]:
# creating test_train split of 85:15 ratio
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)

In [6]:
# convert into numpy arrays and reshape accordingly
X_train = np.array(X_train)
X_train = X_train.T
X_test = np.array(X_test)
X_test = X_test.T

Y_train = np.array(Y_train)
Y_train = Y_train.reshape(1, Y_train.shape[0])
Y_test = np.array(Y_test)
Y_test = Y_test.reshape(1, Y_test.shape[0])

In [7]:
# activation function definitions
def activation(act, z):
    if act == 'sigmoid':
        return 1/(1 + np.exp(-z))
    elif act == 'tanh':
        return (np.exp(2*z) - 1)/(np.exp(2*z) + 1)

In [8]:
# forward propagation for each step
def forward_prop(W, X, b, act):
    
    Z = np.dot(W, X) + b
    A = activation(act, Z)
    
    return Z, A

In [9]:
# definitions of differential of activation functions
def dif_activation(z, act):
    if act == 'sigmoid':
        return activation(act, z)*activation(act, -z)
    elif act == 'tanh':
        return 4 / (np.exp(z) + np.exp(-z))**2

In [10]:
# overall backpropagation
def back_prop(X, Y, samples, cache, act):
    
#     get variables from 'cache' dictionary
    m = samples
    A2 = cache['A2']
    A1 = cache['A1']
    W2 = cache['W2']
    Z1 = cache['Z1']
    A3 = cache['A3']
    W3 = cache['W3']
    Z2 = cache['Z2']
    
    
#     make necessary corrections in each layer
    dZ3 = A3 - Y
    dW3 = (1/m) * np.dot(dZ3, A2.T)
    db3 = (1/m) * np.sum(dZ3, axis=1, keepdims=True)
    
    dZ2 = np.multiply(np.dot(W3.T, dZ3), dif_activation(Z2, act))
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    
    dZ1 = np.multiply(np.dot(W2.T, dZ2), dif_activation(Z1, act))
    dW1 = (1/m) * np.dot(dZ1, X.T)
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)
    
    
#     store the values back in 'cache' dictionary
    cache['dZ1'] = dZ1
    cache['dZ2'] = dZ2
    cache['dZ3'] = dZ3
    cache['db1'] = db1
    cache['db2'] = db2
    cache['db3'] = db3
    cache['dW1'] = dW1
    cache['dW2'] = dW2
    cache['dW3'] = dW3
    
    return cache

In [11]:
# initialization of weights before gradient descent starts
def get_cache(layer, samples):
    
    cache = {}
    
    cache['W1'] = np.random.rand(layer[1], layer[0])
    cache['W2'] = np.random.rand(layer[2], layer[1])
    cache['W3'] = np.random.rand(layer[3], layer[2])
    cache['b1'] = np.random.rand(layer[1], 1)
    cache['b2'] = np.random.rand(layer[2], 1)
    cache['b3'] = np.random.rand(layer[3], 1)

    return cache

In [12]:
# overall forward and backward propagation(all steps included)
def nn_forward_and_backward(X, Y, cache, gamma, learning_rate, act, samples):
    
#     getting variables from 'cache'
    m = X.shape[1]
    
    W1 = cache['W1']
    W2 = cache['W2']
    W3 = cache['W3']
    b1 = cache['b1']
    b2 = cache['b2']
    b3 = cache['b3']

#     forward prop
    Z1, A1 = forward_prop(W1, X, b1, act)
    Z2, A2 = forward_prop(W2, A1, b2, act)
    Z3, A3 = forward_prop(W3, A2, b3, act)
    
#     storing back values in cache
    cache['Z1'] = Z1
    cache['Z2'] = Z2
    cache['Z3'] = Z3
    cache['A1'] = A1
    cache['A2'] = A2
    cache['A3'] = A3
    
#     back propagation
    cache = back_prop(X, Y, samples, cache, act)
    
#     note that L2 regularization is used
    cache['W1'] = (1 - gamma*learning_rate/m)*cache['W1'] - learning_rate*cache['dW1']
    cache['W2'] = (1 - gamma*learning_rate/m)*cache['W2'] - learning_rate*cache['dW2']
    cache['W3'] = (1 - gamma*learning_rate/m)*cache['W3'] - learning_rate*cache['dW3']
    cache['b1'] -= learning_rate*cache['db1']
    cache['b2'] -= learning_rate*cache['db2']
    cache['b3'] -= learning_rate*cache['db3']
    
    return cache

In [13]:
# calculating accuracy
def find_accuracy(cache, X, Y):
    
    A = predict(cache, X, Y)
    m = A.shape[1]
    true = 0
    
    for i in range(m):
        if A[:, i] == Y[:, i]:
            true += 1
            
    return (true / m)*100    

In [14]:
# prediction
def predict(cache, X, Y):
    
    W1 = cache['W1']
    W2 = cache['W2']
    W3 = cache['W3']
    b1 = cache['b1']
    b2 = cache['b2']
    b3 = cache['b3']
    
    Z1, A1 = forward_prop(W1, X, b1, act)
    Z2, A2 = forward_prop(W2, A1, b2, act)
    Z3, A3 = forward_prop(W3, A2, b3, act)
    m = A3.shape[1]
    
    for i in range(m):
        if A3[:, i] >= 0:
            A3[:, i] = 1
        else:
            A3[:, i] = -1
        
    return A3

In [15]:
# initialize hyperparameters

learning_rate = 0.001
gamma = 0.05
iterations = 25000
samples = X_train.shape[1]
act = 'tanh'
trials = 10

In [16]:
# initialize train_test accuracy lists and caches list
train = []
test = []
caches = []

# layer structure of NN
layer = [30, 40, 15, 1]

print("Training started...")

# model training through declared number of trials, whose avg will be the final accuracy
for i in range(trials):
    
    print("Training - " + str(i) + "....") 
    final_cache = get_cache(layer, samples)
    print("Cache initialized...now training")

#     each iteration
    for j in range(iterations):
        
        final_cache = nn_forward_and_backward(X_train, Y_train, final_cache, gamma, learning_rate, act, samples)
    
#     accuracies are found and stored in lists
    tsac = find_accuracy(final_cache, X_test, Y_test)
    trac = find_accuracy(final_cache, X_train, Y_train)
    train.append(trac)
    test.append(tsac)
    caches.append(final_cache)
    
    print("Done")

Training started...
Training - 0....
Cache initialized...now training
Done
Training - 1....
Cache initialized...now training
Done
Training - 2....
Cache initialized...now training
Done
Training - 3....
Cache initialized...now training
Done
Training - 4....
Cache initialized...now training
Done
Training - 5....
Cache initialized...now training
Done
Training - 6....
Cache initialized...now training
Done
Training - 7....
Cache initialized...now training
Done
Training - 8....
Cache initialized...now training
Done
Training - 9....
Cache initialized...now training
Done


In [17]:
# calculating mean accuracies
f_tr_mean = sum(train)/len(train)
f_ts_mean = sum(test)/len(test)

In [18]:
print("Training accuracy: " + str(f_tr_mean))
print("Testing accuracy: " + str(f_ts_mean))

Training accuracy: 89.93788819875778
Testing accuracy: 88.25581395348838
