## Import Libraries

In [26]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Activation Functions

In [27]:
class Activations:
    @staticmethod
    def step(x):
        if x < 0:
            return 0
        return 1
    
    @staticmethod
    def sigmoid(x):
        return 1 / (1+np.exp(-x))

    @staticmethod
    def tanh(x):
        return (2 / (1+np.exp(-2*x))) - 1

    @staticmethod
    def relu(x):
        print("x relu: ", x)
        return np.max(0, x)

    @staticmethod
    def leaky_relu(x):
        if x>=0:
            return x
        return 0.3*x
    
    @staticmethod
    def elu(x, alpha=1):
        if x>=0:
            return x
        return alpha*(np.exp(x)-1)
    
    @staticmethod
    def binary(x):
        if x>= 0.5:
            return 1
        return 0

## Activation Function derivates to compute gradients

In [28]:
class ActivationDerivatives:
    @staticmethod
    def step_deriv(x):
        return 0

    @staticmethod
    def sigmoid_deriv(x):
        sigmoid = Activations.sigmoid(x)
        return sigmoid*(1-sigmoid)

    @staticmethod
    def tanh_deriv(x):
        return 1 - np.sqrt(Activations.tanh(x))

    @staticmethod
    def relu_deriv(x):
        if x<0:
            return 0
        return 1

    @staticmethod
    def leaky_relu_deriv(x):
        if x>=0:
            return 1
        return 0.3

    @staticmethod
    def elu_deriv(x, alpha=1):
        if x<0:
            return Activations.elu(x)+alpha
        return 1

# Losses

In [29]:
class Loss:
    @staticmethod
    # Regression Mean Squared Error
    def mse(targets, predictions):
        differences_squared = (predictions-targets)**2
        return np.sum(differences_squared)
    
    @staticmethod
    # Binary Cross Entropy
    def binary_cross_entropy(y_target, prob):
        return -(y_target*np.log(prob) + (1-y_target)*np.log(1-prob))
    
    @staticmethod
    def mse_prime(y_target, prediction):
        return prediction-y_target

# Build ANN

## Build Layers

In [30]:
class Dense():
    def __init__(self, units, activation, input_dim = None):
        available_activations = ['step', 'sigmoid', 'tanh', 'relu', 'leaky_relu', 'elu']
        activations = [Activations.step, Activations.sigmoid, Activations.tanh, Activations.relu, Activations.leaky_relu, Activations.elu]
        activation_derivs = [ActivationDerivatives.step_deriv, ActivationDerivatives.sigmoid_deriv, ActivationDerivatives.tanh_deriv, ActivationDerivatives.relu_deriv, ActivationDerivatives.leaky_relu_deriv, ActivationDerivatives.elu_deriv]
        index = available_activations.index(activation)
        if not index:
            print("Error. Activation not found, list of available activations are: ", available_activations)
            return
        self.units = units
        self.input_dim = input_dim
        self.activation = activations[index]
        self.activation_deriv = activation_derivs[index]
        
        self.input = []
        self.activations = []
        
    def compile_layer(self, input_dim=None):
        if (self.input_dim != None):
            input_dim = self.input_dim
        self.weights = np.random.rand(input_dim, self.units) # initialize weights
        self.b = np.random.rand()
        return self.units
        
    def propagate(self, x):
        z = np.dot(x, self.weights) + self.b
        self.input = z
        self.activations = self.activation(z)
        return self.activations
    
    def backpropagate(self, local_gradient, learning_rate):
        errors = np.multiply(local_gradient, self.activations)
        layer_gradient = np.array([self.activation_deriv(a) for a in self.activations])*errors
        for gradient in layer_gradient:
            self.b -= gradient
        for index in range(self.weights.shape[0]):
            self.weights[index] -= (learning_rate * layer_gradient[index] * self.input[index])
        return layer_gradient

In [31]:
class Model():
    def __init__(self, lr=0.01):
        self.layers = []
        self.loss = Loss.mse_prime
        self.learning_rate = lr
        
    def compile_layers(self):
        input_dim = self.layers[0].compile_layer()
        for i in range(1, len(self.layers)):
            input_dim = self.layers[i].compile_layer(input_dim)
    
    def fit(self, x, y, epochs=20, batch_size=4):
        dataset_size=x.shape[0]
        if batch_size>dataset_size:
            print("Error. Batch size cannot be greater than dataset size")
            return
        for epoch in range(epochs):
            # assume batch size is 1 for now
            batch_start = 0
            batch_end = batch_size
            while batch_end < dataset_size:
                if (batch_end > dataset_size):
                    batch_end = dataset_size
                x_sample = x[batch_start:batch_end]
                y_batch = y[batch_start:batch_end]
                predictions = self.propagate(x_sample)
                self.backpropagate(y_batch, predictions)
                batch_start+=batch_size
                batch_end+=batch_size
    
    def predict(self, x):
        predictions = []
        for x_sample in x:
            predictions.append(self.propagate(x_sample))
        return predictions
    
    def add(self, layer):
        self.layers.append(layer)
    
    def propagate(self, x_batch):
        for i in range(len(self.layers)):
            x_batch = np.array(self.layers[i].propagate(x_batch))
        prediction = x_batch
        return prediction
    
    def backpropagate(self, y_target, prediction_values):
        output_layer = self.layers[-1]
        errors = self.loss(y_target, prediction_values)
        local_gradient = np.array([output_layer.activation_deriv(a) for a in output_layer.activations])*errors
        output_layer.weights -= self.learning_rate*np.dot(output_layer.input.T, local_gradient)
        for gradient in local_gradient:
            output_layer.b -= gradient
        for i in range(len(self.layers)-2, -1, -1):
            self.layers[i].backpropagate(local_gradient, self.learning_rate)
            
    
    def summary(self):
        pass


# Set up Data 

In [32]:
breast_cancer_data = pd.read_csv('Data/breast_cancer_diagnosis_data.csv')
breast_cancer_data.drop(['id'], axis=1, inplace=True)
breast_cancer_data.dropna(axis='columns', inplace=True)
breast_cancer_data.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [33]:
np.unique(breast_cancer_data['diagnosis']) #Malignant or Benign
def diagnosis_to_binary(diagnosis):
    if diagnosis=='B':
        return 0
    elif diagnosis=='M':
        return 1
    
breast_cancer_data['diagnosis'] = breast_cancer_data['diagnosis'].apply(diagnosis_to_binary)

## Feature Selection

In [34]:
def standard_units(arr):
    return (arr-np.mean(arr))/np.std(arr)
                   
def correlation(a, b):
    return np.mean(standard_units(a)*standard_units(b))

In [35]:
def correlations(df, y_feature):
    feature_correlations = {}
    y = df[y_feature].values
    df_copy = df.drop([y_feature], axis = 1)
    print("Explore Correlation between features and " + str(y_feature))
    for column in df_copy:
        corr = correlation(df[column].values, y)
        feature_correlations[column] = corr
        print(str(column)+" Correlation: " + str(corr))
    return feature_correlations

In [36]:
feature_correlations = correlations(breast_cancer_data, 'diagnosis')

Explore Correlation between features and diagnosis
radius_mean Correlation: 0.7300285113754565
texture_mean Correlation: 0.4151852998452045
perimeter_mean Correlation: 0.7426355297258332
area_mean Correlation: 0.7089838365853901
smoothness_mean Correlation: 0.3585599650859321
compactness_mean Correlation: 0.5965336775082535
concavity_mean Correlation: 0.6963597071719059
concave points_mean Correlation: 0.7766138400204355
symmetry_mean Correlation: 0.33049855426254715
fractal_dimension_mean Correlation: -0.012837602698432387
radius_se Correlation: 0.5671338208247177
texture_se Correlation: -0.008303332973877421
perimeter_se Correlation: 0.5561407034314833
area_se Correlation: 0.5482359402780244
smoothness_se Correlation: -0.06701601057948733
compactness_se Correlation: 0.2929992442488584
concavity_se Correlation: 0.25372976598083036
concave points_se Correlation: 0.40804233271650475
symmetry_se Correlation: -0.006521755870647961
fractal_dimension_se Correlation: 0.07797241739025616
radi

In [37]:
def pick_correlations(feature_correlations, threshold=0.8):
    correlations_chosen = []
    for feature in feature_correlations:
        if feature_correlations[feature] >= threshold:
            correlations_chosen.append(feature)
    return correlations_chosen

In [38]:
# choose features with correlation higher than 0.75
columns_to_keep = pick_correlations(feature_correlations, threshold=0.73)

In [39]:
breast_cancer_data_diagnosis = breast_cancer_data[['diagnosis']]
breast_cancer_data_features = breast_cancer_data[columns_to_keep]
breast_cancer_data_features.head()

Unnamed: 0,radius_mean,perimeter_mean,concave points_mean,radius_worst,perimeter_worst,area_worst,concave points_worst
0,17.99,122.8,0.1471,25.38,184.6,2019.0,0.2654
1,20.57,132.9,0.07017,24.99,158.8,1956.0,0.186
2,19.69,130.0,0.1279,23.57,152.5,1709.0,0.243
3,11.42,77.58,0.1052,14.91,98.87,567.7,0.2575
4,20.29,135.1,0.1043,22.54,152.2,1575.0,0.1625


## Split Tran and Test

In [40]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data_features, breast_cancer_data_diagnosis, test_size=0.33, random_state=42)

In [41]:
from sklearn.preprocessing import StandardScaler

# make sure they are in numpy form
x_train = np.array(x_train.values, dtype=np.float32)
x_test = np.array(x_test.values, dtype=np.float32)
y_train = np.array(y_train.values, dtype=np.uint8)
y_test = np.array(y_test.values, dtype=np.uint8)

# scale data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

print("x train samples: ", x_train[:5])
print("x test samples: ", x_test[:5])
print("y train samples: ", y_train[:5])
print("y test samples: ", y_test[:5])

x train samples:  [[ 0.35581988  0.4130312   1.5938771   0.5110286   0.52179366  0.37393498
   1.0565228 ]
 [-0.37326664 -0.39470002 -0.7896234  -0.39223224 -0.473485   -0.4140696
  -0.8966741 ]
 [ 1.4033579   1.3479992   1.0764874   2.0233212   1.8669261   2.1755323
   1.4635353 ]
 [-0.47941723 -0.48331875 -0.6581813  -0.46218845 -0.4379925  -0.48057154
  -0.13676336]
 [-0.48221073 -0.48413163 -0.46704042 -0.585641   -0.5778744  -0.57730156
  -0.5846312 ]]
x test samples:  [[-0.47941723 -0.45730227 -0.27799425 -0.27495223 -0.34165588 -0.3586225
  -0.19534856]
 [ 1.3279356   1.2707627   0.8033918   1.7599561   1.7416584   1.6936095
   0.9979379 ]
 [ 0.35581988  0.38051045  0.8390016   0.6077329   0.51881117  0.46721038
   0.5739667 ]
 [-0.49897146 -0.44185477 -0.5455915  -0.7049784  -0.5337325  -0.63862157
  -0.6239449 ]
 [-0.73920673 -0.7190931  -0.5992681  -0.83254594 -0.8567432  -0.73880625
  -0.69363046]]
y train samples:  [[1]
 [0]
 [1]
 [0]
 [0]]
y test samples:  [[0]
 [1]
 [1]
 

# Train Model

In [42]:
# create model
model = Model(lr=0.008)

# add Hidden Layer Layer
hl1 = Dense(units=15, activation='sigmoid', input_dim=len(x_train[0]))
model.add(hl1)

# add Output Layer
output_layer = Dense(units=1, activation='sigmoid')
model.add(output_layer)

model.compile_layers()

In [43]:
model.layers[0].weights

array([[6.49946934e-01, 3.83083253e-01, 4.96963092e-01, 7.22959043e-01,
        7.46097325e-01, 6.46601287e-01, 6.38374311e-02, 9.20810101e-01,
        3.71371037e-01, 8.71522182e-01, 7.63219545e-01, 6.86620642e-01,
        9.79498953e-01, 6.27032544e-01, 8.72991495e-01],
       [3.66896564e-01, 2.96763845e-01, 3.95370555e-01, 7.46813966e-01,
        7.46002023e-01, 2.74893541e-01, 4.14087121e-01, 1.59811151e-01,
        5.39090465e-01, 3.22391569e-01, 7.54493859e-01, 7.12911118e-01,
        2.10304315e-01, 5.29340086e-01, 4.00628274e-01],
       [6.87753587e-01, 8.59222141e-01, 5.98151008e-01, 7.60902535e-01,
        4.06924324e-02, 1.17767336e-01, 2.61241875e-01, 4.22213486e-01,
        1.73697005e-02, 1.86932808e-01, 5.46663644e-02, 2.55909133e-02,
        9.46897293e-01, 4.59072435e-01, 8.38981689e-01],
       [8.58271933e-04, 4.36504115e-01, 6.28395019e-01, 5.75310573e-01,
        8.86187506e-02, 8.41143846e-01, 2.68053036e-01, 5.19463441e-01,
        9.93037266e-01, 9.61239163e-0

In [44]:
model.layers[1].weights

array([[0.32228455],
       [0.7767414 ],
       [0.87020354],
       [0.62531461],
       [0.8041837 ],
       [0.69101008],
       [0.94029136],
       [0.46389576],
       [0.21918817],
       [0.28051403],
       [0.07688939],
       [0.6027361 ],
       [0.09972335],
       [0.0162104 ],
       [0.02695112]])

In [45]:
model.fit(x_train, y_train, epochs=80, batch_size=8)

In [46]:
model.layers[0].weights

array([[0.90535653, 0.53450388, 0.71334309, 0.89823979, 0.88470156,
        0.69176437, 0.22789929, 1.05677476, 0.43303345, 0.92951317,
        0.88055074, 0.75664973, 1.20691138, 0.68998004, 0.98701275],
       [0.34670902, 0.273644  , 0.36895109, 0.74529494, 0.74230182,
        0.29739661, 0.39623547, 0.14945696, 0.57720696, 0.35569632,
        0.77022465, 0.74637004, 0.18229473, 0.53635963, 0.4133919 ],
       [0.7361295 , 0.8787511 , 0.63253811, 0.79342285, 0.06794342,
        0.13580304, 0.30108923, 0.44525665, 0.04278897, 0.20877223,
        0.08599656, 0.05046241, 0.97363211, 0.47080283, 0.86414089],
       [0.08396201, 0.46064684, 0.68515619, 0.59767005, 0.10454228,
        0.8218305 , 0.28910279, 0.52752651, 0.97763185, 0.94880955,
        0.56693461, 0.97822475, 0.28553502, 0.65497387, 0.41645842],
       [0.65751811, 0.78956805, 0.08043323, 0.83151041, 0.47621228,
        0.48214731, 0.80734879, 0.24071722, 0.96025007, 0.58516014,
        0.30653039, 1.02190544, 0.11422849, 

In [47]:
model.layers[1].weights

array([[0.61076125],
       [1.0652181 ],
       [1.15868024],
       [0.91379131],
       [1.0926604 ],
       [0.97948678],
       [1.22876806],
       [0.75237246],
       [0.50766487],
       [0.56899073],
       [0.36536609],
       [0.8912128 ],
       [0.38820006],
       [0.3046871 ],
       [0.31542782]])

# View Results on Test Set

In [48]:
test_predictions = model.predict(x_test)
test_predictions = np.array([1 if x>0.5 else 0 for x in test_predictions], dtype=np.uint8)

In [49]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, test_predictions)

array([[114,   7],
       [  1,  66]], dtype=int64)

In [50]:
from sklearn.metrics import accuracy_score

print("accuracy: ", accuracy_score(y_test, test_predictions))

accuracy:  0.9574468085106383
