In [1]:
# Breast cancer detection by feed forward back propogation neural network using back propogation
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
dataset=pd.read_csv('cancer_detection.csv')

In [3]:
dataset.shape

(569, 33)

In [4]:
dataset=dataset.drop(['id','Unnamed: 32'],axis=1)

In [5]:
dataset.shape

(569, 31)

In [6]:
from sklearn.preprocessing import LabelEncoder
encoder=LabelEncoder()
dataset['diagnosis']=encoder.fit_transform(dataset['diagnosis'])

In [7]:
dataset['diagnosis'].value_counts() # dataset is balanced

0    357
1    212
Name: diagnosis, dtype: int64

In [8]:
X=dataset.drop(['diagnosis'],axis=1)
Y=dataset['diagnosis']

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(455, 30)
(455,)
(114, 30)
(114,)


In [10]:
dummies=pd.get_dummies(dataset['diagnosis'])

In [11]:
dataset = pd.concat((dataset,dummies),axis=1)

In [12]:
X=dataset[dataset.columns[1:29]].values
y=dataset[dataset.columns[30:32]]

In [13]:
y.shape

(569, 2)

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(455, 28)
(455, 2)
(114, 28)
(114, 2)


## 3-Layer Neural Network Model

In [29]:
# Setting the HyperParameters(We will tune this parameters later)
training_epochs = 500 # steps require to train the model
n_neurons_in_h1 = 20 # First Hidden layer with 10 neurons
n_neurons_in_h2 = 20 # Second hidden layer with 10 neurons
learning_rate = 0.001 # how fast model is learning

In [30]:
n_dim=X.shape[1]
print("n_dim",n_dim)

n_dim 28


In [31]:
n_features=n_dim
n_classes=2

In [32]:
x = tf.placeholder(tf.float32,shape=[None,n_features], name='features')
y_ = tf.placeholder(tf.float32,shape=[None,n_classes], name='labels')

In [33]:
# Defining the model
# Using tanh activation function in hidden layer and softmax in output layer
# we can also use sigmoid activation in output layer
# Defining the model
def multilayer_perceptron(x,weights,biases):
    
    # First Hidden layer with tanh activation function
    layer_1=tf.add(tf.matmul(x,weights['h1']),biases['b1'])
    layer_1=tf.nn.tanh(layer_1)
    
    # Second Hidden layer with sigmoid activation function
    layer_2=tf.add(tf.matmul(layer_1,weights['h2']),biases['b2'])
    layer_2=tf.nn.sigmoid(layer_2)
    
    # Output layer with Softmax activation
    out_layer=tf.add(tf.matmul(layer_2,weights['out']),biases['out'])
    #out_layer=tf.nn.softmax(out_layer)
    return out_layer

In [34]:
# Defining weights and bias for each layer
weights={
    'h1':tf.Variable(tf.truncated_normal([n_features, n_neurons_in_h1]), name='weights1'),
    'h2':tf.Variable(tf.random_normal([n_neurons_in_h1, n_neurons_in_h2]),name='weights2'),
    'out':tf.Variable(tf.random_normal([n_neurons_in_h2, n_classes]), name='weightsOut')
}

In [35]:
biases={
    'b1': tf.Variable(tf.truncated_normal([n_neurons_in_h1]), name='biases1'),
    'b2': tf.Variable(tf.random_normal([n_neurons_in_h2]),name='biases2'),
    'out':tf.Variable(tf.random_normal([n_classes]), name='biasesOut')

}

In [36]:
# Calling a model
y=multilayer_perceptron(x,weights,biases)

In [37]:
#cost function
cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y,labels=y_))
#optimizer
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy_loss)

In [38]:
# initialization of all variables
initial = tf.global_variables_initializer()

loss_trace = []
train_acc = []
test_acc = []
#creating a session
with tf.Session() as sess:
    sess.run(initial)
    writer = tf.summary.FileWriter('./graphs/linear_reg', sess.graph)

    
    # training loop over the number of epoches
    for epoch in range(training_epochs):
    
        sess.run(train_step,feed_dict={x:X_train,y_:y_train})
        cost=sess.run(cross_entropy_loss,feed_dict={x:X_train,y_:y_train})
        #cost_history=np.append(cost_history,cost)
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="Accuracy")
        pred_y=sess.run(y,feed_dict={x:X_test})
        mse=tf.reduce_mean(tf.square(pred_y-y_test))
        mse_=sess.run(mse)
        #mse_history.append(mse_)
        accuracy=sess.run(accuracy,feed_dict={x:X_train,y_:y_train})
        #accuracy_history.append(accuracy)
        #loss_trace.append(cost)
        #train_acc.append(accuracy)
        #test_acc.append(accuracy_test)
        print('epoch:',epoch,'-','cost:',cost,'-','Train Accuracy:',accuracy)
        
# Plot mse and accuracy graph
#plt.plot(mse_history,'r')
#plt.show()
#plt.plot(accuracy_history)
#plt.show
        

epoch: 0 - cost: 0.56925327 - Train Accuracy: 0.62857145
epoch: 1 - cost: 0.5671864 - Train Accuracy: 0.62857145
epoch: 2 - cost: 0.56623125 - Train Accuracy: 0.62857145
epoch: 3 - cost: 0.56555 - Train Accuracy: 0.62857145
epoch: 4 - cost: 0.5649033 - Train Accuracy: 0.62857145
epoch: 5 - cost: 0.5642329 - Train Accuracy: 0.62857145
epoch: 6 - cost: 0.56346744 - Train Accuracy: 0.62857145
epoch: 7 - cost: 0.56248665 - Train Accuracy: 0.62857145
epoch: 8 - cost: 0.560604 - Train Accuracy: 0.62857145
epoch: 9 - cost: 0.55827343 - Train Accuracy: 0.62857145
epoch: 10 - cost: 0.55677265 - Train Accuracy: 0.62857145
epoch: 11 - cost: 0.55553067 - Train Accuracy: 0.62857145
epoch: 12 - cost: 0.5550044 - Train Accuracy: 0.62857145
epoch: 13 - cost: 0.554496 - Train Accuracy: 0.62857145
epoch: 14 - cost: 0.5539924 - Train Accuracy: 0.62857145
epoch: 15 - cost: 0.5534897 - Train Accuracy: 0.62857145
epoch: 16 - cost: 0.5529851 - Train Accuracy: 0.62857145
epoch: 17 - cost: 0.55247504 - Train A

KeyboardInterrupt: 