In [1]:
import cv2
import tensorflow as tf
import tflearn
import matplotlib.pyplot as plt
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from random import shuffle
import numpy as np
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import os


In [6]:
image_size = 50
LR = 1e-3
train_path = 'home/kushalgbk/Desktop/acad/ai-asg2/images/train'
test_path = 'home/kushalgbk/Desktop/acad/ai-asg2/images/test'
modelNAME = 'skin_cancer-{}-{}.model'.format(LR, '2conv-basic')

In [7]:
def categorise_images(img):
    word = img.split('.')[-3]
    if word == 'mal':
        return [0,1]
    elif word == 'beg':
        return [1,0]


In [8]:
def get_resized_image(path):
    img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (image_size,image_size))
    return img


def get_train_data():
    training_data = []
    for img in tqdm(os.listdir(train_path)):
        category = categorise_images(img)
        path = os.path.join(train_path,img)
        img = get_resized_image(path)
        training_data.append([np.array(img),np.array(category)])
    shuffle(training_data)
    np.save('train_data.npy', training_data)
    return training_data

train_data = get_train_data()

100%|██████████████████████████████████████| 2000/2000 [06:22<00:00,  5.23it/s]


In [9]:
def get_test_data():
    testing_data = []
    for img in tqdm(os.listdir(test_path)):
        path = os.path.join(test_path,img)
        img_number = img.split('.')[0]
        img = get_resized_image(path)
        testing_data.append([np.array(img), img_number])
    shuffle(testing_data)
    np.save('test_data.npy', testing_data)
    return testing_data

In [12]:
tf.reset_default_graph()
conv_network = input_data(shape=[None, image_size, image_size, 1], name='input')

conv_network = conv_2d(conv_network, 32, 5, activation='relu')
conv_network = max_pool_2d(conv_network, 5)

conv_network = conv_2d(conv_network, 64, 5, activation='relu')
conv_network = max_pool_2d(conv_network, 5)

conv_network = conv_2d(conv_network, 128, 5, activation='relu')
conv_network = max_pool_2d(conv_network, 5)

conv_network = conv_2d(conv_network, 64, 5, activation='relu')
conv_network = max_pool_2d(conv_network, 5)

conv_network = conv_2d(conv_network, 32, 5, activation='relu')
conv_network = max_pool_2d(conv_network, 5)

conv_network = fully_connected(conv_network, 1024, activation='relu')
conv_network = dropout(conv_network, 0.8)

conv_network = fully_connected(conv_network, 2, activation='softmax')
conv_network = regression(conv_network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
model = tflearn.DNN(conv_network, tensorboard_dir='log')


hdf5 is not supported on this machine (please install/reinstall h5py for optimal experience)
curses is not supported on this machine (please install/reinstall curses for an optimal experience)
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [16]:
if os.path.exists('{}.meta'.format(modelNAME)):
    model.load(modelNAME)

train = train_data[:-500]
test = train_data[-500:]
X = np.array([i[0] for i in train]).reshape(-1,image_size,image_size,1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1,image_size,image_size,1)
test_y = [i[1] for i in test]


model.fit({'input': X}, {'targets': Y}, n_epoch=20, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=modelNAME)

model.save(modelNAME)

INFO:tensorflow:Restoring parameters from usr/lib/python3/scripts/skin_cancer-0.001-2conv-basic.model
Training Step: 2297  | total loss: 0.25741 | time: 8.416s
| Adam | epoch: 01| loss: 0.25741 - acc: 0.7477 -- iter: 1372/1500
Training Step: 2285  | total loss: 0.27228 | time: 8.748s
| Adam | epoch: 02 | loss: 0.27228 - acc: 0.7188 | val_loss: 0.32982 - val_acc: 0.7810 -- iter: 1384/1500
--


In [18]:
test_data = get_test_data()
for num, data in enumerate(test_data[:20]):
    
    img_num = data[1]
    img_data = data[0]
    orig = img_data
    data = img_data.reshape(image_size,image_size,1)
    model_out = model.predict([data])[0]
    
    if np.argmax(model_out) == 1:
         str_label='malignant'
    else:
         str_label='benign'
        


100%|██████████████████████████████████████████| 20/20 [00:00<00:00, 51.13it/s]


In [30]:
Y_true=[i[1] for i in test]

Y_pred=[]
for num,data in enumerate(test):
    img_data = data[0]
    orig = img_data
    data = img_data.reshape(image_size,image_size,1)
    model_out = model.predict([data])[0]
    Y_pred.append(model_out)

for data in (Y_pred):
    if data[0]>0.5: data[0]=int(1)
    else: data[0]=int(0)
    if data[1]>0.5: data[1]=int(1)
    else: data[1]=int(0)
y_true=[]
y_pred=[]
for data in (Y_true):
    if(data[0]==1 and data[1]==0):
        y_true.append(1)
    else: y_true.append(0)

for data in (Y_pred):
    if(data[0]==1 and data[1]==0):
        y_pred.append(1)
    else: y_pred.append(0)
        

confusion_matrix=confusion_matrix(y_true, y_pred)


In [33]:
print("Confusion Matrix")
print("          ben  mal")
for i in (1,0):
    if i==1: print("ben   ",end=' ')
    else: print("mal   ",end=' ') 
    for j in (1,0):
        print(confusion_matrix[i][j],end='      ') 
    print()

Confusion Matrix
      ben  mal
ben   270    143      
mal   35      52      


In [35]:
a=confusion_matrix[1][1]
b=confusion_matrix[1][0]
c=confusion_matrix[0][1]
d=confusion_matrix[0][0]
print("Class Benign: Precision and Recall")
print("Precision - ",end=' ',a/(a+c))
print("Recall - ",end=' ',a/(a+b))


Class Benign: Precision and Recall 
Precision - 0.6453815565467411
Recall - 0.6413148951504479


In [36]:
print("Class Malignant: Precision and Recall")
print("Precision - ",end=' ',b/(b+d))
print("Recall - ",end=' ',b/(a+b))


Class Malignant: Precision and Recall
Precision - 0.3007107107107107
Recall - 0.05467004167821414
