In [133]:
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Input, concatenate
from keras.utils import np_utils
from keras.optimizers import SGD, Adagrad, RMSprop, Adam
from keras.callbacks import ModelCheckpoint
from keras.layers.core import Lambda
import glob
import cv2
import itertools
from skimage.transform import resize
from PIL import Image
from sklearn.cluster import KMeans
from scipy.cluster.vq import *
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from keras import backend as K
from skimage.io import imshow
K.set_image_data_format('channels_last')
np.random.seed(13)

In [2]:
#Obtain images for segementation
def get_imgs(file_path):
    
    img_arr = list()
    gt_img_arr = list()
    
    for filename in glob.glob(file_path):
        
        gt_filename = '/Users/prajwal967/Desktop/AI_Assignment/gt/'+filename[48:60]+'_segmentation.png'

        img = cv2.imread(filename)
        gt_img = cv2.imread(gt_filename)
        
        gt_img = rgb_to_gray(gt_img)

        img = cv2.resize(img,(128, 128), interpolation = cv2.INTER_CUBIC)
        gt_img = cv2.resize(gt_img,(128, 128), interpolation = cv2.INTER_CUBIC)
        
        img_arr.append(img)
        gt_img_arr.append(gt_img)
        
    return [img_arr, gt_img_arr]

In [85]:
#Obtain images for classification
def get_imgs_classify(file_path, label_data):
    
    img_arr = list()
    label = list()
    
    for filename in glob.glob(file_path): 
        
        img = cv2.imread(filename)
        img = cv2.resize(img,(128, 128), interpolation = cv2.INTER_CUBIC)
        img_arr.append(img)
        label.append(label_data)
        
    return [img_arr, label]

In [3]:
#Function converts the image to a numpy array
def imgToarr(img):
    
    return np.array(img)

In [4]:
#Convert image to grayscale
def rgb_to_gray(color_img):
    
    img_gray = cv2.cvtColor(color_img, cv2.COLOR_BGR2GRAY)
    return img_gray

In [5]:
#Resizes the image to the specified dimensions
def resize(img,x,y):
    
    return img.resize((x,y),Image.ANTIALIAS)

In [6]:
#Dice loss.
smooth = 1.

def dice_coef(y_true, y_pred):
    
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_coef_loss(y_true, y_pred):
    
    return -dice_coef(y_true, y_pred)

In [7]:
img_arr, gt_arr = get_imgs('/Users/prajwal967/Desktop/AI_Assignment/segment/*.jpg')

In [8]:
img_arr = np.asarray(img_arr)
ground_truth = np.asarray(gt_arr)

In [9]:
ground_truth = ground_truth.reshape(ground_truth.shape[0], ground_truth.shape[1], ground_truth.shape[2], 1)

In [10]:
split = ShuffleSplit(n_splits = 2, test_size=.20, random_state=0)

In [65]:
#Split the data (Train and test)
segment_index, y = split.split(img_arr)
segment_train_X = img_arr[segment_index[0]]
segment_train_Y = ground_truth[segment_index[0]]
segment_test_X = img_arr[segment_index[1]]
segment_test_Y = ground_truth[segment_index[1]]

In [None]:
#Normalize data

In [66]:
blue_mean = np.mean(segment_train_X[:,:,:,0])
red_mean = np.mean(segment_train_X[:,:,:,1])
green_mean = np.mean(segment_train_X[:,:,:,2])

In [67]:
blue_std = np.std(segment_train_X[:,:,:,0])
red_std = np.std(segment_train_X[:,:,:,1])
green_std = np.std(segment_train_X[:,:,:,2])

In [68]:
segment_train_X = segment_train_X.astype('float32')
segment_test_X = segment_test_X.astype('float32')
segment_train_Y = segment_train_Y.astype('float32')
segment_test_Y = segment_test_Y.astype('float32')

In [69]:
segment_train_Y = segment_train_Y/255
segment_test_Y = segment_test_Y/255

In [15]:
segment_train_X[:,:,:,0] = segment_train_X[:,:,:,0] - blue_mean
segment_train_X[:,:,:,1] = segment_train_X[:,:,:,1] - red_mean
segment_train_X[:,:,:,2] = segment_train_X[:,:,:,2] - green_mean

In [16]:
segment_test_X[:,:,:,0] = segment_test_X[:,:,:,0] - blue_mean
segment_test_X[:,:,:,1] = segment_test_X[:,:,:,1] - red_mean
segment_test_X[:,:,:,2] = segment_test_X[:,:,:,2] - green_mean

In [17]:
segment_train_X[:,:,:,0] = segment_train_X[:,:,:,0]/blue_std
segment_train_X[:,:,:,1] = segment_train_X[:,:,:,1]/red_std
segment_train_X[:,:,:,2] = segment_train_X[:,:,:,2]/green_std

In [18]:
segment_test_X[:,:,:,0] = segment_test_X[:,:,:,0]/blue_std
segment_test_X[:,:,:,1] = segment_test_X[:,:,:,1]/red_std
segment_test_X[:,:,:,2] = segment_test_X[:,:,:,2]/green_std

In [45]:
#Initializing the values for the convolution neural network
nb_epoch = 10
batch_size = 10

In [72]:
#Segementation architecture.
inputs = Input((128, 128, 3))

conv1 = Conv2D(8, (3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv2D(8, (3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

conv2 = Conv2D(16, (3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv2D(16, (3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

conv3 = Conv2D(32, (3, 3), activation='relu', padding='same')(pool2)
conv3 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

conv4 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool3)
conv4 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

conv5 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool4)
conv5 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv5)

up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
conv6 = Conv2D(64, (3, 3), activation='relu', padding='same')(up6)
conv6 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv6)

up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
conv7 = Conv2D(32, (3, 3), activation='relu', padding='same')(up7)
conv7 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv7)

up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
conv8 = Conv2D(16, (3, 3), activation='relu', padding='same')(up8)
conv8 = Conv2D(16, (3, 3), activation='relu', padding='same')(conv8)

up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
conv9 = Conv2D(8, (3, 3), activation='relu', padding='same')(up9)
conv9 = Conv2D(8, (3, 3), activation='relu', padding='same')(conv9)

conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)


model = Model(inputs=[inputs], outputs=[conv10])

In [73]:
# Train the segmentation model.
model.compile(loss = dice_coef_loss, optimizer=Adam(lr=1e-5), metrics=[dice_coef, 'accuracy'])

#Store the model.
filepath = "segment.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

model.fit(segment_train_X, segment_train_Y, batch_size=batch_size, epochs = nb_epoch, callbacks=callbacks_list, \
          validation_split=0.2, shuffle=True)

Train on 1280 samples, validate on 320 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x14b389278>

In [74]:
#Accuracy of segmentation
score = model.evaluate(segment_test_X, segment_test_Y)



In [76]:
#Accuracy of segmentation
print(score[2] * 100)

90.6045227051


# Classification

In [86]:
non_melanoma, non_melanoma_label= get_imgs_classify('/Users/prajwal967/Desktop/AI_Assignment/others/*.jpg', 0)
melanoma, melanoma_label= get_imgs_classify('/Users/prajwal967/Desktop/AI_Assignment/melanoma/*.jpg', 1)

In [87]:
arr = np.concatenate([non_melanoma, melanoma])
labels = np.concatenate([non_melanoma_label, melanoma_label])

In [88]:
#Get the segmented image
arr_segment = model.predict(arr)

In [105]:
arr_segment_thresh = arr_segment

In [106]:
arr_segment_thresh[arr_segment_thresh > 0.5] = 1
arr_segment_thresh[arr_segment_thresh < 0.5] = 0.5

In [135]:
#Perform a stratified split. (Train and test)
X_train, X_test, Y_train, Y_test = \
train_test_split(arr_segment_thresh, labels, test_size=0.3, random_state=42, stratify = labels)

In [108]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [109]:
nb_classes = 2
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

In [121]:
#Initializing the values for the convolution neural network
nb_epoch_classify = 10
batch_size_classify = 10

In [122]:
#Classification architecture
model_classify = Sequential()

model_classify.add(Conv2D(128, (3, 3), border_mode='same',
                        input_shape=X_train.shape[1:]))
model_classify.add(Activation('relu'))
model_classify.add(Conv2D(128, (3, 3)))
model_classify.add(Activation('relu'))
model_classify.add(MaxPooling2D((2,2), strides=(2,2)))

model_classify.add(Conv2D(256, (3, 3), activation='relu'))
model_classify.add(Conv2D(256, (3, 3), activation='relu'))
model_classify.add(MaxPooling2D((2,2), strides=(2,2)))

model_classify.add(Conv2D(128, (3, 3), activation='relu'))
model_classify.add(Conv2D(128, (3, 3), activation='relu'))
model_classify.add(MaxPooling2D((2,2), strides=(2,2)))

model_classify.add(Flatten())
model_classify.add(Dense(4096, activation='relu'))
model_classify.add(Dropout(0.2))
model_classify.add(Dense(4096, activation='relu'))
model_classify.add(Dropout(0.2))
model_classify.add(Dense(nb_classes))
model_classify.add(Activation('sigmoid'))

  after removing the cwd from sys.path.


In [123]:
def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [124]:
#Train the classification model.
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)

# Let's train the model
model_classify.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy', f1])

filepath="classify.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

model_classify.fit(X_train, Y_train, batch_size=batch_size_classify, epochs = nb_epoch_classify, callbacks=callbacks_list, \
          validation_split = 0.2,shuffle=True)

Train on 1120 samples, validate on 280 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x16d391978>

In [126]:
y_pred = model_classify.predict(X_test)
y_pred_copy = np.copy(y_pred)

In [127]:
x = y_pred[:,1]

In [128]:
mean = np.mean(x)
std = np.std(x)
median = np.median(x)

In [129]:
thresh = mean - std

In [130]:
x[x > thresh] = 1
x[x < thresh] = 0.5

In [131]:
x[x == 1] = 0
x[x == 0.5] = 1

In [136]:
f1_score(Y_test_1, x, average=None)

array([ 0.84934277,  0.29383886])

In [138]:
score = model_classify.evaluate(X_test, Y_test)
print("Loss:", score[0])
print("Accuracy:", score[1])
print("F1:", score[2])

Loss: 0.497973384857
Accuracy: 0.813333333333
F1: 0.813333273729
