In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import re
import random
import time # just to see how fast/slow things run

from scipy.misc import imread
from sklearn.model_selection import train_test_split
from PIL import Image

from sklearn.ensemble import RandomForestClassifier

In [2]:
melanoma_raw_data = pd.read_csv('./melanoma/melanoma_dataset.csv').fillna(0)
melanoma_raw_data.head()

Unnamed: 0,Image_Name,Common_Nevus,Atypical_Nevus,Melanoma,Asymmetry,Pigment_Network,Dots_Globules,Streaks,Regression_Area,Blue-Whitish_Veil,White,Red,Light-Brown,Dark-Brown,Blue-Gray,Black
0,IMD003,X,0,0,0,T,A,A,A,A,0,0,0,X,0,0
1,IMD009,X,0,0,0,T,A,A,A,A,0,0,X,0,0,0
2,IMD016,X,0,0,0,T,T,A,A,A,0,0,X,X,0,0
3,IMD022,X,0,0,0,T,A,A,A,A,0,0,X,0,0,0
4,IMD024,X,0,0,0,T,A,A,A,A,0,0,X,X,0,0


In [3]:
# hard_coded

names          = list(melanoma_raw_data['Image_Name'])
processed_data = []

def transform_1(array):
    return list(array == 'X')

def transform_2(array):
    output = []
    
    for i in array:
        temp = [0, 0, 0]
        temp[i] = 1
        output.append(temp)
    
    return np.array(output).T

def transform_3(array):
    return np.array(array == 'T')

def transform_4(array):
    output = []
    
    for i in array:
        temp = [0, 0, 0]
        
        if i == 'A':
            temp[0] = 1
        elif i == 'AT':
            temp[1] = 1
        else:
            temp[2] = 1
            
        output.append(temp)
            
    return np.array(output).T

def transform_5(array):
    return list(array == 'P')

processed_data.append(transform_1(melanoma_raw_data['Common_Nevus']))
processed_data.append(transform_1(melanoma_raw_data['Atypical_Nevus']))
processed_data.append(transform_1(melanoma_raw_data['Melanoma']))
processed_data = np.concatenate((np.array(processed_data), transform_2(melanoma_raw_data['Asymmetry'])))
processed_data = np.array(list(processed_data) + [list(transform_3(melanoma_raw_data['Pigment_Network']))])
processed_data = np.concatenate((processed_data, transform_4(melanoma_raw_data['Dots_Globules'])))

color_one_hot = []

for remaining in list(melanoma_raw_data)[-9:-6]:
    color_one_hot.append(transform_5(melanoma_raw_data[remaining]))

for color in list(melanoma_raw_data)[-6:]:
    color_one_hot.append(transform_1(melanoma_raw_data[color]))

processed_data = np.concatenate((processed_data, np.array(color_one_hot, dtype=np.float32)))

In [90]:
x_train = ['/Users/shamalama/Documents/GitHub/AI4GoodMedicalModel/melanoma/PH2Dataset/PH2 Dataset images/' + img + '/' + img + '_Dermoscopic_Image/' + img + '.bmp' for img in names]
y_train = processed_data.T

zipped_data = list(zip(x_train, y_train))
random.shuffle(zipped_data)

N = int(len(x_train)*0.8) # 80% is ratio of training vs. test
_x, _y = list(zip(*zipped_data))
_x = list(_x)
_y = list(_y)

x_train = _x[:N]
x_test  = _x[N:]
y_train = np.array(_y[:N]).reshape(19, -1, 1)
y_test  = np.array(_y[N:]).reshape(19, -1, 1)

In [91]:
def new_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))

def new_biases(length):
    return tf.Variable(tf.constant(0.05, shape=[length]))

def new_conv_layer(input,             
                   num_input_channels,
                   filter_size,       
                   num_filters,  
                   name,
                   use_pooling=True): 

    shape   = [filter_size, filter_size, num_input_channels, num_filters]
    weights = new_weights(shape=shape)
    biases  = new_biases(length=num_filters) # one for each filter

    layer = tf.nn.conv2d(input=input,
                         filter=weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')

    layer += biases

    if use_pooling:
        layer = tf.nn.max_pool(value=layer,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

    layer = tf.nn.relu(layer, name=name)

    return layer, weights


def flatten_layer(layer):
    layer_shape  = layer.get_shape()
    num_features = layer_shape[1:4].num_elements()
    layer_flat   = tf.reshape(layer, [-1, num_features])

    # Return both the flattened layer and the number of features.
    return layer_flat, num_features


def new_fc_layer(input,          # The previous layer.
                 num_inputs,     # Num. inputs from prev. layer.
                 num_outputs,    # Num. outputs.
                 name,
                 use_relu=True): # Use Rectified Linear Unit (ReLU)?

    # Create new weights and biases.
    weights = new_weights(shape=[num_inputs, num_outputs])
    biases = new_biases(length=num_outputs)

    # Calculate the layer as the matrix multiplication of
    # the input and weights, and then add the bias-values.

    # Use ReLU?
    if use_relu:
        layer = tf.nn.relu(tf.add(tf.matmul(input, weights), biases), name=name)
    else:
        layer = tf.add(tf.matmul(input, weights), biases, name=name)

    return layer

def create_batch_tuples(length, batch_size):
    output = np.arange(0, length, batch_size)
    
    if output[-1] != length:
        output = np.append(output, length)
        
    return list(zip(output[:-1], output[1:]))

def indicator(array):
    return np.concatenate(array > 0)

In [99]:
def convolutional_neural_network(x_train, y_train, x_test, y_test, batch_size, number):
    '''
    x_train is the path of the images
    '''
    NUM_EPOCHS = 3
    
    n = len(x_train)
    
    # dimensions are 100
    x = tf.placeholder(dtype=tf.float32, shape=[None, 100, 100, 3], name='x')
    y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='y')
    
    layer_conv1, weights_conv1 = new_conv_layer(input=x, 
                                                num_input_channels=3, 
                                                filter_size=5, 
                                                num_filters=8, 
                                                name='layer_conv1')
    layer_conv2, weights_conv2 = new_conv_layer(input=layer_conv1, 
                                                num_input_channels=8, 
                                                filter_size=7, 
                                                num_filters=32,
                                                name='layer_conv2')
    layer_flat, num_features = flatten_layer(layer_conv2)
    
    layer_fc = new_fc_layer(input=layer_flat,
                            num_inputs=num_features,
                            num_outputs=32,
                            name='layer_fc',
                            use_relu=True)
    
    out = new_fc_layer(input=layer_fc,
                       num_inputs=32,
                       num_outputs=1,
                       name='out',
                       use_relu=False)
    # range of out is \mathbb{R}^d, with d = 1
    
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=out, labels=y)
    cost          = tf.reduce_mean(cross_entropy)
    optimizer     = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
    
    batch_tuples = create_batch_tuples(len(x_train), batch_size)
    x_reshaped   = np.array([np.array(Image.open(pic).resize((100, 100))) for pic in x_train], dtype=np.float32)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for e in range(NUM_EPOCHS):
            
            global prediction  # for debugging
            prediction = []

            for (start, end) in batch_tuples:
                
#                print(start)
                
                sess.run([optimizer], feed_dict={x: x_reshaped[start: end],
                                                 y: y_train[start: end]})
        
            '''for (start, end) in batch_tuples:
                
                x_test_reshaped = np.array([imread(pic) for pic in x_test[start: end]])
                
                batch_preds = list(indicator(out.eval({x: x_test_reshaped})))
                prediction += batch_preds
                
            
            accuracy = np.mean(prediction == y_test)
            print('Epoch {}, accuracy: {}%'.format(e, np.mean(accuracy)*100))'''
        
        
        batch_tuples_test = create_batch_tuples(len(x_test), batch_size)
        x_test_reshaped   = np.array([np.array(Image.open(pic).resize((100, 100))) for pic in x_test], dtype=np.float32)
        
        y_pred = []
        for (start, end) in batch_tuples_test:
            
            batch_y_pred = out.eval({x: x_test_reshaped[start:end]})
            y_pred.append(batch_y_pred)

        saver = tf.train.Saver()
        saver.save(sess, './Melanoma_Cnn_' + str(number))
        
    return np.concatenate(y_pred)

In [100]:
num_classes = len(y_train[0])

y_pred = []

for pos in range(19):
    print(pos)
    y_pred.append(convolutional_neural_network(x_train, y_train[pos], x_test, y_test[pos], 16, pos))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


IndexError: index 19 is out of bounds for axis 0 with size 19

In [111]:
outcome = np.array(y_pred).T[0][0]
outcome

array([-1.9185889 , -1.3620255 , -6.0662518 , -1.5457033 ,  0.23240742,
       -0.25563607, -1.689253  ,  2.7209263 , -0.44664657,  1.543501  ,
       -6.7909174 , -1.5091423 , -0.5499488 , -4.9956775 ,  0.54936534,
       -3.2218692 , -1.3565081 , -1.1194694 , -3.621114  ], dtype=float32)

array([-1.9185889, -1.3620255], dtype=float32)

In [152]:
def meaning(outcome):
    return np.array([np.argmax(outcome[:3]), # diagnosis
                     np.argmax(outcome[3:6]), # asymmetry
                     outcome[7] > 0,        # pigment network
                     np.argmax(outcome[8:11]),     # dots/globules
                     outcome[11] > 0, # streaks
                     outcome[12] > 0, # regression areas
                     np.argmax(outcome[13:19])])

In [179]:
y_test.T[0]

array([[1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1.,
        0., 0., 0.],
       [0., 1., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
        0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 0., 1.,
        1., 0., 0.],
       [1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
        1., 0., 1.],
       [0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 1.,
        1., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        1., 0., 1.],
       [1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0.],
       [1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 0.,
        0., 1., 0.],
       [0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0.,
        1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1.

In [181]:
for i in range(len(np.array(y_pred).T[0])):
    print(np.mean(meaning(np.array(y_pred).T[0][i]) == meaning(y_test.T[0][i])))
    print()

0.2857142857142857

0.5714285714285714

0.42857142857142855

0.42857142857142855

0.42857142857142855

0.5714285714285714

0.2857142857142857

0.2857142857142857

0.42857142857142855

0.14285714285714285

0.5714285714285714

0.2857142857142857

0.2857142857142857

0.42857142857142855

0.42857142857142855

0.2857142857142857

0.2857142857142857

0.42857142857142855

0.42857142857142855

0.42857142857142855

0.42857142857142855

0.2857142857142857

0.14285714285714285

0.2857142857142857

0.2857142857142857

0.14285714285714285

0.42857142857142855

0.42857142857142855

0.14285714285714285

0.2857142857142857

0.14285714285714285

0.42857142857142855

0.14285714285714285

0.42857142857142855

0.2857142857142857

0.2857142857142857

0.0

0.2857142857142857

0.14285714285714285

0.14285714285714285



In [195]:
y = np.argmax(processed_data[:3].T, axis=1)
x = processed_data[3:].T

zipped_data_for_tree = list(zip(x, y))
random.shuffle(zipped_data_for_tree)

x_tree, y_tree = list(zip(*zipped_data_for_tree))
N_tree = int(len(x_tree)*0.8)
x_train_tree = x_tree[N_tree:]
y_train_tree = y_tree[N_tree:]
x_test_tree = x_tree[:N_tree]
y_test_tree = y_tree[:N_tree]

clf = RandomForestClassifier()
clf.fit(x_train_tree, y_train_tree)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [197]:
np.mean(clf.predict(x_test_tree) == y_test_tree)

0.8875

In [207]:
def find_index(array, obj):
    return list(array).index(obj)

print(find_index(np.array(melanoma_raw_data)[:, 0], 'IMD003'))
print(find_index(np.array(melanoma_raw_data)[:, 0], 'IMD002'))

0
80


In [210]:
print(np.array(melanoma_raw_data)[0])
print(np.array(melanoma_raw_data)[80])

['IMD003' 'X' 0 0 0 'T' 'A' 'A' 'A' 'A' 0 0 0 'X' 0 0]
['IMD002' 0 'X' 0 1 'AT' 'A' 'A' 'A' 'A' 0 0 'X' 'X' 0 0]


In [211]:
list(melanoma_raw_data)

['Image_Name',
 'Common_Nevus',
 'Atypical_Nevus',
 'Melanoma',
 'Asymmetry',
 'Pigment_Network',
 'Dots_Globules',
 'Streaks',
 'Regression_Area',
 'Blue-Whitish_Veil',
 'White',
 'Red',
 'Light-Brown',
 'Dark-Brown',
 'Blue-Gray',
 'Black']

Atypical_Nevus, Asymmetry=1, Pigment_network=Atypical, Dot_globules= Absent, Streaks= Absent, Regression_areas=Absent, Blue_whitish Veil- Absent, color=Brown

• 