In [None]:
#imports
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
#import tensorflow_datasets as tfds
from tensorflow import keras
import pandas as pd
import os
import cv2
import keras.layers as kl
from sklearn.utils import shuffle
from sklearn.metrics import classification_report

from keras.layers import Input, Lambda, Dense, Flatten,Dropout
from keras.models import Model
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential

# Mini-batch size: how many samples go through the network per gradient step.
# Rule of thumb noted by the author: a batch of roughly 25-32 with about 100
# epochs is a reasonable start unless the dataset is large.
batch_size = 20

# The position of a name in this list is its integer label:
# benign -> 0, malignant -> 1.
class_names = ['benign', 'malignant']
class_names_label = dict(zip(class_names, range(len(class_names))))
num_classes = len(class_names)

# Every image is resized to IMAGE_SIZE x IMAGE_SIZE pixels when loaded.
IMAGE_SIZE = 150

#function to load data
def load_data(directory='/Users/15044/Desktop/melanoma_cancer_dataset',
              categories=('test', 'train')):
    """Load every split of the image dataset into NumPy arrays.

    The expected layout is ``<directory>/<category>/<class name>/<image>``,
    where the class folders are named after the keys of
    ``class_names_label``.

    Args:
        directory: Root folder of the dataset.
        categories: Split sub-folders to load, in the order they should
            appear in the result.

    Returns:
        A list with one ``(images, labels)`` tuple per entry of
        ``categories``, in that order. With the defaults, index 0 is the
        TEST split and index 1 is the TRAIN split, so callers must unpack
        test first. ``images`` is a float32 array of RGB images resized to
        ``IMAGE_SIZE`` x ``IMAGE_SIZE`` (pixel values are NOT rescaled
        here); ``labels`` is an int32 array of class indices.

    Raises:
        FileNotFoundError: If ``directory`` or a category folder is missing.
    """
    output = []
    # cv2.resize expects dsize as a (width, height) tuple
    target_size = (IMAGE_SIZE, IMAGE_SIZE)

    for category in categories:
        print("loading category " + category + "\n")

        # e.g. <directory>/test or <directory>/train
        path = os.path.join(directory, category)
        images = []
        labels = []

        # os.listdir order is arbitrary; sorting keeps the load order (and
        # therefore any later seeded shuffle) reproducible across machines
        for folder in sorted(os.listdir(path)):
            folder_path = os.path.join(path, folder)

            # ignore stray entries (e.g. .DS_Store) and unknown class folders
            # instead of failing with KeyError / NotADirectoryError
            if folder not in class_names_label or not os.path.isdir(folder_path):
                print("skipping unexpected entry " + folder + "\n")
                continue

            # label is 0 for benign, 1 for malignant
            label = class_names_label[folder]
            print("in folder " + folder + "\n")

            for file in sorted(os.listdir(folder_path)):
                # e.g. <directory>/test/benign/image47.jpg
                img_path = os.path.join(folder_path, file)

                # cv2.imread does not raise on unreadable/non-image files,
                # it returns None; skip those rather than crash in cvtColor
                img = cv2.imread(img_path)
                if img is None:
                    print("skipping unreadable file " + img_path + "\n")
                    continue

                # OpenCV loads images as BGR; convert to RGB, then resize
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, target_size)

                images.append(img)
                labels.append(label)

        # labels only take the values 0 and 1, so an integer dtype is enough
        output.append((np.array(images, dtype='float32'),
                       np.array(labels, dtype='int32')))

    return output

#call load_data() function to create train and test dataset
#load_data() returns the splits in the order it reads them: 'test' first,
#then 'train'. Unpacking them the other way round would silently train on
#the test split and evaluate on the train split.
(test_images, test_labels), (train_images, train_labels) = load_data()

print("data loaded\n")

#the images are loaded class by class, and Keras' validation_split takes the
#LAST fraction of the arrays, so the training data must be shuffled first or
#the validation set would contain a single class
train_images, train_labels = shuffle(train_images, train_labels, random_state=25)

#standardize/normalize the data
#pixel values are rescaled from the 0-255 range to the 0-1 range (the images
#stay RGB); smaller inputs are easier for the network to train on
#this is done by the kl.Rescaling layer at the start of the model, so the
#same scaling is applied automatically in fit(), evaluate() and predict()

#making the model (using Keras sequential model)
model = Sequential([
  #the 3 in input_shape is the number of colour channels (RGB)
  kl.Rescaling(1./255, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
  #Conv2D(number of filters, (kernel height, kernel width)):
  #  the filter count is how many feature maps the layer learns,
  #  the kernel is the window that slides over the image; its size is a
  #  tuning choice worth experimenting with
  #wider stacks (256/256/128 filters) were tried earlier and left out
  kl.Conv2D(16, (3, 3), padding='same', activation='relu'),
  kl.Conv2D(32, (3, 3), padding='same', activation='relu'),
  kl.MaxPooling2D(2, 2),
  #hidden layers
  kl.Conv2D(32, (3, 3), padding='same', activation='relu'),
  kl.MaxPooling2D(2, 2),
  kl.Conv2D(64, (3, 3), padding='same', activation='relu'),
  kl.MaxPooling2D(2, 2),
  kl.Conv2D(128, (3, 3), padding='same', activation='relu'),
  kl.MaxPooling2D(2, 2),
  #flatten the feature maps into one vector per image for the Dense layers
  kl.Flatten(),
  #output layers
  kl.Dense(128, activation='relu'),
  #no activation here: the model outputs raw logits, one per class
  kl.Dense(num_classes)
])

#compiling the model
# optimizer: 'adam' selects the Adam optimizer (an adaptive variant of
#   stochastic gradient descent) with its default settings
# loss: sparse categorical cross-entropy works on integer class labels;
#   from_logits=True because the last Dense layer has no activation
# metrics: accuracy is the figure reported during training and evaluation
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer='adam',
)

print("model compiled\n")



#training the model
#epoch: one complete pass through training data
#batch_size comes from the constant defined at the top of the file so the
#setting lives in one place
history = model.fit(
    train_images,
    train_labels,
    batch_size=batch_size,
    epochs=100,
    validation_split=.2,
)
print('model has been trained\n')

#summary() prints the table itself and returns None, so wrapping it in
#print() would add a stray "None" line
model.summary()


#check with test images
print("testing model accuracy on test image and label set")
#the model was compiled with metrics=['accuracy'], so evaluate() returns
#[loss, accuracy] rather than a single loss value
test_loss, test_accuracy = model.evaluate(test_images, test_labels)

#the last layer has no softmax, so predict() returns one raw logit per class
#(not probabilities); the largest logit is still the most likely class
predictions = model.predict(test_images)
pred_labels = np.argmax(predictions, axis=1)

print(classification_report(test_labels, pred_labels))

#print confusion matrix

#only confusion_matrix and ConfusionMatrixDisplay are used below; the other
#imports are kept because a later part of the notebook may rely on them
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

#fixing labels keeps the matrix num_classes x num_classes even if a class is
#never predicted, so it always lines up with the class names on the axes
cm = confusion_matrix(test_labels, pred_labels, labels=list(range(num_classes)))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

disp.plot()
plt.show()

#training curves: one line per entry in history.history
pd.DataFrame(history.history).plot(figsize=(8,5))
plt.ylim(0, 1)
#plt.show without parentheses only references the function and draws nothing
plt.show()

loading category test

in folder benign

in folder malignant

loading category train

in folder benign

in folder malignant

data loaded

model compiled

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100