<a href="https://colab.research.google.com/github/karthikm15/Semi-Supervised-GANs-For-Melanoma-Detection/blob/main/Semi_Supervised_GAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from keras import backend as K

from keras.datasets import mnist
from keras.layers import (Activation, BatchNormalization, Concatenate, Dense,
                          Dropout, Flatten, Input, Lambda, Reshape)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.utils import to_categorical


import PIL
from PIL import Image
import os
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import boto3
import shutil

from numpy import vstack

In [26]:
img_rows = 100
img_cols = 100
channels = 1

batch_size = 16

img_shape = (img_rows, img_cols, channels)

z_dim = 100

num_classes = 2

list1 = os.listdir('./unlabeled_resize/')
number_files_unlabeled = len(list1)
num_of_unlabeled_imgs = number_files_unlabeled

list2_m = os.listdir('./train_resize/melanoma/')
number_files_train_m = len(list2_m)
list2_nm = os.listdir('./train_resize/non-melanoma/')
number_files_train_nm = len(list2_nm)
number_files_train = number_files_train_m + number_files_train_nm
num_of_train_imgs = number_files_train

list3_m = os.listdir('./valid_resize/melanoma/')
number_files_test_m = len(list3_m)
list3_nm = os.listdir('./valid_resize/non-melanoma/')
number_files_test_nm = len(list3_nm)
number_files_test = number_files_test_m + number_files_test_nm
num_of_test_imgs = number_files_test

In [14]:
def prepare(filepath, IMG_SIZE):
    img_array = cv2.imread(filepath) 
    try:
      res = cv2.resize(img_array, dsize=(IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
    except:
      print(filepath)
    res = cv2.cvtColor(res, cv2.COLOR_BGR2RGB)
    return res



class Dataset:
    def __init__(self):
        folders = ['melanoma', 'non-melanoma']
        
        self.x_train = []
        self.y_train = []
        self.x_test = []
        self.y_test = []
        self.x_unlabeled = []
        for folder in folders:
            for filename in os.listdir("./train_resize/" + folder):
                if (filename != '.ipynb_checkpoints'):
                  image_data = prepare(("./train_resize/" + folder + "/" + filename), 100)
                  index = folders.index(folder)
                  
                  self.x_train.append(image_data)
                  self.y_train.append(index)
        
        for folder in folders:
            for filename in os.listdir("./valid_resize/" + folder):
                if (filename != '.ipynb_checkpoints'):
                  image_data = prepare(("./valid_resize/" + folder + "/" + filename), 100)
                  index = folders.index(folder)
                  
                  self.x_test.append(image_data)
                  self.y_test.append(index)
        
        self.x_train = np.array(self.x_train)
        self.y_train = np.array(self.y_train)
        self.x_test = np.array(self.x_test)
        self.y_test = np.array(self.y_test)
        
        for filename in os.listdir("./unlabeled_resize/"):
            if (filename != '.ipynb_checkpoints'):
              image_data = prepare(("./unlabeled_resize/" + filename), 100)

              self.x_unlabeled.append(image_data)
        
        self.x_unlabeled = np.array(self.x_unlabeled)
            
        def preprocess_imgs(x, process_img):
            x = (x.astype(np.float32) - 127.5) / 127.5
            if (process_img):
                x = x.reshape(3, -1, 100, 100, 1)[0]
            return x

        def preprocess_labels(y):
            return y.reshape(-1, 1)

        self.x_train = preprocess_imgs(self.x_train, True)
        self.y_train = preprocess_labels(self.y_train)

        self.x_test = preprocess_imgs(self.x_test, True)
        self.y_test = preprocess_labels(self.y_test)
        
        self.x_unlabeled = preprocess_imgs(self.x_unlabeled, True)

    def batch_labeled(self, batch_size):
        idx = np.random.randint(0, num_of_train_imgs, batch_size)
        imgs = self.x_train[idx]
        labels = self.y_train[idx]
        return imgs, labels

    def batch_unlabeled(self, batch_size):
        idx = np.random.randint(0, num_of_unlabeled_imgs, batch_size)
        imgs = self.x_unlabeled[idx]
        return imgs

    def training_set(self):
        return self.x_train, self.y_train

    def test_set(self):
        return self.x_test, self.y_test

dataset = Dataset()

In [15]:

def build_generator(z_dim):

    model = Sequential()
    model.add(Dense(256 * 25 * 25, input_dim=z_dim))
    model.add(Reshape((25, 25, 256)))

    model.add(Conv2DTranspose(128, kernel_size=3, strides=2, padding='same'))

    model.add(BatchNormalization())

    model.add(LeakyReLU(alpha=0.01))

    model.add(Conv2DTranspose(64, kernel_size=3, strides=1, padding='same'))

    model.add(BatchNormalization())

    model.add(LeakyReLU(alpha=0.01))

    model.add(Conv2DTranspose(1, kernel_size=3, strides=2, padding='same'))

    model.add(Activation('tanh'))

    return model

In [16]:
def build_discriminator_net(img_shape):

    model = Sequential()

    model.add(
        Conv2D(batch_size*2,
               kernel_size=(3),
               strides=2,
               input_shape=(100,100,1),
               padding='same'))

    model.add(LeakyReLU(alpha=0.01))

    model.add(
        Conv2D(batch_size*4,
               kernel_size=(3),
               strides=2,
               input_shape=(100,100,1),
               padding='same'))

    model.add(BatchNormalization())

    model.add(LeakyReLU(alpha=0.01))

    model.add(
        Conv2D(batch_size*8,
               kernel_size=(3),
               strides=2,
               input_shape=(100,100,1),
               padding='same'))

    model.add(BatchNormalization())

    model.add(LeakyReLU(alpha=0.01))

    model.add(Dropout(0.5))

    model.add(Flatten())

    model.add(Dense(num_classes))

    return model


In [17]:
def build_discriminator_supervised(discriminator_net):

    model = Sequential()

    model.add(discriminator_net)

    model.add(Activation('softmax'))

    return model

In [18]:
def build_discriminator_unsupervised(discriminator_net):

    model = Sequential()

    model.add(discriminator_net)

    def predict(x):
        prediction = 1.0 - (1.0 /
                            (K.sum(K.exp(x), axis=-1, keepdims=True) + 1.0))

        return prediction

    model.add(Lambda(predict))

    return model

In [19]:
def build_gan(generator, discriminator):

    model = Sequential()

    model.add(generator)
    model.add(discriminator)

    return model

In [20]:
discriminator_net = build_discriminator_net(img_shape)

discriminator_supervised = build_discriminator_supervised(discriminator_net)
discriminator_supervised.compile(loss='categorical_crossentropy',
                                 metrics=['accuracy'],
                                 optimizer=Adam())

discriminator_unsupervised = build_discriminator_unsupervised(
                                 discriminator_net)
discriminator_unsupervised.compile(loss='binary_crossentropy',
                                   optimizer=Adam())
generator = build_generator(z_dim)
discriminator_unsupervised.trainable = False
gan = build_gan(generator, discriminator_unsupervised)
gan.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate = 0.0001))


In [21]:
supervised_losses = []
iteration_checkpoints = []
def train(iterations, batch_size, sample_interval):

    real = np.ones((batch_size, 1))

    fake = np.zeros((batch_size, 1))

    for iteration in range(iterations):


        imgs, labels = dataset.batch_labeled(batch_size*2)
        labels = to_categorical(labels, num_classes=num_classes)

        imgs_unlabeled = dataset.batch_unlabeled(batch_size)

        z = np.random.normal(0, 1, (batch_size, z_dim))
        gen_imgs = generator.predict(z)
        
        images, label = vstack((imgs_unlabeled, gen_imgs)), vstack((real, fake))

        d_loss_supervised, accuracy = discriminator_supervised.train_on_batch(imgs, labels)
 
        predictions = discriminator_unsupervised.predict(generator.predict(z))

        squared_diff_image = tf.square(1-predictions)
        ssd_images = tf.reduce_sum(squared_diff_image, [1])
        error_images = tf.reduce_mean(ssd_images)

        d_loss_unsupervised = error_images.numpy()
        if (d_loss_unsupervised > 0.05):
            d_loss_real = discriminator_unsupervised.train_on_batch(imgs_unlabeled, real)

            d_loss_fake = discriminator_unsupervised.train_on_batch(gen_imgs, fake)

            d_loss_unsupervised = 0.5 * np.add(d_loss_real, d_loss_fake)


        z = np.random.normal(0, 1, (batch_size, z_dim))
        gen_imgs = generator.predict(z)

        g_loss = gan.train_on_batch(z, np.ones((batch_size, 1)))

        if (iteration + 1) % sample_interval == 0:

            supervised_losses.append(d_loss_supervised)
            iteration_checkpoints.append(iteration + 1)
            
            print(
                "%d [D loss supervised: %.4f, acc.: %.2f%%] [D loss unsupervised: %.4f] [G loss: %f]" %(iteration + 1, d_loss_supervised, 100 * accuracy,
                  d_loss_unsupervised, g_loss))

In [22]:
iterations = 500
sample_interval = 10

train(iterations, batch_size, sample_interval)

10 [D loss supervised: 2.8548, acc.: 34.38%] [D loss unsupervised: 0.1192] [G loss: 0.368392]
20 [D loss supervised: 1.2988, acc.: 53.12%] [D loss unsupervised: 0.0004] [G loss: 0.265805]
30 [D loss supervised: 1.1331, acc.: 59.38%] [D loss unsupervised: 0.0006] [G loss: 0.145179]
40 [D loss supervised: 1.7777, acc.: 56.25%] [D loss unsupervised: 0.0467] [G loss: 0.066678]
50 [D loss supervised: 1.5874, acc.: 53.12%] [D loss unsupervised: 0.0441] [G loss: 0.012702]
60 [D loss supervised: 1.3373, acc.: 62.50%] [D loss unsupervised: 0.0394] [G loss: 0.006635]
70 [D loss supervised: 1.9358, acc.: 53.12%] [D loss unsupervised: 0.0331] [G loss: 0.002638]
80 [D loss supervised: 0.8893, acc.: 43.75%] [D loss unsupervised: 0.0330] [G loss: 0.000664]
90 [D loss supervised: 1.5420, acc.: 59.38%] [D loss unsupervised: 0.0303] [G loss: 0.000462]
100 [D loss supervised: 1.0569, acc.: 68.75%] [D loss unsupervised: 0.0279] [G loss: 0.000103]
110 [D loss supervised: 1.4567, acc.: 43.75%] [D loss unsup

In [25]:
x, y = dataset.test_set()
y = to_categorical(y, num_classes=num_classes)

_, accuracy = discriminator_supervised.evaluate(x, y)
# print("Test Accuracy: %.2f%%" % (100 * accuracy))


def prepare(filepath, IMG_SIZE):
    img_array = cv2.imread(filepath) 
    res = cv2.resize(img_array, dsize=(IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
    return res

img_array = cv2.imread('./train_resize/melanoma/0101_1.jpg')
res = cv2.resize(img_array, dsize=(100, 100), interpolation=cv2.INTER_CUBIC)
res = cv2.cvtColor(res, cv2.COLOR_BGR2RGB)
res=res.reshape(3,100,100)
x, y = dataset.training_set()

z_one = np.random.normal(0, 1, (batch_size, z_dim))
gen_images = generator.predict(z_one)

