In [1]:
#--Tensorflow/keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, SeparableConv2D, MaxPooling2D, Activation, Flatten, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.data import AUTOTUNE
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
import tensorflow as tf

import keras_tuner as kt
#--Sklearn
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelBinarizer
#--Others
from imutils import paths, resize
import matplotlib.pyplot as plt
import os 
from cancer_model.cancernet_kt import CancerNet_kt
import cv2
import numpy as np
import random

### Get the data

In [2]:
main_path = "G:\\pyimage_univ\\CNN_tf\\tf_data\\cancer_data\\"
train_path = main_path+"trainig"
valid_path = main_path+"validation"
test_path = main_path+"test"

In [3]:
trainPath =list(paths.list_images(train_path))
valPath = list(paths.list_images(valid_path))
testPath = list(paths.list_images(test_path))

### Quickly visualize

In [4]:
all_images = list(paths.list_images(main_path))
random_images = random.choices(all_images, k=3)
for i in random_images:
    random_image = cv2.imread(i)
    random_image = cv2.cvtColor(random_image, cv2.COLOR_BGR2RGB)
    random_image = resize(random_image, height=200)
    cv2.imshow("Random cancer image", random_image)
    cv2.waitKey(0)
cv2.destroyAllWindows()

### Deploy tf.data()

In [82]:
#--1. The preprocess pipeline
def load_images(imagePath):
    image = tf.io.read_file(imagePath)
    image = tf.io.decode_png(image, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32) #to transform into tensonrs
    #image = tf.image.per_image_standardization(image) #normalization
    image = tf.image.resize(image, (48,48))
    #--labels
    label = tf.strings.split(imagePath, os.path.sep)[-2]
    label = tf.strings.to_number(label, tf.int32)
    return(image, label)

In [83]:
# 2. Define some augmentations
@tf.function #the decorator makes this function able to be used with tf on the pipeline

def augment(image, label): #we add labels, since the function 'load_images" is getting labels 
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, 0.2)
    return(image, label)

In [84]:
#3. Pass each dataset through the data Pipelines
trainDS = tf.data.Dataset.from_tensor_slices(trainPath)
trainDS = (trainDS.shuffle(len(trainPath)).
            map(load_images, num_parallel_calls=AUTOTUNE). #preprocessing
            map(augment, num_parallel_calls=AUTOTUNE). #augmentation for training
            cache().batch(64).prefetch(AUTOTUNE))

validDS = tf.data.Dataset.from_tensor_slices(valPath)
validDS = (validDS.map(load_images, num_parallel_calls=AUTOTUNE).
                    cache().batch(64).prefetch(AUTOTUNE))

testDS = tf.data.Dataset.from_tensor_slices(testPath)
testDS = (testDS.map(load_images, num_parallel_calls=AUTOTUNE).
                    cache().batch(64).prefetch(AUTOTUNE))

### Class Imbalance

In [85]:
#get the labels
train_paths = list(paths.list_images(train_path))
trainlabels = [int(p.split(os.path.sep)[-2]) for p in train_paths]
#turn them to one hot and sum the amount of each category
trainlabels = to_categorical(trainlabels)
classTotals = trainlabels.sum(axis=0)
#Create a dictionary and fill it
classweights = {}
for i in range(0, len(classTotals)):
    classweights[i] = classTotals.max()/classTotals[i]

### Define the model and compile it

In [86]:
def build(hp):
		# initialize the model along with the input shape to be
		# "channels last" and the channels dimension itself
	model = Sequential()
	inputShape = (48, 48, 3)
	chanDim = -1

		# if we are using "channels first", update the input shape
		# and channels dimension
	if K.image_data_format() == "channels_first":
		inputShape = (3, 48, 48)
		chanDim = 1

		# CONV => RELU => POOL
	model.add(SeparableConv2D(
        hp.Int("sep_conv_1", min_value=32, max_value=96, step=32), (3, 3), padding="same",
		input_shape=inputShape))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(MaxPooling2D(pool_size=(2, 2)))
	model.add(Dropout(0.25))

		# (CONV => RELU => POOL) * 2
	model.add(SeparableConv2D(
            hp.Int("sep_conv_2", min_value=64,max_value=128, step=32), (3, 3), padding="same"))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(SeparableConv2D(64, (3, 3), padding="same"))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(MaxPooling2D(pool_size=(2, 2)))
	model.add(Dropout(0.25))

		# (CONV => RELU => POOL) * 3
	model.add(SeparableConv2D(
            hp.Int("sep_conv_3", min_value=128, max_value=256, step=32), (3, 3), padding="same"))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(SeparableConv2D(128, (3, 3), padding="same"))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))

	model.add(SeparableConv2D(
            hp.Int("sep_conv_4", min_value=128, max_value=256, step=32), (3, 3), padding="same"))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(MaxPooling2D(pool_size=(2, 2)))
	model.add(Dropout(0.25))

		# first (and only) set of FC => RELU layers
	model.add(Flatten())
	model.add(Dense(
            hp.Int("dense_units", min_value=256, max_value=768, step=256)
            ))
	model.add(Activation("relu"))
	model.add(BatchNormalization())
	model.add(Dropout(0.5))

		# binary classifier
	model.add(Dense(1, activation="sigmoid",
			bias_initializer="zeros"))
			
	lr = hp.Choice("learning_rate", values=[1e-1, 1e-2, 1e-3])
	
	model.compile(optimizer=Adam(learning_rate=lr), loss = "categorical_crossentropy", metrics=["accuracy"])

		# return the constructed network architecture
	return model

- ‘hp’ is an object internally passed by tuner to model-building function to help it specify range of hyperparameter values whenever the tuner is called. 
- hp.Int() is used to set the range of a hyperparameter whose values are integers, like for ‘number of filters’ in Convolutional Neural Networks and ‘number of units’ in Dense layer/Fully Connected layer.
- hp.Choice() offers us way to provide specific values (integers or decimal) and functions to be used for hyperparameter tuning.

### Define some Callbacks

In [87]:
early_stop = EarlyStopping(monitor='loss', patience=10, verbose=1)
checkp = ModelCheckpoint("G:\\pyimage_univ\\CNN_tf\\tf_data\\", monitor="val_loss", )
callbacks = [early_stop, checkp]

### Define the keras tuner functions

In [88]:
tuner_hyper = kt.Hyperband(build,
objective='val_accuracy',
max_epochs=10,
factor=3,
seed=42,
directory = "G:\\pyimage_univ\\CNN_tf\\tf_data\\",
project_name="hyperband")

tuner_rand = kt.RandomSearch(build,
objective='val_accuracy',
max_trials=11,
seed=42,
directory = "G:\\pyimage_univ\\CNN_tf\\tf_data\\",
project_name="random")

tuner_bay = kt.BayesianOptimization(build, 
objective="val_accuracy",
max_trials=10,
seed=42,
directory = "G:\\pyimage_univ\\CNN_tf\\tf_data\\",
project_name="Bayes")

INFO:tensorflow:Reloading Oracle from existing project G:\pyimage_univ\CNN_tf\tf_data\hyperband\oracle.json
INFO:tensorflow:Reloading Tuner from G:\pyimage_univ\CNN_tf\tf_data\hyperband\tuner0.json


In [89]:
#I am still working on this, since  the error shouldnt be infinite 0
tuner_hyper.search(x=trainDS, validation_data=validDS, batch_size=64, epochs=10)


Search: Running Trial #68

Value             |Best Value So Far |Hyperparameter
96                |64                |sep_conv_1
96                |64                |sep_conv_2
160               |192               |sep_conv_3
128               |160               |sep_conv_4
512               |768               |dense_units
0.01              |0.01              |learning_rate
17                |2                 |tuner/epochs
6                 |0                 |tuner/initial_epoch
2                 |3                 |tuner/bracket
1                 |0                 |tuner/round
0052              |None              |tuner/trial_id

Epoch 7/17
Epoch 8/17
Epoch 9/17
Epoch 10/17

KeyboardInterrupt: 

### Get best tuner parameters

In [70]:
bestHP = tuner_hyper.get_best_hyperparameters(num_trials=1)[0]

In [72]:
model = tuner_hyper.hypermodel.build(bestHP)
H = model.fit(x=X_train[-1], y=y_train[-1],
	validation_data=(X_val[-1], y_val[-1]), 
	batch_size=64,
	epochs=10, 
	callbacks=callbacks, 
	verbose=1)

Epoch 1/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 2/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 3/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 4/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 5/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 6/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 7/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 8/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 9/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets
Epoch 10/10
INFO:tensorflow:Assets written to: G:\pyimage_univ\CNN_tf\tf_data\assets


In [None]:
predictions = model.predict(X_test, batch_size=32)
print(classification_report(
    y_test.argmax(axis=1), predictions.argmax(axis=1), target_names=["yes","no"]
))

NameError: name 'X_test' is not defined