# PARAMETERS 

In [25]:
# Run configuration shared by the cells below.
problem_name = 'template'       # prefix of the file name the trained model is saved under
model_architecture = 'VGG_16'   # architecture key dispatched on by choosing_model()
weights_path = None             # optional weights file handed to the training functions
target_size = 224, 224          # image size requested from the directory generators
batch_size = 1

# Upper bound on the number of epochs; training stops earlier after x epochs
# without improvement (handled by a callback).
epochs = 100

In [26]:
from keras.models import Sequential
from keras.models import load_model
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Conv2D, Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.layers import Activation
from keras.constraints import maxnorm 
from keras.optimizers import SGD 
from keras.utils import np_utils 
from keras import backend as K 
from keras.utils.data_utils import get_file
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
import keras
K.set_image_dim_ordering('tf') #ordem 'th' ou 'tf' 
import numpy as np
from numpy import genfromtxt
import math 

from timeit import default_timer as timer
from time import time as tick
import matplotlib.pyplot as plt 
import pickle 
from os import listdir
from PIL import Image, ImageOps
from os.path import isfile, join
import os
from scipy.misc	import toimage 
from scipy import misc, ndimage
import scipy.fftpack as pack
import scipy.misc
from scipy.ndimage import rotate
from sklearn.datasets import fetch_mldata
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from skimage.io import imread
from skimage.transform import resize

from sklearn.metrics import roc_auc_score
import pandas as pd
import pathlib

# Fix the NumPy random seed so that the results can be reproduced between runs.
seed = 9
np.random.seed(seed)

# Utils

In [27]:
def print_model(model,fich):
    """Write a diagram of `model`, with layer names and shapes, to the file `fich`."""
    import keras.utils as keras_utils
    keras_utils.plot_model(model, to_file=fich, show_layer_names=True, show_shapes=True)
    
def print_history_accuracy(history):
    """Plot the training and validation accuracy per epoch.

    Args:
        history: object whose ``history`` attribute is a dict mapping metric
            names to per-epoch values (e.g. the value returned by
            ``model.fit_generator``).

    Raises:
        KeyError: if the accuracy metric is present under neither the
            'acc'/'val_acc' nor the 'accuracy'/'val_accuracy' names.
    """
    metrics = history.history
    print(metrics.keys())
    # Older Keras releases log 'acc'/'val_acc'; newer ones use
    # 'accuracy'/'val_accuracy'. Accept whichever is present.
    train_key = 'acc' if 'acc' in metrics else 'accuracy'
    plt.plot(metrics[train_key])
    plt.plot(metrics['val_' + train_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
def print_history_loss(history):
    """Plot the training and validation loss per epoch.

    `history.history` must contain the 'loss' and 'val_loss' series.
    """
    metrics = history.history
    print(metrics.keys())
    # Training curve first, validation curve second (matches the legend order).
    for series_name in ('loss', 'val_loss'):
        plt.plot(metrics[series_name])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None):
    """Show a sequence of images in a grid of matplotlib subplots.

    Args:
        ims: sequence of images. When the first element is a NumPy array the
            whole sequence is stacked, cast to uint8 and, if its last axis is
            not of length 3, transposed with axes (0, 2, 3, 1).
        figsize: figure size passed to ``plt.figure``.
        rows: number of rows in the grid.
        interp: when true ``interpolation=None`` is passed to ``imshow``,
            otherwise ``'none'``.
        titles: optional sequence with one title per image.
    """
    if len(ims) == 0:
        return  # nothing to draw
    if type(ims[0]) is np.ndarray:
        ims = np.array(ims).astype(np.uint8)
        if (ims.shape[-1] != 3):
            ims = ims.transpose((0,2,3,1))
    f = plt.figure(figsize=figsize)
    # Ceiling division: the smallest number of columns such that rows * cols
    # holds every image (the previous parity test could yield too few cells).
    cols = -(-len(ims) // rows)
    for i in range(len(ims)):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=16)
        plt.imshow(ims[i], interpolation=None if interp else 'none')

def load_batch(fpath, label_key='labels'):
    """Load one pickled batch of images and labels.

    The file must contain a pickled dict with bytes keys; the keys are decoded
    as UTF-8 before lookup.

    Args:
        fpath: path of the pickle file.
        label_key: (decoded) key under which the labels are stored.

    Returns:
        (data, labels), where data is the 'data' entry reshaped to
        (n, 3, 32, 32) and labels is the entry stored under `label_key`.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the file even when loading fails.
    with open(fpath, 'rb') as f:
        d = pickle.load(f, encoding='bytes')
    # decode utf8
    d = {k.decode('utf8'): v for k, v in d.items()}
    data = d['data']
    labels = d[label_key]
    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels


def rotate_resize(temp, tam_image):
    """Return a centred square crop of `temp`, resized to tam_image x tam_image.

    Images that are taller than wide are rotated by 90 degrees first. The
    resized crop is multiplied by 255 and returned as uint8.
    """
    # Rotate if necessary so that the crop below is taken along the width.
    if temp.shape[0] > temp.shape[1]:
        temp = rotate(temp,90)

    # Crop a square of side `height` from the centre columns, then resize it.
    height, width = temp.shape[0], temp.shape[1]
    left = int((width - height)/2)
    square = temp[0:height, left:left + height]
    scaled = resize(square, (tam_image, tam_image))
    return (255 * scaled).astype(np.uint8)

# Models

In [28]:
#choosing model

def choosing_model(model_architecture, num_classes,epochs, weights_path=None):
    """Build the compiled model registered under `model_architecture`.

    Args:
        model_architecture: 'VGG_16' or 'my1'.
        num_classes: number of output classes.
        epochs: number of training epochs, forwarded to the model builder.
        weights_path: optional weights file, forwarded to VGG_16 (my1 takes
            no weights argument).

    Returns:
        The model, or None (after printing a message) when the architecture
        name is not recognised.
    """
    model = None

    if model_architecture == 'VGG_16':
        # Forward the caller's weights_path instead of always passing None.
        model = VGG_16(num_classes,epochs, weights_path=weights_path)
    elif model_architecture == 'my1':
        model = my1(num_classes,epochs)

    if model is None:
        print('non valid model')

    return model
            

In [29]:
def VGG_16(num_classes,epochs, weights_path=None, input_shape=(224,224,3)):
    """Build and compile a VGG-16 style convolutional network.

    Five blocks of zero-padded 3x3 convolutions (64, 64 / 128, 128 /
    256 x3 / 512 x3 / 512 x3 filters), each followed by 2x2 max pooling, then
    two 4096-unit dense layers with dropout and a softmax output layer.

    Args:
        num_classes: size of the softmax output layer.
        epochs: number of training epochs; the SGD decay is 0.01 / epochs.
        weights_path: optional weights file loaded before compiling.
        input_shape: shape of one input image; defaults to the previously
            hard-coded (224, 224, 3).

    Returns:
        The model compiled with categorical cross-entropy, SGD
        (lr 0.01, momentum 0.9) and the accuracy metric.
    """
    # (filters, number of convolutions) for each of the five blocks.
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()
    is_first = True
    for filters, n_convs in conv_blocks:
        for _ in range(n_convs):
            # Only the very first layer declares the input shape.
            pad_kwargs = {'input_shape': input_shape} if is_first else {}
            model.add(ZeroPadding2D((1,1), **pad_kwargs))
            # Keras 2 form: the kernel size is a tuple. The legacy
            # Convolution2D(filters, 3, 3) call relies on a compatibility shim.
            model.add(Conv2D(filters, (3, 3), activation='relu'))
            is_first = False
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    if weights_path:
        model.load_weights(weights_path)

    # Compile model
    lrate = 0.01
    decay = lrate/epochs
    sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [30]:
# train_acc = 1.0 val_acc = 0.9944
def my1(num_classes, epochs):
    """Build and compile a small four-stage convolutional classifier.

    The network takes 128x128x3 inputs and ends in a softmax layer with
    `num_classes` units. `epochs` is accepted but not used by this builder.
    """
    model = Sequential()

    # Stage 1 declares the input shape.
    model.add(Conv2D(30, (3, 3), input_shape=(128, 128, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Stages 2-4: batch norm -> 3x3 convolution -> 2x2 pooling.
    # Only the 70-filter convolution uses a stride of 2.
    later_stages = ((50, {}), (70, {'strides': 2}), (90, {}))
    for filters, extra in later_stages:
        model.add(BatchNormalization())
        model.add(Conv2D(filters, (3, 3), activation='relu', padding='same', **extra))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Dropout(0.25))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Callbacks

In [31]:
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the loss reported at the end of every batch.

    The values are collected in ``self.losses``, which is reset each time
    training begins.
    """
    def on_train_begin(self, logs=None):
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # A None default replaces the shared mutable `{}`; tolerate a missing dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

# Per-batch loss recorder; read the values with print(history_loss.losses).
history_loss = LossHistory()

# Stop training once val_loss has not improved for 10 epochs.
early_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    mode='min')

# Write a checkpoint file after every epoch (save_best_only is off).
checkpoint = ModelCheckpoint(
    filepath='checkpoint-{epoch:02d}-{val_loss:.2f}.hdf5',
    monitor='val_loss',
    save_best_only=False,
    mode='min',
    period=1)

# Reduce the learning rate (factor 0.2, floor 0.001) when val_loss has not
# improved for 5 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=0.001)

# Log the per-epoch metrics to 'training.log'.
csv_logger = CSVLogger('training.log')

# Training Functions

In [32]:
def training(model_architecture, train_generator, validation_generator, epochs, batch_size, weights_path=None):
    """Build, train, save and evaluate a model from two Keras data generators.

    Args:
        model_architecture: architecture key understood by choosing_model().
        train_generator: generator yielding training batches; must expose ``n``.
        validation_generator: generator yielding validation batches; must
            expose ``n``.
        epochs: maximum number of epochs.
        batch_size: batch size used to derive the number of steps per epoch.
        weights_path: optional weights file forwarded to choosing_model().

    Returns:
        (model, history): the trained model and the Keras History object.

    Raises:
        ValueError: if `model_architecture` is not a known architecture.
    """
    # Directory iterators expose num_classes. Iterators built with .flow() are
    # assumed to carry one-hot labels in .y -- TODO confirm for the Keras
    # version in use.
    num_classes = getattr(train_generator, 'num_classes', None)
    if num_classes is None:
        num_classes = train_generator.y.shape[1]

    # Forward the caller's weights_path instead of always passing None.
    model = choosing_model(model_architecture, num_classes, epochs, weights_path=weights_path)
    if model is None:
        # Fail with a clear message rather than an AttributeError on None below.
        raise ValueError('non valid model: %r' % (model_architecture,))

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    #print_model(model,"model_plus.png")

    train_steps = math.ceil(train_generator.n/batch_size)
    validation_steps = math.ceil(validation_generator.n/batch_size)

    history = model.fit_generator(
        train_generator,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        callbacks=[history_loss, early_stop, checkpoint, reduce_lr, csv_logger],
        verbose=1)

    # save() takes the file path only; passing the model as first argument made
    # the call fail after training had already finished.
    model.save(problem_name + '_' + model_architecture + '.h5')

    print_history_accuracy(history)
    print_history_loss(history)
    # Final evaluation with test cases
    scores = model.evaluate_generator(validation_generator, steps=validation_steps)
    print('Scores: ', scores)
    print("Accuracy: %.2f%%" % (scores[1]*100))
    print("Erro modelo: %.2f%%" % (100-scores[1]*100))

    return model, history


def training_folders(model_architecture, trainPath, testPath, target_size, epochs, batch_size, weights_path=None):
    """Train a model on images read from a train and a test directory.

    Both directories are read with flow_from_directory (RGB images, categorical
    labels, shuffled, pixel values rescaled by 1/255) and handed to training().

    Returns:
        (model, history) as returned by training().
    """
    # https://keras.io/preprocessing/image/
    rescaler_train = ImageDataGenerator(rescale=1./255)
    rescaler_test = ImageDataGenerator(rescale=1./255)

    # Both directories are read with exactly the same options.
    flow_options = dict(
        target_size=target_size,
        color_mode='rgb',
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

    train_generator = rescaler_train.flow_from_directory(trainPath, **flow_options)
    validation_generator = rescaler_test.flow_from_directory(testPath, **flow_options)

    model, history = training(model_architecture, train_generator, validation_generator, epochs, batch_size, weights_path)
    return model, history



def training_data(model_architecture, x_train, y_train, x_test, y_test, epochs, batch_size, weights_path=None):
    """Train a model on in-memory image arrays with integer class labels.

    The labels are one-hot encoded, both sets are wrapped in shuffling
    generators that rescale pixel values by 1/255, and training() is called.

    Args:
        model_architecture: architecture key understood by choosing_model().
        x_train, x_test: image arrays (mind the dtype: float32, uint8, ...).
        y_train, y_test: integer class labels.
        epochs, batch_size, weights_path: forwarded to training().

    Returns:
        (history, model). NOTE: this order is the reverse of what training()
        and training_folders() return; it is kept for compatibility.
    """
    # Encode both label sets with the same width, so a class that is missing
    # from one of them does not produce one-hot matrices of different shapes.
    num_classes = int(max(np.max(y_train), np.max(y_test))) + 1
    y_train = np_utils.to_categorical(y_train, num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes)

    # https://keras.io/preprocessing/image/
    train_datagen = ImageDataGenerator(rescale=1./255)
    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow(
        x_train,
        y_train,
        batch_size=batch_size,
        shuffle=True)

    # Built from test_datagen; the original called .flow() on the not yet
    # assigned validation_generator and raised UnboundLocalError.
    validation_generator = test_datagen.flow(
        x_test,
        y_test,
        batch_size=batch_size,
        shuffle=True)

    model, history = training(model_architecture, train_generator, validation_generator, epochs, batch_size, weights_path)

    return history, model

# Training

In [33]:
# Entry point: train on the images under 'train/' and validate on 'test/',
# using the settings from the PARAMETERS cell.
if __name__ == '__main__':
    trainPath, testPath = 'train/', 'test/'

    model, history = training_folders(
        model_architecture, trainPath, testPath,
        target_size, epochs, batch_size, weights_path)


Found 17653 images belonging to 2 classes.
Found 239 images belonging to 2 classes.


  after removing the cwd from sys.path.
  
  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':
  app.launch_new_instance()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_1 (ZeroPaddin (None, 226, 226, 3)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 224, 224, 64)      1792      
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 226, 226, 64)      0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 112, 112, 64)      0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 114, 114, 64)      0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 112, 112, 128)     73856     
__________

KeyboardInterrupt: 

# Testing

In [1]:
# Evaluate a saved model on the test directory and show the misclassified images.
# This cell needs the imports cell and the PARAMETERS cell (target_size,
# batch_size) to have been run first; the recorded NameError came from running
# it on a fresh kernel.
model_name = 'VGG_16'
testPath = 'test/'

# NOTE(review): training() saves to problem_name + '_' + model_architecture + '.h5';
# confirm that this path points at the intended file.
model = load_model('Models/' + model_name + 'model.h5')

test_datagen = ImageDataGenerator(
    rescale=1./255)

validation_generator = test_datagen.flow_from_directory(
        testPath,
        color_mode = 'rgb',
        class_mode = 'categorical',
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False)  # keep data in same order as labels


# Final evaluation with test cases
eval_scores = model.evaluate_generator(validation_generator)
print('Scores: ', eval_scores)
print("Accuracy: %.2f%%" % (eval_scores[1]*100))
print("Erro modelo: %.2f%%" % (100-eval_scores[1]*100))

# Start again from the first sample so that the predictions line up with
# validation_generator.classes / .filenames.
validation_generator.reset()
scores = model.predict_generator(validation_generator)
# Index of the highest score in each row = predicted class.
predicted_classes = scores.argmax(axis=1)

for i in range(0, scores.shape[0]):
    if predicted_classes[i] != validation_generator.classes[i]:#print failed images
        # Filenames are relative to testPath (the original used the undefined name `pathtest`).
        plt.imshow(imread(os.path.join(testPath, validation_generator.filenames[i])))
        plt.show()
        print('true: ', validation_generator.classes[i])
        print('prediction: ',predicted_classes[i])
        print('scores: ', scores[i])
        print('file: ', validation_generator.filenames[i])
    

NameError: name 'ImageDataGenerator' is not defined

# Confusion Matrix + AUC

In [None]:
# Uses `model`, `validation_generator` (created with shuffle=False) and
# `predicted_classes` from the Testing cell.
from sklearn.metrics import confusion_matrix

print(confusion_matrix(validation_generator.classes, predicted_classes))

# AUC over the whole validation set. A single next() batch (one sample when
# batch_size is 1) does not contain both classes, and roc_auc_score rejects it.
validation_generator.reset()
val_pred = model.predict_generator(validation_generator)
# One-hot labels with the same shape as the predicted score matrix.
val_labels = np_utils.to_categorical(validation_generator.classes, val_pred.shape[1])
val_score_auc = roc_auc_score(val_labels, val_pred)
print ("AUC validation score: ",val_score_auc)

# Data Augmentation

In [None]:
# Generate n_imgs augmented variants of a single image and save them to dir_to_save.
dir_to_save = 'testes'
n_imgs = 10
#select image
# NOTE(review): hard-coded absolute local path; consider a configurable data directory.
origin = '/media/marcelo/OS/Users/Marcelo Queirós/Documents/MIEI/Semestre 2/Tecnologias e Aplicacoes/deep learning/Projects/pills/dc/'
image_path = origin + '1.jpg'

# NOTE(review): the featurewise_* options only take effect after datagen.fit(...)
# has been called on sample data; confirm whether they are intended here.
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=2,
    height_shift_range=2,
    horizontal_flip=True)

# skimage's imread (imported at the top of the notebook) replaces
# scipy.ndimage.imread, which is no longer available in current SciPy.
image = np.expand_dims(imread(image_path),0)

# The generator writes into save_to_dir, so make sure it exists.
os.makedirs(dir_to_save, exist_ok=True)

# fits the model on batches with real-time data augmentation:
#model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
                    #steps_per_epoch=len(x_train) / 32, epochs=epochs)
augmented = datagen.flow(image, batch_size=1,
                         save_to_dir = dir_to_save, save_prefix='dc', save_format='jpg')
# The generator loops indefinitely, so draw exactly n_imgs batches; each draw
# saves one image (the previous `i > n_imgs` test produced n_imgs + 1).
for _ in range(n_imgs):
    next(augmented)
        
#plots(aug_images, figsize=(20,7), rows=2)