In [1]:
import os
import time
from warnings import filterwarnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.applications import vgg16, inception_v3, resnet_v2, xception
from keras.callbacks import (EarlyStopping, ModelCheckpoint)
from keras.layers import (Dense, Dropout, GlobalMaxPool2D, Input)
from keras.models import Model
from keras.optimizers import adam_v2
from keras.utils import np_utils
from PIL import Image

In [2]:
print("Tensorflow: ",tf.__version__)

# Query the GPU list once and reuse it for both the report and the configuration.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))

# Turn on memory growth for every visible GPU. Iterating (instead of indexing
# element 0) keeps this cell runnable on CPU-only machines, where the list is
# empty and physical_devices[0] would raise IndexError.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Silence the PIL warning emitted for images with damaged EXIF metadata.
filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)

Tensorflow:  2.9.1
Num GPUs Available:  1


2022-08-11 15:06:03.775482: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-11 15:06:03.813829: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-11 15:06:03.814707: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [3]:
def set_class_quantzities(path):
    '''
    Count the files held by every directory below ``path`` and sum them per key.

    The tree is walked bottom-up. For each directory, the number of files it
    directly contains is credited to a key chosen as follows: the parent's path
    with ``path`` removed is split on '/'; if that yields more than one piece,
    the second piece is the key, otherwise the directory's own name is used.
    Note that the number of pieces (and therefore the key) depends on whether
    ``path`` ends with a '/'.

    Returns a dict mapping key -> accumulated file count.
    '''
    totals = {}

    for current, subfolders, _ in os.walk(path, topdown=False):
        for folder in subfolders:
            # Only the files sitting directly inside this sub-folder are counted.
            _, _, contained = next(os.walk(os.path.join(current, folder)))
            pieces = current.replace(path, '').split('/')
            key = pieces[1] if len(pieces) > 1 else folder
            totals[key] = totals.get(key, 0) + len(contained)

    return totals

In [4]:
def model_x(model, num_classes=24):
    '''
    Freeze a base network and attach a dense classification head to it.

    Parameters
    ----------
    model : Keras model
        Base network whose weights are frozen (``trainable = False``). Its
        output is fed to ``GlobalMaxPool2D``, so it is presumably a
        convolutional base built without its top layers - confirm at call site.
    num_classes : int, optional
        Number of units of the final softmax layer. Defaults to 24, the value
        that was previously hard-coded.

    Returns
    -------
    A new, compiled ``Model`` sharing the base network's input, trained with
    categorical cross-entropy and reporting categorical accuracy, recall,
    precision and AUC.
    '''
    # Only the layers added below are trained.
    model.trainable = False

    x = model.output
    x = GlobalMaxPool2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=model.input, outputs=predictions)
    model.compile(optimizer=adam_v2.Adam(learning_rate=0.00001, decay=0.0001),
                  loss='categorical_crossentropy',
                  metrics=[tf.keras.metrics.CategoricalAccuracy(),
                           tf.keras.metrics.Recall(),
                           tf.keras.metrics.Precision(),
                           tf.keras.metrics.AUC()])

    return model

In [5]:
def load_dataset(base_dir, classes, dim):
    '''
    Load every file found under ``base_dir`` as a resized, rescaled RGB image.

    Parameters
    ----------
    base_dir : str
        Root folder; the first path component below it is used as the label
        of each file.
    classes : list
        Known labels. The position of a label in this list is its class index
        (``classes.index`` raises ValueError for a label that is not listed).
    dim : tuple
        Size passed to ``Image.resize``.

    Returns
    -------
    (X, Y) : X is a float32 array stacking the images divided by 255,
    Y is an array with the class index of each image, in the same order.
    '''
    X = []
    Y = []
    for root, _subdirs, files in os.walk(base_dir):
        for filename in files:
            file_path = os.path.join(root, filename)
            # The first directory below base_dir names the class.
            suffix = file_path[len(base_dir):].lstrip(os.sep)
            label = suffix.split(os.sep)[0]

            # The context manager releases the file handle as soon as the
            # pixels have been read, instead of leaving it to the garbage
            # collector (one open handle per image otherwise).
            with Image.open(file_path) as img:
                pixels = np.asarray(img.resize(dim).convert('RGB'))

            X.append(pixels/255.)
            Y.append(classes.index(label))

    print (f"Imagens processadas: {len(X)}")

    X = np.array(X, dtype='float32')
    Y = np.array(Y)  # integer class index of each image

    return X, Y

In [6]:
def load_qtdImages(path, classes, dim, qtd_imgs):
    '''
    Load at most ``qtd_imgs`` images from each directory found below ``path``.

    Parameters
    ----------
    path : str
        Root folder; the first path component below it is used as the label
        of each file. Files stored directly in ``path`` are not visited.
    classes : list
        Known labels. The position of a label in this list is its class index
        (``classes.index`` raises ValueError for a label that is not listed).
    dim : tuple
        Size passed to ``Image.resize``.
    qtd_imgs : int
        Maximum number of files taken from each directory, in the order
        reported by ``os.walk``. Non-positive values load nothing.

    Returns
    -------
    (X, Y) : X is a float32 array stacking the images divided by 255,
    Y is an array with the class index of each image, in the same order.
    '''
    X = []
    Y = []
    # A negative slice bound would select files; clamp to keep "load nothing".
    per_dir_limit = max(qtd_imgs, 0)

    for root, dirs, _files in os.walk(path, topdown=False):
        for name in dirs:
            diretorio = os.path.join(root, name)
            # Default avoids a bare StopIteration if the directory cannot be listed.
            _, _, dfiles = next(os.walk(diretorio), (diretorio, [], []))

            for filename in dfiles[:per_dir_limit]:
                file_path = os.path.join(diretorio, filename)
                # The first directory below path names the class.
                suffix = file_path[len(path):].lstrip(os.sep)
                label = suffix.split(os.sep)[0]

                # The context manager releases the file handle as soon as the
                # pixels have been read (one open handle per image otherwise).
                with Image.open(file_path) as img:
                    pixels = np.asarray(img.resize(dim).convert('RGB'))

                X.append(pixels/255.)
                Y.append(classes.index(label))

    print (f"Imagens processadas: {len(X)}") 

    X = np.array(X, dtype='float32')
    Y = np.array(Y)  # integer class index of each image
    return X, Y

In [7]:
path_atual = os.getcwd()

# Locate the sibling folders relative to the working directory.
# os.path.dirname strips exactly one trailing component, whereas the previous
# str.replace('/' + name, '') removed every occurrence of that substring and
# corrupted paths in which a folder name repeats (e.g. '/home/user/u').
# The trailing '' keeps the final separator that later string concatenations
# (path_db+'train', path_models+name) rely on.
path_models = os.path.join(os.path.dirname(path_atual), 'modelos', 'save_models', '')
path_db = os.path.join(os.path.dirname(os.path.dirname(path_atual)), 'dataset', 'kimia', '')

# Class names are the keys of the per-class file counts, in discovery order.
quantidade = set_class_quantzities(path_db)
classes = list(quantidade.keys())

# One row per recorded metric; a column is added for each trained model.
metrics = pd.DataFrame(index=['accuracy', 'loss', 'val_accuracy', 'val_loss', 'precision', 'recall', 'time'])

In [None]:
# Instantiate the ImageNet-pretrained network, without its classification top
vgg16_model = vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
    input_shape=(224, 224, 3),
)

In [9]:
# Registry of base networks to train, keyed by the name used for output files
modelos = dict(vgg16=vgg16_model)

In [None]:
# Load the training split
# (the message previously announced 299x299 although images are resized to 224x224)
print("Carregando dataset com imagens de dimensao 224x224...")
start = time.time()

# up to 150 images per directory, resized to 224x224
x_train, y_train = load_qtdImages(path_db+'train', classes, (224, 224), 150)

print(f"Dataset carregado em: {time.time()-start:.2f} segundos")

In [None]:
# Load the validation split (read from the 'test' folder)
# (the message previously announced 299x299 although images are resized to 224x224)
print("Carregando dataset com imagens de dimensao 224x224...")
start = time.time()

# up to 25 images per directory, resized to 224x224
x_val, y_val = load_qtdImages(path_db+'test', classes, (224, 224), 25)

print(f"Dataset carregado em: {time.time()-start:.2f} segundos")

In [None]:
# One-hot encode the integer labels once, before the loop, and only while they
# are still 1-D. Encoding inside the loop fed already-encoded labels back into
# to_categorical as soon as a second model was trained or the cell was re-run.
if np.ndim(y_train) == 1:
    y_train = np_utils.to_categorical(y_train, 24)
if np.ndim(y_val) == 1:
    y_val = np_utils.to_categorical(y_val, 24)

for name, model in modelos.items():
    print(f'Iniciando treinamento do modelo {name}')

    # The checkpoint, the figures and the CSV are all written into this folder;
    # create it up front so none of those writes fails on a fresh checkout.
    output_dir = path_models+name+'/'
    os.makedirs(output_dir, exist_ok=True)

    # Callbacks: keep the best weights and stop when validation accuracy stalls
    checkpoint = ModelCheckpoint(output_dir+name+'.h5', monitor='val_categorical_accuracy', verbose=1, save_best_only=True, mode='max')
    earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', min_delta=0, patience=100, verbose=1, mode='auto', baseline=None)
    callbacks_list = [checkpoint, earlyStopping]

    # The timer covers both building the head and fitting it.
    t0 = time.time()

    model = model_x(model)
    history = model.fit(x = x_train, 
                        y = y_train,
                        batch_size=64,
                        validation_data=(x_val, y_val),
                        epochs=150,
                        callbacks=callbacks_list)

    ttt = time.time() - t0

    accuracy = np.array(history.history['categorical_accuracy'])
    loss = np.array(history.history['loss'])
    val_accuracy = np.array(history.history['val_categorical_accuracy'])
    val_loss = np.array(history.history['val_loss'])

    # Accuracies are reported as the best value over all epochs, losses as the
    # value of the last epoch (same figures as before, without sorting).
    best_accuracy = accuracy.max()
    best_val_accuracy = val_accuracy.max()

    metrics.loc['time', name] = ttt
    metrics.loc['accuracy', name] = best_accuracy
    metrics.loc['loss', name] = loss[-1]
    metrics.loc['val_accuracy', name] = best_val_accuracy
    metrics.loc['val_loss', name] = val_loss[-1]

    print("Tempo de Treinamento: %.2f" % (ttt))
    print("Ac: %.2f" % (best_accuracy))
    print("Acval: %.2f" % (best_val_accuracy))

    # summarize history for accuracy
    plt.plot(history.history['categorical_accuracy'])
    plt.plot(history.history['val_categorical_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('Acurácia')
    plt.xlabel('Épocas')
    plt.legend(['Treinamento', 'Teste'], loc='upper left')
    plt.savefig(output_dir+name+"_accuracy.png")
    plt.clf()

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Loss')
    plt.ylabel('Loss')
    plt.xlabel('Épocas')
    plt.legend(['Treinamento', 'Teste'], loc='upper left')
    plt.savefig(output_dir+name+"_loss.png")
    plt.clf()

    # Save the metrics table
    # NOTE(review): index=False leaves the metric names (the row labels) out of
    # the file; rows follow the order of the metrics index. Confirm this is intended.
    metrics.to_csv(output_dir+'dados_train.csv', index=False, sep=';')


print('Finalizidado!')