## Sources

In [None]:
# https://medium.com/ai-techsystems/insect-classification-2c89e7398ec4

In [None]:
# https://www.linkedin.com/pulse/using-ai-recognize-kissing-bugs-mobile-phone-images-lorenzo-pattori/

In [None]:
# https://web.stanford.edu/~nanbhas/blog/sigmoid-softmax/#:~:text=And%20the%20sigmoid%20can%20now,belongs%20to%20the%20negative%20class.

## Import libraries and set configurations

In [9]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras as ks
import tensorflow.keras.models as model_loader
import tensorflow.keras.utils as image_loader
import tensorflow.nn as activation_function
from PIL import Image
from rembg import remove
import os
from keras.activations import relu
from tensorflow.keras.layers import *
from tensorflow.keras import Model
from tensorflow.keras import layers as Layers


In [10]:
# TF1-compatibility session options: one intra-op thread, two inter-op
# threads, soft device placement enabled, and a single CPU device.
config = tf.compat.v1.ConfigProto()
config.intra_op_parallelism_threads = 1
config.inter_op_parallelism_threads = 2
config.allow_soft_placement = True
config.device_count['CPU'] = 1

# NOTE(review): `session` is not referenced again in the visible notebook;
# confirm that these limits actually reach Keras training under TF2 eager mode.
session = tf.compat.v1.Session(config=config)

## Get and load data

In [None]:
# Image directories, relative to the notebook's working directory.
train_dir = '../train_ds'
valid_dir = '../valid_ds'

# (height, width) every image is resized to before entering the network.
target_size = (224, 224)

# Presumably the number of class sub-folders per directory — TODO confirm;
# it is not read anywhere in the visible notebook.
num_fldrs = 2

# NOTE(review): the keys '1'/'2' do not obviously correspond to the integer
# labels used later (the inference cells index scores with 0 and 1) — verify.
insect_names = {
    '1': "triatomineo",
    '2': "outro_inseto",
}

In [13]:
def datapreprocessing(train_dir, valid_dir, batch_size, image_size=None, seed=None):
    """Build the training and validation datasets from image folders.

    Both directories are read with `image_dataset_from_directory`, which
    infers the labels from the sub-directory structure.

    Args:
        train_dir: Path to the training images.
        valid_dir: Path to the validation images.
        batch_size: Number of images per batch.
        image_size: (height, width) images are resized to. When omitted, the
            notebook-level `target_size` is used (the original behaviour).
        seed: Optional seed for the training shuffle, for reproducible runs.
            `None` keeps the original unseeded shuffle.

    Returns:
        Tuple `(train_ds, valid_ds)`. The training set is shuffled; the
        validation set keeps directory order.
    """
    from tensorflow.keras.utils import image_dataset_from_directory

    # Fall back to the notebook global so existing three-argument calls
    # behave exactly as before.
    if image_size is None:
        image_size = target_size

    train_ds = image_dataset_from_directory(
        directory=train_dir,
        image_size=image_size,
        batch_size=batch_size,
        shuffle=True,
        seed=seed,
        )

    valid_ds = image_dataset_from_directory(
        directory=valid_dir,
        image_size=image_size,
        batch_size=batch_size,
        shuffle=False,
        )

    return train_ds, valid_ds

In [None]:
# Images per batch for both the training and the validation pipeline.
batch_size = 32

# Build both datasets from the directories configured above.
train_ds, valid_ds = datapreprocessing(
    train_dir=train_dir,
    valid_dir=valid_dir,
    batch_size=batch_size,
)

## Augmentation

In [15]:
# Random flip along both axes, used by `augmentation`.
data_random_Flip = Layers.RandomFlip(mode="horizontal_and_vertical")
# Random rotation with factor 0.4 (a fraction of a full turn per the Keras docs).
data_random_rotation = Layers.RandomRotation(factor=0.4)

In [16]:
def augmentation(ds):
    """Return `ds` followed by a flipped copy and a rotated copy of itself.

    Each copy is produced by mapping one of the module-level random layers
    over the images of the *original* dataset; labels pass through unchanged.
    The result therefore has three times as many batches as `ds`, ordered as
    original, then flipped, then rotated.
    """
    def _flip(images, labels):
        return data_random_Flip(images), labels

    def _rotate(images, labels):
        return data_random_rotation(images), labels

    flipped = ds.map(_flip)
    rotated = ds.map(_rotate)

    return ds.concatenate(flipped).concatenate(rotated)

In [17]:
# Training set extended with its flipped and rotated copies; the validation
# set is left untouched.
aug_train_ds = augmentation(train_ds)

In [None]:
def _count_batches(ds):
    """Return the number of batches in `ds` without decoding its images.

    Uses the dataset's static cardinality; only when TensorFlow reports the
    cardinality as unknown does it fall back to iterating the dataset once.
    """
    n_batches = int(ds.cardinality())
    if n_batches == tf.data.UNKNOWN_CARDINALITY:
        n_batches = sum(1 for _ in ds)
    return n_batches


# `len(list(ds))` would load and decode every image just to count batches.
print(f'The training dataset was updated from {_count_batches(train_ds)} to {_count_batches(aug_train_ds)} image batchs')

## CNN model (ResNet18)

In [None]:
class ResBlock(Model):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    When `stride` differs from 1 the shortcut is projected with a strided
    1x1 convolution followed by batch normalisation so that it matches the
    main path; otherwise the input is added back unchanged.

    Args:
        channels: Number of filters of every convolution in the block.
        stride: Stride of the first convolution (and of the shortcut
            projection, when present).
    """

    def __init__(self, channels, stride=1):
        super().__init__(name='ResBlock')
        # True when the shortcut needs a projection.
        self.flag = stride != 1

        # Main path (layers are created in the same order as they are used).
        self.conv1 = Conv2D(filters=channels, kernel_size=3, strides=stride, padding='same')
        self.bn1 = BatchNormalization()
        self.conv2 = Conv2D(filters=channels, kernel_size=3, padding='same')
        self.bn2 = BatchNormalization()
        self.relu = ReLU()

        # Shortcut projection, only built for strided blocks.
        if self.flag:
            self.bn3 = BatchNormalization()
            self.conv3 = Conv2D(filters=channels, kernel_size=1, strides=stride)

    def call(self, x):
        """Apply the block to `x` and return the activated sum of both paths."""
        shortcut = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        if self.flag:
            shortcut = self.bn3(self.conv3(shortcut))

        merged = Layers.add([shortcut, out])
        return self.relu(merged)


class ResNet18(Model):
    """ResNet-18-style classifier with a two-unit output layer.

    Structure, in call order:
      * stem: 7x7 convolution (64 filters, stride 2), batch norm, ReLU and a
        2x2 max-pool with stride 2;
      * four stages of two `ResBlock`s each with 64, 128, 256 and 512
        channels; the first block of the last three stages uses stride 2;
      * head: global average pooling, flatten and a `Dense(2)` layer with no
        activation, so the model outputs raw scores.
    """

    def __init__(self):
        super().__init__(name='ResNet18')

        # Stem
        self.conv1 = Conv2D(filters=64, kernel_size=7, strides=2, padding='same')
        self.bn = BatchNormalization()
        self.relu = ReLU()
        self.mp1 = MaxPooling2D(pool_size=(2, 2), strides=2, padding="same")

        # Residual stages
        self.conv2_1 = ResBlock(64)
        self.conv2_2 = ResBlock(64)

        self.conv3_1 = ResBlock(128, 2)
        self.conv3_2 = ResBlock(128)

        self.conv4_1 = ResBlock(256, 2)
        self.conv4_2 = ResBlock(256)

        self.conv5_1 = ResBlock(512, 2)
        self.conv5_2 = ResBlock(512)

        # Classification head
        self.pool = GlobalAveragePooling2D()
        self.flat = Flatten()
        self.fc = Dense(2, name="output")

    def call(self, x):
        """Run the forward pass and return the two raw class scores."""
        x = self.mp1(self.relu(self.bn(self.conv1(x))))

        residual_blocks = (
            self.conv2_1, self.conv2_2,
            self.conv3_1, self.conv3_2,
            self.conv4_1, self.conv4_2,
            self.conv5_1, self.conv5_2,
        )
        for block in residual_blocks:
            x = block(x)

        features = self.flat(self.pool(x))
        return self.fc(features)


# Instantiate the network, create its weights for 224x224 three-channel input
# (built here with a batch dimension of 1), and print the layer summary.
model = ResNet18()
model.build(input_shape=(1, 224, 224, 3))
model.summary()

## Training

In [20]:
def compiler(model,train_ds,valid_ds,epchs,lr):
    """Compile and train `model`, then plot its accuracy curves.

    The model is compiled with Adam at learning rate `lr` and sparse
    categorical cross-entropy on logits, and trained for at most `epchs`
    epochs. Training stops early once `val_accuracy` has not improved for
    10 epochs, restoring the best weights.

    Args:
        model: Keras model to train (compiled in place).
        train_ds: Training dataset passed to `fit`.
        valid_ds: Validation dataset passed to `fit`.
        epchs: Maximum number of epochs.
        lr: Learning rate for the Adam optimizer.

    Returns:
        Tuple `(model, history)` with the trained model and the `fit` history.
    """
    stop_when_stalled = ks.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        verbose=1,
        restore_best_weights=True,
    )

    model.compile(
        optimizer=ks.optimizers.Adam(learning_rate=lr),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    history = model.fit(
        train_ds,
        validation_data=valid_ds,
        epochs=epchs,
        callbacks=[stop_when_stalled],
    )

    # Training vs. validation accuracy per epoch, drawn on the current figure.
    curves = history.history
    for metric, label in (('accuracy', 'train_acc'), ('val_accuracy', 'valid_acc')):
        plt.plot(curves[metric], label=label)

    plt.title('lrate='+str(lr), pad=-50)
    plt.legend()
    plt.grid(True)
    return model,history

In [None]:
# `compiler` returns a (model, history) pair and this cell rebinds `model` to
# that pair, so on a re-run `model` is already a tuple. Unwrap it first so the
# cell can be executed more than once without passing a tuple to `compiler`.
network = model[0] if isinstance(model, tuple) else model
model = compiler(network, aug_train_ds, valid_ds, 100, lr=0.001)

## Save model

In [16]:
# After training, `model` is the (model, history) pair returned by `compiler`;
# after the load cell it is a bare Keras model. Accept both so this cell does
# not crash when the notebook is executed out of order.
trained_model = model[0] if isinstance(model, tuple) else model
trained_model.save(filepath='../cnn_model.keras')

## Load model

In [17]:
# The saved network is built from the custom `ResNet18` / `ResBlock` classes
# defined in this notebook. Passing them explicitly lets Keras rebuild the
# model even when it refuses to resolve unregistered classes on its own.
model = tf.keras.models.load_model(
    filepath='../cnn_model.keras',
    custom_objects={'ResNet18': ResNet18, 'ResBlock': ResBlock},
)

## Triatomineo inferences (validation images)

In [None]:
# NOTE(review): `dir` shadows the Python builtin; the name is kept because the
# following cell reads it.
dir = '../valid_ds/triatomineo'
target_size = (224, 224)

# Set NumPy print options so scores are shown with two decimals and without
# scientific notation.
np.set_printoptions(precision=2, suppress=True)

# One entry per file, in `os.listdir(dir)` order: the softmax of the model
# output for that image, scaled to percentages.
results = []
for img in os.listdir(dir):
    image = image_loader.load_img(dir+'/'+img, target_size=target_size)
    image_arr = np.array([image])  # batch containing the single image

    logits = model.predict(image_arr)
    probabilities = np.array(activation_function.softmax(logits))
    output = probabilities[0] * 100
    results.append(output)

In [None]:
# Count the images of this directory that were misclassified. `results` holds
# one score pair per file, in `os.listdir(dir)` order; for this directory a
# prediction counts as wrong when the score at index 1 does not exceed the
# score at index 0.
arquivos = os.listdir(dir)
erros = 0
for count, img in enumerate(arquivos):
    if results[count][1] <= results[count][0]:
        # Only report the failures (the original also printed "None" for
        # every correctly classified image).
        print(f'Inferência incorreta de {img} com {results[count]}')
        erros += 1
soma = len(arquivos)
if soma:
    print(f'\nPorcentagem de acerto é de {((soma - erros) / soma) * 100}%')
else:
    # Avoid a ZeroDivisionError when the directory is empty.
    print(f'\nNenhuma imagem encontrada em {dir}')

## Non-triatomineo inferences (validation images)

In [None]:
# This section evaluates the *validation* images of the non-triatomine class,
# so it reads the class folder inside the validation directory, mirroring the
# triatomineo section ('../valid_ds/triatomineo'). The original path pointed
# at '../outro_inseto', outside the validation set.
# NOTE(review): `dir` shadows the Python builtin; the name is kept because the
# following cell reads it.
dir = '../valid_ds/outro_inseto'
target_size = (224, 224)
results = []

# Set NumPy print options so scores are shown with two decimals and without
# scientific notation.
np.set_printoptions(precision=2, suppress=True)
for img in os.listdir(dir):
    image = image_loader.load_img(dir+'/'+img, target_size=target_size)
    image_arr = np.array([image])  # batch containing the single image

    # Softmax of the model output for this image, scaled to percentages.
    output = np.array(activation_function.softmax(model.predict(image_arr)))[0] * 100
    results.append(output)

In [None]:
# Count the images of this directory that were misclassified. `results` holds
# one score pair per file, in `os.listdir(dir)` order; for this directory a
# prediction counts as wrong when the score at index 0 does not exceed the
# score at index 1.
arquivos = os.listdir(dir)
erros = 0
for count, img in enumerate(arquivos):
    if results[count][0] <= results[count][1]:
        # Only report the failures (the original also printed "None" for
        # every correctly classified image).
        print(f'Inferência incorreta de {img} com {results[count]}')
        erros += 1
soma = len(arquivos)
if soma:
    print(f'Porcentagem de acerto é de {((soma - erros) / soma) * 100}%')
else:
    # Avoid a ZeroDivisionError when the directory is empty.
    print(f'Nenhuma imagem encontrada em {dir}')

## Confusion matrix

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def criar_matriz_confusao(verdadeiro_positivo_A, verdadeiro_negativo_B, total_A, total_B):
    """Build a 2x2 confusion matrix of raw counts for classes A and B.

    Args:
        verdadeiro_positivo_A: Samples of class A predicted as A.
        verdadeiro_negativo_B: Samples of class B predicted as B.
        total_A: Total number of class-A samples.
        total_B: Total number of class-B samples.

    Returns:
        NumPy array laid out as
        ``[[A predicted A, A predicted B], [B predicted A, B predicted B]]``.
    """
    linha_A = [verdadeiro_positivo_A, total_A - verdadeiro_positivo_A]
    linha_B = [total_B - verdadeiro_negativo_B, verdadeiro_negativo_B]
    return np.array([linha_A, linha_B])

# Counts entered by hand.
# NOTE(review): these look like they were copied from the two inference
# sections above — confirm they still match after re-running the notebook.
verdadeiro_positivo_A = 61
verdadeiro_negativo_B = 59
total_A = 66
total_B = 66

# Build the confusion matrix from the counts above.
matriz = criar_matriz_confusao(verdadeiro_positivo_A, verdadeiro_negativo_B, total_A, total_B)

# Draw the matrix as an image.
fig, ax = plt.subplots()
im = ax.imshow(matriz, cmap='viridis')

# Write each count in the centre of its cell, on a white box for legibility.
caixa = {'facecolor': 'white', 'alpha': 1.0, 'pad': 5}
for (i, j), valor in np.ndenumerate(matriz):
    text = ax.text(j, i, valor,
                   ha="center", va="center", color="black", fontsize=18, bbox=caixa)

# Tick positions and class names: rows are the true class, columns the
# predicted class.
posicoes = np.arange(len(matriz))
classes = ['Triatomineo', 'Outro inseto']
ax.set_xticks(posicoes)
ax.set_yticks(posicoes)
ax.set_xticklabels(classes)
ax.set_yticklabels(classes)
ax.set_xlabel('Predição')
ax.set_ylabel('Verdadeiro valor')

# Rotate the x tick labels so they stay readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Title
ax.set_title("Matriz de Confusão")

# Render the figure.
plt.show()
