# Import needed modules

In [13]:
# import system libs
import os

# Data preprocessing
import cv2
import numpy as np
from PIL import Image
import random

# import pandas as pd


# Model definition
from sklearn.model_selection import train_test_split

# import Deep learning libraries
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

# Create needed functions

#### Function to generate images from dataframe

In [2]:
def create_gens (train_df, valid_df, test_df, batch_size):
    '''
    This function takes train, validation, and test dataframe and fit them into image data generator, 
    because model takes data from image data generator.
    Image data generator converts images into tensors. '''


    # define model parameters
    img_size = (224, 224)
    channels = 3 # either BGR or Grayscale
    color = 'rgb'
    img_shape = (img_size[0], img_size[1], channels)

    # Recommended : use custom function for test data batch size, else we can use normal batch size.
    ts_length = len(test_df)
    test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
    test_steps = ts_length // test_batch_size

    # This function which will be used in image data generator for data augmentation, it just take the image and return it again.
    def scalar(img):
        return img

    tr_gen = ImageDataGenerator(preprocessing_function= scalar, horizontal_flip= True)
    ts_gen = ImageDataGenerator(preprocessing_function= scalar)

    train_gen = tr_gen.flow_from_dataframe( train_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= color, shuffle= True, batch_size= batch_size)

    valid_gen = ts_gen.flow_from_dataframe( valid_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= color, shuffle= True, batch_size= batch_size)

    # Note: we will use custom test_batch_size, and make shuffle= false
    test_gen = ts_gen.flow_from_dataframe( test_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= color, shuffle= False, batch_size= test_batch_size)

    return train_gen, valid_gen, test_gen

#### **Function to display data sample**

In [3]:
def show_images(data):
    '''
    This function take the data generator and show sample of the images
    '''

    # return classes , images to be displayed
    g_dict = data.class_indices        # defines dictionary {'class': index}
    classes = list(g_dict.keys())     # defines list of dictionary's kays (classes), classes names : string
    images, labels = next(gen)        # get a batch size samples from the generator

    # calculate number of displayed samples
    length = len(labels)        # length of batch size
    sample = min(length, 25)    # check if sample less than 25 images

    plt.figure(figsize= (20, 20))

    for i in range(sample):
        plt.subplot(5, 5, i + 1)
        image = images[i] / 255       # scales data to range (0 - 255)
        plt.imshow(image)
        index = np.argmax(labels[i])  # get image index
        class_name = classes[index]   # get class of image
        plt.title(class_name, color= 'blue', fontsize= 12)
        plt.axis('off')
    plt.show()

#### Reading Dataset

In [14]:
# Reading the data set
data_dir = 'Data/imgs'
tags = 'Data/train_data.csv'

In [26]:
# Obtén la lista de nombres de archivos en el directorio
archivos = os.listdir(data_dir)

# Variable para contar las imágenes con 3 canales
imagenes_con_tres_canales = 0

for archivo in archivos:
    # Abre cada imagen y obtén el número de canales
    imagen = Image.open(os.path.join(directorio_imagenes, archivo))
    canales = len(imagen.split())
    
    if canales == 3:
        imagenes_con_tres_canales += 1

# Imprime el mensaje con la información
print(f'Hay {imagenes_con_tres_canales} imágenes con 3 canales (formato RGB).')



Hay 4680 imágenes con 3 canales (formato RGB).


In [28]:
import os
from PIL import Image

# Directorio que contiene tus imágenes
directorio_imagenes = 'Data/imgs'

# Obtén la lista de nombres de archivos en el directorio
archivos = os.listdir(directorio_imagenes)

# Variable para contar las imágenes con 3 canales
imagenes_con_tres_canales = 0

# Diccionario para almacenar dimensiones de imágenes
dimensiones_imagenes = {}

for archivo in archivos:
    # Abre cada imagen y obtén el número de canales
    imagen = Image.open(os.path.join(directorio_imagenes, archivo))
    canales = len(imagen.split())

    # Obtiene el tamaño de la imagen
    ancho, alto = imagen.size
    
    if canales == 3:
        imagenes_con_tres_canales += 1
        dimensiones_imagenes[archivo] = (ancho, alto)

# Imprime el mensaje con la información
print(f'Hay {imagenes_con_tres_canales} imágenes con 3 canales (formato RGB).')
print('Dimensiones de las imágenes:')
for archivo, dimensiones in dimensiones_imagenes.items():
    print(f'{archivo}: Ancho={dimensiones[0]}px, Alto={dimensiones[1]}px')

Hay 4680 imágenes con 3 canales (formato RGB).
Dimensiones de las imágenes:
pcrhealthy.35.jpg: Ancho=224px, Alto=224px
healthy.1339.jpg: Ancho=224px, Alto=224px
salmo.1957.jpg: Ancho=224px, Alto=224px
salmo.391.jpg: Ancho=224px, Alto=224px
healthy.27.jpg: Ancho=224px, Alto=224px
healthy.795.jpg: Ancho=224px, Alto=224px
pcrhealthy.299.jpg: Ancho=224px, Alto=224px
healthy.781.jpg: Ancho=224px, Alto=224px
healthy.959.jpg: Ancho=224px, Alto=224px
salmo.385.jpg: Ancho=224px, Alto=224px
healthy.33.jpg: Ancho=224px, Alto=224px
pcrhealthy.21.jpg: Ancho=224px, Alto=224px
salmo.1943.jpg: Ancho=224px, Alto=224px
healthy.1305.jpg: Ancho=224px, Alto=224px
healthy.1463.jpg: Ancho=224px, Alto=224px
healthy.971.jpg: Ancho=224px, Alto=224px
healthy.965.jpg: Ancho=224px, Alto=224px
healthy.1477.jpg: Ancho=224px, Alto=224px
healthy.1311.jpg: Ancho=224px, Alto=224px
salmo.352.jpg: Ancho=224px, Alto=224px
salmo.2245.jpg: Ancho=224px, Alto=224px
salmo.1994.jpg: Ancho=224px, Alto=224px
healthy.756.jpg: Ancho

In [None]:
# Convert the imagens into grey scale to facilitate interpretation


In [24]:
directorio_normalizado = 'Data/imgs_normalized'


# Obtiene la lista de nombres de archivos en el directorio original
archivos = os.listdir(directorio_imagenes)

for archivo in archivos:
    # Carga cada imagen
    imagen = cv2.imread(os.path.join(directorio_imagenes, archivo))

    # Normaliza los valores de píxeles
    imagen_normalizada = imagen / 255

    # Asegúrate de que los valores estén en el rango de 0 a 1
    imagen_normalizada = imagen_normalizada.clip(0, 1)

    # Guarda la imagen normalizada en el directorio correspondiente
    cv2.imwrite(os.path.join(directorio_normalizado, archivo), imagen_normalizadaya)

Proceso de normalización completado.


In [None]:
# Composición de pixeles de 0 a 255, 
# Tienen tamaño: ancho y largo.
# La máquina ve una matriz en donde cada posición es un pixel, un valor entre 0 y 255
# cómo son números puedo aclararla, voltearla, cambiar el tamaño

# Por ejemplo, en una escala de grises siendo 0 negro y 255 blanco. Aquí sólo hay un canal, más eficientes
# Las imágenes a color tienen exactamente el mismo tamaño, pero ahora 3 canales: RGB. Aumenta la complejidad, muy lenta, miu pesada


#### **Display Image Sample**

#### **Generic Model Creation**

In [None]:
# Create Model Structure
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
class_count = len(list(train_gen.class_indices.keys())) # to define number of classes in dense layer

# create pre-trained model (you can built on pretrained model such as :  efficientnet, VGG , Resnet )
# we will use efficientnetb3 from EfficientNet family.
base_model = tf.keras.applications.efficientnet.EfficientNetB3(include_top= False, 
weights= "imagenet", 
input_shape= img_shape, 
pooling= 'max')

model = Sequential([
    base_model,
    BatchNormalization(axis= -1, momentum= 0.99, epsilon= 0.001),
    Dense(256, kernel_regularizer= regularizers.l2(l= 0.016), 
                activity_regularizer= regularizers.l1(0.006),
                bias_regularizer= regularizers.l1(0.006), 
                activation= 'relu'),
    Dropout(rate= 0.45, seed= 123),
    Dense(class_count, activation= 'softmax')
])

model.compile(Adamax(learning_rate= 0.001), loss= 'categorical_crossentropy', metrics= ['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb3 (Functional)  (None, 1536)             10783535  
                                                                 
 batch_normalization (BatchN  (None, 1536)             6144      
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 256)               393472    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 11,184,179
Trainable params: 11,093,804
Non-trainable params: 90,375
_______________________________________

#### **Train model**

In [None]:
history = model.fit(x= train_gen, epochs= epochs, verbose= 0, callbacks= callbacks,
                    validation_data= valid_gen, validation_steps= None, shuffle= False)
                    

Do you want model asks you to halt the training [y/n] ?
 Epoch     Loss   Accuracy  V_loss    V_acc     LR     Next LR  Monitor  % Improv  Duration


2023-10-16 23:10:11.417006: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


                    processing batch 0 of 162  -   accuracy=  22.500   -   loss: 10.29979 

# **Evaluate model**

In [None]:
ts_length = len(test_df)
test_batch_size = test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
test_steps = ts_length // test_batch_size

train_score = model.evaluate(train_gen, steps= test_steps, verbose= 1)
valid_score = model.evaluate(valid_gen, steps= test_steps, verbose= 1)
test_score = model.evaluate(test_gen, steps= test_steps, verbose= 1)

print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

  9/269 [>.............................] - ETA: 31:48 - loss: 4.0649 - accuracy: 0.9778

KeyboardInterrupt: 

# **Get Predictions**

In [None]:
preds = model.predict_generator(test_gen)
y_pred = np.argmax(preds, axis=1)
print(y_pred)

#### **Confusion Matrics and Classification Report**

In [None]:
g_dict = test_gen.class_indices
classes = list(g_dict.keys())

# Confusion matrix
cm = confusion_matrix(test_gen.classes, y_pred)
plot_confusion_matrix(cm= cm, classes= classes, title = 'Confusion Matrix')

# Classification report
print(classification_report(test_gen.classes, y_pred, target_names= classes))

#### **Save model**

In [None]:
model_name = model.input_names[0][:-6]
subject = 'Chicken Disease'
acc = test_score[1] * 100
save_path = ''

# Save model
save_id = str(f'{model_name}-{subject}-{"%.2f" %round(acc, 2)}.h5')
model_save_loc = os.path.join(save_path, save_id)
model.save(model_save_loc)
print(f'model was saved as {model_save_loc}')

# Save weights
weight_save_id = str(f'{model_name}-{subject}-weights.h5')
weights_save_loc = os.path.join(save_path, weight_save_id)
model.save_weights(weights_save_loc)
print(f'weights were saved as {weights_save_loc}')

#### **Generate CSV files containing classes indicies & image size**

In [None]:
class_dict = train_gen.class_indices
img_size = train_gen.image_shape
height = []
width = []
for _ in range(len(class_dict)):
    height.append(img_size[0])
    width.append(img_size[1])

Index_series = pd.Series(list(class_dict.values()), name= 'class_index')
Class_series = pd.Series(list(class_dict.keys()), name= 'class')
Height_series = pd.Series(height, name= 'height')
Width_series = pd.Series(width, name= 'width')
class_df = pd.concat([Index_series, Class_series, Height_series, Width_series], axis= 1)
csv_name = f'{subject}-class_dict.csv'
csv_save_loc = os.path.join(save_path, csv_name)
class_df.to_csv(csv_save_loc, index= False)
print(f'class csv file was saved as {csv_save_loc}')