# In Image_Classifier_Animal_Train_Model_01.ipynb is too slow.
# It would take 90 hr to finish so I will optimize in this notebook instead

# Data split train test
Module

In [1]:
import os
import numpy as np
import pandas as pd
import glob
from typing import List, Tuple
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import plot_model

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle





In [2]:
def train_test_validate(original_images_base_path: str, augmented_images_base_path: str, animals: List[str]) -> Tuple[List[str], List[str], List[str]]:
    all_images = []

    # Loop through each animal type and gather images from both original and augmented directories
    for animal in animals:
        original_images_path = os.path.join(original_images_base_path, animal)
        #augmented_images_path = os.path.join(augmented_images_base_path, animal)

        # Use glob to collect all images of this type of animal from both directories
        original_images = glob.glob(os.path.join(original_images_path, '*.jpg'))  # Adjust the pattern if needed
        #augmented_images = glob.glob(os.path.join(augmented_images_path, '*.jpeg'))  # Adjust the pattern if needed

        all_images.extend(original_images)
        #all_images.extend(augmented_images)

    # Splitting the dataset into training, validation, and test sets
    train_val_images, test_images = train_test_split(all_images, test_size=0.2, random_state=42)  # 20% for testing
    train_images, val_images = train_test_split(train_val_images, test_size=0.125, random_state=42)  # 12.5% of 80% = 10% for validation

    print(f"Total images: {len(all_images)}")
    print(f"Training set size: {len(train_images)}")
    print(f"Validation set size: {len(val_images)}")
    print(f"Test set size: {len(test_images)}")

    return train_images, val_images, test_images

In [3]:
def create_model(input_shape, num_classes):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')  
    ])
    return model

In [4]:
def compile_and_train(model, train_generator, validation_generator, epochs):
    
    filepath="model_{epoch:02d}_{val_accuracy:.2f}.h5"
    model_dir = r'C:\Users\purin\Desktop\ImageClassifier-Animal\ImageClassifier-Animal\models'
    model_save_path = os.path.join(model_dir, filepath)
    os.makedirs(os.path.dirname(model_dir), exist_ok=True)

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=(epochs//10) +1,  
        restore_best_weights=True  
    )
    
    model_checkpoint_callback = ModelCheckpoint(
        filepath= model_save_path,
        save_best_only= True,
        monitor= 'val_accuracy',
        mode='max',
        verbose = 1
    )
    
    history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.n // train_generator.batch_size,
        epochs= epochs,
        validation_data=validation_generator,
        validation_steps=validation_generator.n // validation_generator.batch_size
        callbacks= [early_stopping, model_checkpoint_callback]
    )
    return history

In [5]:
def save_text(output_directory,image_list) :
    with open(output_directory, 'w') as file :
        for item in image_list:
            file.write('%s\n' % item)

In [None]:
def plot_training_history(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(acc) + 1)

    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()

    plt.show()

In [None]:
def visualize_feature_maps(model, layer_name, input_image):
    model = Model(inputs=model.inputs, outputs=model.get_layer(layer_name).output)
    feature_maps = model.predict(input_image)

    plt.figure(figsize=(15, 15))
    for i in range(1, feature_maps.shape[-1] + 1):
        plt.subplot(6, 6, i)
        plt.imshow(feature_maps[0, :, :, i-1], cmap='viridis')
        plt.axis('off')
    plt.show()

In [6]:
def main(original_images_directory:str, augmented_images_directory:str, train_test_valid_directory:str,epochs:int):

    animals = [
    "antelope", "badger", "bat", "bear", "bee", "beetle", "bison", "boar", "butterfly",
    "cat", "caterpillar", "chimpanzee", "cockroach", "cow", "coyote", "crab", "crow", "deer",
    "dog", "dolphin", "donkey", "dragonfly", "duck", "eagle", "elephant", "flamingo", "fly",
    "fox", "goat", "goldfish", "goose", "gorilla", "grasshopper", "hamster", "hare", "hedgehog",
    "hippopotamus", "hornbill", "horse", "hummingbird", "hyena", "jellyfish", "kangaroo",
    "koala", "ladybugs", "leopard", "lion", "lizard", "lobster", "mosquito", "moth", "mouse",
    "octopus", "okapi", "orangutan", "otter", "owl", "ox", "oyster", "panda", "parrot",
    "pelecaniformes", "penguin", "pig", "pigeon", "porcupine", "possum", "raccoon", "rat",
    "reindeer", "rhinoceros", "sandpiper", "seahorse", "seal", "shark", "sheep", "snake",
    "sparrow", "squid", "squirrel", "starfish", "swan", "tiger", "turkey", "turtle", "whale",
    "wolf", "wombat", "woodpecker", "zebra"
    ]
    

    train_images, test_images, val_images = train_test_validate(original_images_directory, augmented_images_directory, animals)
    train_images, val_images = shuffle(train_images), shuffle(val_images)
    

    train_text_path = os.path.join(train_test_valid_directory, 'train.txt')
    test_text_path = os.path.join(train_test_valid_directory, 'test.txt')
    validate_text_path = os.path.join(train_test_valid_directory, 'validate.txt')
    save_text(train_text_path,train_images)
    save_text(test_text_path,test_images)
    save_text(validate_text_path,val_images)
    
    
    train_df = pd.DataFrame({
        'filename': train_images,
        'class': [os.path.basename(os.path.dirname(x)) for x in train_images] 
    })
    val_df = pd.DataFrame({
        'filename': val_images,
        'class': [os.path.basename(os.path.dirname(x)) for x in val_images]
    })

   
    train_datagen = ImageDataGenerator(rescale=1./255)
    validation_datagen = ImageDataGenerator(rescale=1./255)
    
   
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='filename',
        y_col='class',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical'  
    )

    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='filename',
        y_col='class',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical' 
    )
    
  
    num_classes = train_df['class'].nunique()
    model = create_model((224, 224, 3), num_classes)
    plot_model(model, to_file='model_architecture.png', show_shapes=True, show_layer_names=True)
   
    history = compile_and_train(model, train_generator, validation_generator, epochs)
    plot_training_history(history)
   
    model.save('animal_classifier_model.h5')

   
    print(history.history['accuracy'])
    print(history.history['val_accuracy'])

In [7]:
if __name__ == "__main__":
    main(
        r'C:\Users\purin\Desktop\ImageClassifier-Animal\ImageClassifier-Animal\data\animal_dataset\animals\animals',
        r'C:\Users\purin\Desktop\ImageClassifier-Animal\ImageClassifier-Animal\data\augmented_images',
        r'C:\Users\purin\Desktop\ImageClassifier-Animal\ImageClassifier-Animal\data\train_test_validate',
        20
    )

Total images: 5400
Training set size: 3780
Validation set size: 540
Test set size: 1080
Found 3780 validated image filenames belonging to 90 classes.
Found 1080 validated image filenames belonging to 90 classes.



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


[0.026414087042212486, 0.08457843959331512, 0.22411952912807465, 0.48319104313850403, 0.7201173901557922, 0.8655282855033875, 0.9234258532524109, 0.9463713765144348, 0.9610459208488464, 0.9602454900741577]
[0.053977273404598236, 0.11079545319080353, 0.18655303120613098, 0.2651515007019043, 0.30397728085517883, 0.3087121248245239, 0.2954545319080353, 0.313446968793869, 0.30587121844291687, 0.3219696879386902]
