In [None]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import load_img, img_to_array
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten, Input
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
import kagglehub

# Download the Animals-10 dataset only once per kernel session.
# The check is made on `path` itself (the value every later cell needs) rather
# than on a side flag probed through a bare `except:`, so a failed or
# interrupted download is not hidden and the cell can safely be re-run.
if "path" not in globals():
  path = kagglehub.dataset_download("alessiocorrado99/animals10")

  print("Path to dataset files:", path)

  # Kept for compatibility with any cell that still reads the old flag.
  ran = True
else:
  print(path)

In [None]:
# Target resolution: every image is resized to this square size when loaded
# (32 x 32 is a smaller alternative that was tried).
img_height = img_width = 128

# Image root inside the downloaded dataset: <dataset path>/raw-img
# (manual alternative: root_dir = '/root/raw-img/')
root_dir = os.path.join(path, 'raw-img')

In [None]:
# Function to load and preprocess the images from the directory
def load_images_from_directory(directory):
    """Load every image below ``directory`` into memory together with its label.

    Each immediate sub-directory of ``directory`` is treated as one class.
    Classes are numbered 0..k-1 following the alphabetical order of the folder
    names, so the mapping is contiguous and reproducible between runs; stray
    files sitting next to the class folders are ignored and do not consume a
    label index.

    Images are resized to the module-level ``(img_height, img_width)``.
    Files that cannot be loaded are reported on stdout and skipped.

    Args:
        directory: Path of the folder that contains one sub-folder per class.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per
        successfully loaded image; ``labels`` holds the integer class index.
    """
    # Build the label map from directories only, in sorted order, so indices
    # have no gaps and do not depend on the arbitrary order of os.listdir().
    class_folders = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    label_map = {folder: index for index, folder in enumerate(class_folders)}

    images = []
    labels = []
    for folder, label in label_map.items():
        folder_path = os.path.join(directory, folder)
        # Sorted for a reproducible sample order.
        for img_name in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_name)
            try:
                img = load_img(img_path, target_size=(img_height, img_width))  # Resize to the target size
                pixels = img_to_array(img)  # Convert to numpy array
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Error loading image {img_path}: {e}")
            else:
                # Append both together so images and labels stay aligned.
                images.append(pixels)
                labels.append(label)
    return np.array(images), np.array(labels)

In [None]:
# Load and preprocess all images
# X, y = load_images_from_directory(root_dir)

# Normalize the images to [0, 1]
# X = X.astype('float32') / 255

# One-hot encode the labels
# y = to_categorical(y, num_classes=10)

# # Split the data into training and testing sets (80% train, 20% test)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# print(f"Training data shape: {X_train.shape}, Test data shape: {X_test.shape}")

In [None]:
# Data Augmentation for the training set
train_datagen = ImageDataGenerator(
    rescale=1./255,            # Normalize images to [0, 1]
    rotation_range=20,         # Random rotation
    width_shift_range=0.2,     # Random width shift
    height_shift_range=0.2,    # Random height shift
    shear_range=0.2,           # Shear transformation
    zoom_range=0.2,            # Zoom transformation
    horizontal_flip=True,      # Random horizontal flip
    fill_mode='nearest'        # Filling mode for newly created pixels
)
# No augmentation for the test set, only scaling
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Create a training data generator that loads images from the directory
train_generator = train_datagen.flow_from_directory(
    directory=root_dir,               # Root directory containing the images
    target_size=(img_height, img_width),  # Resize images
    batch_size=128,                     # Number of images to process per batch
    class_mode='categorical',          # Use categorical labels
    subset='training',                 # Automatically divide into training and validation
    shuffle=True,
    validation_split=0.2       # Define the validation split (20% for validation)

)

# Create a validation data generator
test_generator = test_datagen.flow_from_directory(
    directory=root_dir,               # Root directory containing the images
    target_size=(img_height, img_width),  # Resize images
    batch_size=128,                     # Number of images to process per batch
    class_mode='categorical',          # Use categorical labels
    subset='validation',               # Automatically divide into training and validation
    shuffle=False,
    validation_split=0.2       # Define the validation split (20% for validation)

)

In [None]:
# # Fit the model using augmented data
# train_generator = train_datagen.flow(X_train, y_train, batch_size=128)
# test_generator = test_datagen.flow(X_test, y_test, batch_size=128)

# CNN classifier: three 3x3 convolutions with two pooling stages, followed by
# two dense layers and a 10-way softmax output. Dropout follows each pooling
# stage and each hidden dense layer.
model = Sequential([
    Input(shape=(img_height, img_width, 3)),
    Conv2D(50, kernel_size=(3, 3), activation='relu'),
    Conv2D(75, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(125, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(500, activation='relu'),
    Dropout(0.4),
    Dense(250, activation='relu'),
    Dropout(0.3),
    Dense(10, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels from class_mode='categorical'.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation loss has not improved for 3 consecutive epochs and
# restore the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)


In [None]:
# Class weights for the model fit: 'balanced' weights each class inversely to
# its frequency among the training samples.
from sklearn.utils.class_weight import compute_class_weight

# Integer class index of every training sample, as assigned by the generator.
train_labels = train_generator.classes
present_classes = np.unique(train_labels)

# `classes` and `y` are keyword-only in current scikit-learn releases;
# passing them positionally raises a TypeError.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_labels,
)

# Map each class index to its weight explicitly instead of assuming the
# indices are exactly 0..k-1; plain int/float keep the dict framework-neutral.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}

In [None]:
# Train the model.
# `class_weight_dict` (computed in the class-weights cell) is passed here so
# the loss is re-weighted per class; previously it was computed but never used.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator,
    class_weight=class_weight_dict,
    callbacks=[early_stopping],
    verbose=1,
)
# Fewer than 10 recorded epochs means early stopping ended the run.
print("Training stopped at epoch:", len(history.history['loss']))

In [None]:
# Save the trained model to "model.h5" in the working directory.
model.save("model.h5")

# Evaluate the model on `test_generator`, i.e. the 'validation' subset that was
# also used as validation_data during training (not an independent test set).
# evaluate() yields the loss followed by the single 'accuracy' metric set in compile().
loss, accuracy = model.evaluate(test_generator)
print(f"Test accuracy: {accuracy}")

