<a href="https://colab.research.google.com/github/rhumtea/trainModelML/blob/main/phong_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Introduction:
- This project designs and trains a deep learning model.
- I chose the Natural Images dataset from Kaggle, which contains 8 classes.
- Link: https://www.kaggle.com/datasets/prasunroy/natural-images

In [1]:
import os
import zipfile
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import kagglehub

# Download the Natural Images dataset archive (natural-images.zip) with the
# Kaggle command-line tool; the archive is saved in the current working
# directory (/content when run on Google Colab).
# NOTE(review): presumably requires Kaggle API credentials to be configured
# in the runtime beforehand — confirm.
!kaggle datasets download -d prasunroy/natural-images

Dataset URL: https://www.kaggle.com/datasets/prasunroy/natural-images
License(s): CC-BY-NC-SA-4.0
Downloading natural-images.zip to /content
 99% 340M/342M [00:04<00:00, 120MB/s]
100% 342M/342M [00:04<00:00, 85.3MB/s]


In [2]:
# Folder that holds the original (unsplit) dataset
data_dir = 'natural_images'

# Unpack the downloaded archive into the current working directory
with zipfile.ZipFile('natural-images.zip', 'r') as archive:
  archive.extractall('')

In [3]:
import shutil
import random

# Root folder that will contain the train / validation / test split
base_dir = 'data_split'

# One sub-folder per split
train_dir, val_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val', 'test')
)

# Make sure all three folders exist (no error when they already do)
for split_dir in (train_dir, val_dir, test_dir):
  os.makedirs(split_dir, exist_ok=True)

In [4]:
# Fraction of each class assigned to the train / validation / test split
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

In [5]:
# Split data
# Copy (the originals are left in place) every image of each class folder in
# `data_dir` into the train / validation / test folders.
#
# The split is made deterministic on purpose: os.listdir() returns entries in
# arbitrary order and an unseeded shuffle gives a different partition on every
# run. Because the copies are additive, re-running the cell with a different
# partition would put the same image into more than one split (data leakage).
# Sorting the listings and shuffling with a fixed-seed generator makes a
# re-run reproduce exactly the same files.
split_rng = random.Random(42)

categories = sorted(os.listdir(data_dir))

for category in categories:
  category_path = os.path.join(data_dir, category)

  # Ignore stray files that sit next to the class folders
  if not os.path.isdir(category_path): continue

  # Create one sub-folder per class in the train and validation folders
  os.makedirs(os.path.join(train_dir, category), exist_ok=True)
  os.makedirs(os.path.join(val_dir, category), exist_ok=True)

  # List the images of this class in a stable order, then shuffle reproducibly
  images = sorted(os.listdir(category_path))
  split_rng.shuffle(images)

  # Boundaries of the train and validation slices
  total_images = len(images)
  train_images_index = int(train_ratio * total_images)
  val_images_index = int((train_ratio + val_ratio) * total_images)

  # Slice the shuffled list; the test split receives whatever remains
  train_images = images[:train_images_index]
  val_images = images[train_images_index:val_images_index]
  test_images = images[val_images_index:]

  # Copy train and validation images into their per-class sub-folders
  for image in train_images:
    shutil.copy(os.path.join(category_path, image), os.path.join(train_dir, category))

  for image in val_images:
    shutil.copy(os.path.join(category_path, image), os.path.join(val_dir, category))

  # Copy test images into the flat test folder (no class sub-folders)
  for image in test_images:
    shutil.copy(os.path.join(category_path, image), test_dir)

In [6]:
# Training-data generator: pixel values are scaled by 1/255 and every image is
# randomly rotated, shifted, sheared, zoomed and horizontally flipped so the
# model sees more varied samples and becomes more robust.
# NOTE(review): the ImageNet weights of ResNet50 are normally paired with
# keras.applications.resnet50.preprocess_input rather than 1/255 scaling —
# consider switching the train, validation and test generators together.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation_options)

In [7]:
# Validation and test generators only normalise pixel values (no augmentation);
# each split gets its own generator instance.
val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1.0/255) for _ in range(2)
)

In [8]:
# Every image is resized from its original size to this common height/width
img_height = 224
img_width = 224

# Number of images per batch during training and validation
batch_size = 32

In [9]:
# Options shared by the training and validation iterators.
# class_mode='categorical' yields one-hot encoded labels.
_flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training iterator (reads the augmented generator)
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)

# Validation iterator (normalisation only)
val_generator = val_datagen.flow_from_directory(val_dir, **_flow_options)

Found 4826 images belonging to 8 classes.
Found 1034 images belonging to 8 classes.


In [10]:
# Test generator should be unlabeled.
import pandas as pd

# Collect the path of every file found in the flat test folder
test_images = []
for fname in os.listdir(test_dir):
    test_images.append(os.path.join(test_dir, fname))
print(len(test_images))

# Single-column DataFrame holding the file paths
test_df = pd.DataFrame({'testimages' : test_images})

# Label-free iterator over the test images; shuffle is disabled so the batches
# follow the row order of `test_df`
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='testimages',
    y_col=None,
    class_mode=None,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=False,
)


1039
Found 1039 validated image filenames.


2. Checks before creating the model

In [11]:
# Class names as discovered by the training iterator, and how many there are
class_names = list(train_generator.class_indices)
num_classes = len(class_names)
print(num_classes)
print(class_names)

8
['airplane', 'car', 'cat', 'dog', 'flower', 'fruit', 'motorbike', 'person']


3. Pre-Trained ResNet50 Model

In [12]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ResNet50 backbone with ImageNet weights, without its classification head;
# global average pooling is applied to the final feature maps.
pretrained_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
    pooling='avg',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [13]:
# Start from a fully trainable backbone ...
pretrained_model.trainable = True

# ... then freeze its first 140 layers so only the remaining ones are fine-tuned
for layer_index, backbone_layer in enumerate(pretrained_model.layers):
    if layer_index < 140:
        backbone_layer.trainable = False

4. Model for natural images

In [15]:
# Add custom layer for natural image dataset
from tensorflow.keras.layers import BatchNormalization

# Stack the classification head for the natural-images classes on top of the
# ResNet50 backbone: Flatten -> Dense(256, ReLU) -> BatchNorm -> Dropout(0.5)
# -> softmax over `num_classes`.
# NOTE(review): the backbone is created with pooling='avg', so Flatten looks
# redundant here — confirm before removing.
model = Sequential()
for head_layer in (
    pretrained_model,
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
):
    model.add(head_layer)

In [None]:
# Print the summary of the assembled model
model.summary()

In [16]:
# Configure training: Adam with a small learning rate (1e-4), categorical
# cross-entropy to match the one-hot labels, accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [None]:
# Train the model
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation accuracy has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 50 epochs, validating after each one
history = model.fit(
    x=train_generator,
    epochs=50,
    validation_data=val_generator,
    callbacks=[early_stopping],
)

Epoch 1/50


  self._warn_if_super_not_called()


[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1643s[0m 11s/step - accuracy: 0.4590 - loss: 1.6219 - val_accuracy: 0.4613 - val_loss: 1.6744
Epoch 2/50
[1m 12/151[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m21:53[0m 9s/step - accuracy: 0.6184 - loss: 1.2105 

-- Save the Model --


In [None]:
# Persist the trained model to disk so it can be reloaded later without retraining
model.save(filepath='resnet50_model.keras')

-- Load the Model --

In [None]:
from tensorflow.keras.models import load_model

# Restore the model previously written to 'resnet50_model.keras'
model = load_model(filepath='resnet50_model.keras')

5. Evaluate Model

In [None]:
# Evaluate the model on the held-out test split.
#
# `test_generator` was created with class_mode=None, so it yields images only
# and model.evaluate() has no targets to compute loss/accuracy from. The test
# images were copied flat into `test_dir` under their original file names, so
# each label is recovered by looking the file name up in the per-class folders
# of the original dataset (`data_dir`).

# Class names ordered by the index they had during training, so the one-hot
# columns produced below line up with the model's output units.
ordered_classes = sorted(
    train_generator.class_indices, key=train_generator.class_indices.get
)

test_filenames = set(os.listdir(test_dir))
labelled_rows = [
    {'testimages': os.path.join(test_dir, fname), 'label': class_name}
    for class_name in ordered_classes
    for fname in os.listdir(os.path.join(data_dir, class_name))
    if fname in test_filenames
]

# Every test file must map to exactly one class; anything else means the
# labels cannot be trusted (unknown file, or one name present in two classes).
if len(labelled_rows) != len(test_filenames):
    raise ValueError(
        f"Could not label the test set unambiguously: {len(test_filenames)} "
        f"test files but {len(labelled_rows)} file/class matches."
    )

labelled_test_df = (
    pd.DataFrame(labelled_rows, columns=['testimages', 'label'])
    .sort_values('testimages')
    .reset_index(drop=True)
)

# Same preprocessing and image size as the unlabelled test iterator, but with
# one-hot labels attached; shuffle stays off so results are reproducible.
labelled_test_generator = test_datagen.flow_from_dataframe(
    dataframe=labelled_test_df,
    x_col='testimages',
    y_col='label',
    classes=ordered_classes,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)

loss, accuracy = model.evaluate(labelled_test_generator)
print(f"Test Accuracy: {accuracy * 100:.2f}%")