In [None]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
import os
import cv2
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout, BatchNormalization, Rescaling
from tensorflow.keras.models import Model
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Average

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Define base directory (absolute Drive path of the folder holding train/test/valid)
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# Build the same path one level at a time so each level can be checked on its own
drive_dir = '/content/drive/MyDrive'
bootcamp_dir = os.path.join(drive_dir, 'BOOTCAMP')
colab_notebooks_dir = os.path.join(bootcamp_dir, 'ColabNotebooks')
project_dir = os.path.join(colab_notebooks_dir, 'ProjectWithGreg')
data_dir = os.path.join(project_dir, 'Data')

# Verify each directory level: print whether every level exists, so a wrong or
# renamed folder is visible here rather than in a later os.listdir()/dataset call
for level_dir in (drive_dir, bootcamp_dir, colab_notebooks_dir, project_dir, data_dir):
    print('found  ' if os.path.isdir(level_dir) else 'MISSING', level_dir)

Mounted at /content/drive


In [None]:
# Derive the three split folders from base_dir (the Drive 'Data' folder defined above):
#   train_dir -> <base_dir>/train
#   test_dir  -> <base_dir>/test
#   valid_dir -> <base_dir>/valid
# os.path.join() inserts the path separator between base_dir and the split name
train_dir, test_dir, valid_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'test', 'valid')
)

# Last expression of the cell: list of entry names (files and directories) inside base_dir
os.listdir(base_dir)

['valid',
 'test',
 'train',
 'saved_model',
 'best_model_resnet.keras',
 'best_chained_model_resnet.keras']

In [None]:
# Load every split as a tf.data.Dataset of (image batch, label batch) pairs.
# image_dataset_from_directory labels images from the directory structure: each
# sub-directory of the split folder is treated as one class and its images get
# that class's integer label (integers starting from 0).
#
# FOR CHAINED MODEL, images are resized to 224 x 224 (x 3 channels), which is what
# the ResNet based model expects.
#
# The same settings are used for all three splits:
#   seed=101               seed passed to the loader
#   image_size=(224, 224)  resize target
#   batch_size=32          images per batch
#   label_mode='int'       integer labels, i.e. sparse labels
# The splits are loaded in the order train, test, valid.
training_set, testing_set, validation_set = (
    tf.keras.preprocessing.image_dataset_from_directory(
        split_dir,
        seed=101,
        image_size=(224, 224),
        batch_size=32,
        label_mode='int',
    )
    for split_dir in (train_dir, test_dir, valid_dir)
)


Found 613 files belonging to 4 classes.
Found 315 files belonging to 4 classes.
Found 72 files belonging to 4 classes.


In [None]:
#Alterations necessary for chaining both models
#(1) Resize images to 224, 224, 3 (what ResNet based model expects)
#(2) Remove input_shape from Data Augmentation definition
#(3) Remove all MaxPooling2D and Flatten layers from custom model
#(4) Remove data augmentation and rescaling from custom_cnn model and move them to complete (chained) model
#(5) Add BatchNormalization layer right after base model/at start of custom model to normalize 1D vector output before it's fed into dense layers of custom model
#(6) Remove extra Dropout layers
#(7) Add input layer as first layer of chained model, specify input_shape there
#(8) Remove BatchNormalization layer from within custom_cnn
#(9) DON'T Insert ConverToTensorLayer between ResNet50 and custom_cnn, otherwise model can't be evaluated

In [None]:
# Input geometry and number of classes, fixed before the model and data pipeline are defined
img_size = (224, 224)                          # images are resized to 224x224
channels = 3
img_shape = (*img_size, channels)              # (height, width, channels) = (224, 224, 3)
class_count = len(training_set.class_names)    # one class per entry in the training set's class_names

In [None]:
# ResNet50 feature extractor used as the base of the chained model.
#   weights="imagenet"   start from weights pre-trained on the ImageNet dataset
#   include_top=False    drop ResNet50's own classification layer; custom_cnn classifies instead
#   pooling='max'        global max pooling, so the output is one 2048-feature vector per
#                        image, shape (None, 2048), which Dense layers can consume directly
#                        (without it the output would be a 4D tensor (None, height, width, 2048))
#   input_shape          matches the resized images
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    pooling='max',
    input_shape=img_shape,
)

# Freeze every layer of the base model so only the layers added on top are trained
for layer in base_model.layers:
    layer.trainable = False


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
# Data augmentation pipeline: horizontal flip, then rotation, then zoom.
# No input_shape is given here on purpose; it is specified in the Input layer of
# the chained model instead.
data_augmentation = Sequential()
data_augmentation.add(RandomFlip("horizontal"))
data_augmentation.add(RandomRotation(0.2))
data_augmentation.add(RandomZoom(0.2))

 Defining custom_CNN model  
 MaxPooling2D Layers typically used to reduce spatial dimensions (height, width) of feature maps while retaining important information  
If using ResNet50 with pooling='max', output is already 1D vector (features extracted from entire image)  
 additional MaxPooling2D layers unnecessary since feature extraction has already been done  
 Flatten Layer used to convert 2D feature map into 1D vector, which is required to connect to dense layers  
When using pre-trained model with pooling='max', output is already 1D vector, so Flatten layer not needed  

In [None]:
# Classification head to be chained after ResNet50.
#
# The Conv2D / MaxPooling2D / Flatten / BatchNormalization / extra Dropout layers of the
# stand-alone custom CNN were removed for chaining: with pooling='max' the base model
# already outputs one 2048-feature vector per sample, shape (batch_size, 2048), so there
# is no 2D feature map left to convolve, pool or flatten.
custom_cnn = Sequential()

# Hidden layer: 128 units with ReLU; works on 2D tensors where each row is one sample's features
custom_cnn.add(Dense(128, activation='relu'))

# Regularisation: randomly sets 25% of the input units to 0 during training to limit overfitting
custom_cnn.add(Dropout(0.25))

# Output layer: class_count units (one per class) with softmax
custom_cnn.add(Dense(class_count, activation='softmax'))


tf.keras.Sequential([...]):  
Usage: This method is used when you want to refer to Sequential through the TensorFlow library directly.  

import tensorflow as tf

model = tf.keras.Sequential([  
    tf.keras.layers.Dense(64, activation='relu'),  
    tf.keras.layers.Dense(10, activation='softmax')  
])  

Sequential([...]):  
Usage: This method is used when you've specifically imported the Sequential class from tensorflow.keras.models.  

from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import Dense  

model = Sequential([  
    Dense(64, activation='relu'),  
    Dense(10, activation='softmax')  
])  

Without pooling: You get a 4D tensor (None, 7, 7, 2048)  
With global pooling: You get a 2D tensor (None, 2048)

In [None]:
#CHAIN PRE-TRAINED MODEL AND CUSTOM MODEL
# Create chained model by chaining ResNet50 with custom_cnn
# Notes:
# - The Input layer comes first and carries the input shape; training threw errors without it.
# - Data augmentation is applied before rescaling, and both live only in the chained model
#   (they were removed from custom_cnn).
# - custom_cnn only has to handle the 1D feature vector per sample that the base model
#   outputs, so Dense and Dropout layers are appropriate there.
# - Conv2D or Flatten layers are not needed after the base model; they're for 2D feature
#   maps, not 1D vectors.
# - No BatchNormalization layer is placed between the base model and custom_cnn.

model = Sequential([
    Input(shape=img_shape),   # Input layer first; the input shape is specified here
    data_augmentation,        # Apply data augmentation before rescaling
    Rescaling(1./255),        # Apply rescaling after augmentation
                              # NOTE(review): Keras documents that ResNet50's ImageNet weights expect
                              # tf.keras.applications.resnet50.preprocess_input (RGB->BGR plus per-channel
                              # mean subtraction, no scaling), not 1/255 rescaling. Left unchanged here;
                              # worth testing as a cause of the low accuracy - confirm.
    base_model,               # Base model ResNet50, outputs shape (None, 2048)
    custom_cnn                # Custom head on top of base model; its first layer is Dense and accepts the base_model output
])

#Compile model
# sparse_categorical_crossentropy matches the integer labels (label_mode='int') of the datasets
optimizer = Adam()
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# The best model is written to a single file with the .keras extension inside base_dir.
# To save a model to a specific directory, provide the complete absolute path.
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# Create base directory if it doesn't exist (no error if it already does)
os.makedirs(base_dir, exist_ok=True)

# Define full file path including base directory
filepath = os.path.join(base_dir, 'best_chained_model_resnet.keras')

# Deliberately no model.save(filepath) at this point: the model is still untrained, and
# saving it here would overwrite the best checkpoint from an earlier run every time this
# cell is re-executed. The file is written by the ModelCheckpoint callback during training.

# Create ModelCheckpoint callback to save best model based on validation accuracy
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')


In [None]:
#Check Output Shape of Each Layer
#model.summary() prints one row per top-level layer of the chained model; run it after the model
#is built and compare the output shapes of consecutive rows to spot shape mismatches between the
#chained parts (e.g. base model output vs. what custom_cnn's first Dense layer accepts)

model.summary()


The output shape (None, 2048) from the ResNet50 layer is a 2D tensor.  

The first dimension None represents the batch size, which is dynamic and can vary.   
The second dimension 2048 is the size of the feature vector output by the ResNet50 model after the global pooling layer (pooling='max' in this case).  
This output is a 2D tensor with shape (batch_size, 2048)  

The shapes (None, 2048) and (2048,) might seem different, but they are actually compatible due to how dimensions are interpreted in the context of neural network layers.  

Tensor Shapes in Neural Networks  
(None, 2048): This shape represents a 2D tensor:  
None: This dimension is the batch size, which can vary and is handled dynamically during training and inference.  
2048: This is the feature dimension or the number of features for each sample in the batch.  
(2048,): This shape represents a 1D tensor:  
2048: This is the feature dimension or the number of features for each individual sample.  

Compatibility  
In neural networks, the shape (None, 2048) means that each sample in a batch has 2048 features. When a layer expects an input shape of (2048,), it is designed to handle each individual sample's feature vector independently of the batch size.

Here's why they are compatible:  

Batch Dimension Handling: The first dimension, None, is flexible and can accommodate any batch size. This flexibility is managed internally by the neural network framework, so the model doesn’t need to know the batch size in advance.  

Feature Dimension Matching: The 2048 feature dimension in (None, 2048) matches exactly with the 2048 feature dimension expected by the layer with input_shape=(2048,). The 2048 feature dimension in the layer's input_shape specifies the number of features for each individual sample, not considering the batch size.  

How They Work Together  
Layer Input Handling: When a neural network layer receives an input tensor with shape (None, 2048), it processes each sample’s feature vector (with shape (2048,)) independently, and it handles the batch dimension automatically.  

Sequential Layers: In a Sequential model, you set input_shape=(2048,) for the first layer. This tells the model that each sample in the batch has 2048 features. The batch dimension is implicitly handled, so the model correctly processes tensors of shape (None, 2048).  

Batch Dimension: The batch size (None) is managed by the framework.    
Feature Dimension: The 2048 feature dimension aligns between the output of ResNet50 and the input expectation of custom_cnn.    
Thus, these shapes are compatible as they specify the same feature vector size for each sample, and the batch dimension is handled separately.    

In [None]:
# Train the chained model for up to 100 epochs, validating on validation_set after each
# epoch. The checkpoint callback saves the model whenever val_accuracy improves; the
# returned History object is kept in `history`.
history = model.fit(
    training_set,
    validation_data=validation_set,
    epochs=100,
    callbacks=[checkpoint],
    verbose=1,
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.3415 - loss: 2.4409
Epoch 1: val_accuracy improved from -inf to 0.48611, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/best_chained_model_resnet.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 7s/step - accuracy: 0.3434 - loss: 2.4202 - val_accuracy: 0.4861 - val_loss: 1.4885
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.4972 - loss: 1.3830
Epoch 2: val_accuracy did not improve from 0.48611
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 7s/step - accuracy: 0.4970 - loss: 1.3773 - val_accuracy: 0.4722 - val_loss: 1.0158
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.4690 - loss: 1.0330
Epoch 3: val_accuracy did not improve from 0.48611
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 6s/step -

If KeyboardInterrupt halts training before reaching number of specified epochs, reload saved model, recompile model, and continue training model.

If training does not reach 100 epochs but val accuracy did not improve from highest value, move on to evaluating the model.

In [None]:
#Prepare to load saved model

# Import necessary libraries
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.models import load_model

#Mount Google Drive:
from google.colab import drive
drive.mount('/content/drive')

# Define base directory
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# List contents of directory
#!ls -l {directory_path}
!ls -l {base_dir}

# Verify each directory level
drive_dir = '/content/drive/MyDrive'
bootcamp_dir = os.path.join(drive_dir, 'BOOTCAMP')
colab_notebooks_dir = os.path.join(bootcamp_dir, 'ColabNotebooks')
project_dir = os.path.join(colab_notebooks_dir, 'ProjectWithGreg')
data_dir = os.path.join(project_dir, 'Data')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
total 189718
-rw------- 1 root root 98159789 Aug 13 03:56 best_chained_model_resnet.keras
-rw------- 1 root root 96094261 Aug  8 23:27 best_model_resnet.keras
drwx------ 2 root root     4096 Jul 31 17:32 saved_model
drwx------ 2 root root     4096 Jul 15 23:58 test
drwx------ 2 root root     4096 Jul 15 23:58 train
drwx------ 2 root root     4096 Jul 15 23:58 valid


In [None]:
# Path of the saved chained model: the .keras file inside base_dir that the
# ModelCheckpoint callback writes during training
model_path = os.path.join(base_dir, 'best_chained_model_resnet.keras')

# Load saved model from that file; `model` now refers to the loaded model
model = load_model(model_path)

In [None]:
# *** Skip this cell if not continuing to train model ***
# Needs only `model` (loaded above) and `base_dir`. Everything else is rebuilt here,
# because after a kernel restart the datasets and the checkpoint callback from the first
# training run no longer exist (this cell used to stop with
# "NameError: name 'training_set' is not defined").

#If you tracked number of epochs completed previously (e.g., from logs or previous training history), you can calculate how many more epochs
#are needed and specify that number

# Suppose you trained for 38 epochs previously and want to train for 12 more
previous_epochs = 38
additional_epochs = 12

# Rebuild the two splits used during training, with the same settings as the original run
training_set = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(base_dir, 'train'),
    seed=101,
    image_size=(224, 224),
    batch_size=32,
    label_mode='int',
)
validation_set = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(base_dir, 'valid'),
    seed=101,
    image_size=(224, 224),
    batch_size=32,
    label_mode='int',
)

# Recreate the checkpoint callback. A new ModelCheckpoint has no memory of the earlier
# run, so without a starting threshold the first resumed epoch would overwrite the saved
# best model even if it is worse. Seed it with the loaded model's current validation
# accuracy (model.evaluate returns [loss, accuracy]).
best_val_accuracy = model.evaluate(validation_set, verbose=0)[1]
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    os.path.join(base_dir, 'best_chained_model_resnet.keras'),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
    initial_value_threshold=best_val_accuracy,
)

# Continue training model
history = model.fit(
    x=training_set,
    epochs=previous_epochs + additional_epochs,   # epochs is the index of the final epoch, not a count
    initial_epoch=previous_epochs,  # resume training from where you left off
    verbose=1,
    validation_data=validation_set,
    callbacks=[checkpoint]
)

NameError: name 'training_set' is not defined

In [None]:
#Run this cell to redefine train_dir, test_dir, valid_dir

# Derive the three split folders from base_dir (the Drive 'Data' folder defined above):
#   train_dir -> <base_dir>/train
#   test_dir  -> <base_dir>/test
#   valid_dir -> <base_dir>/valid
# os.path.join() inserts the path separator between base_dir and the split name
train_dir, test_dir, valid_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'test', 'valid')
)

# Last expression of the cell: list of entry names (files and directories) inside base_dir
os.listdir(base_dir)

['valid',
 'test',
 'train',
 'saved_model',
 'best_model_resnet.keras',
 'best_chained_model_resnet.keras']

In [None]:
#Run this cell to redefine training_set, testing_set, validation_set

# Load every split as a tf.data.Dataset of (image batch, label batch) pairs.
# image_dataset_from_directory labels images from the directory structure: each
# sub-directory of the split folder is treated as one class and its images get
# that class's integer label (integers starting from 0).
#
# FOR CHAINED MODEL, images are resized to 224 x 224 (x 3 channels), which is what
# the ResNet based model expects.
#
# The same settings are used for all three splits:
#   seed=101               seed passed to the loader
#   image_size=(224, 224)  resize target
#   batch_size=32          images per batch
#   label_mode='int'       integer labels, i.e. sparse labels
# The splits are loaded in the order train, test, valid.
training_set, testing_set, validation_set = (
    tf.keras.preprocessing.image_dataset_from_directory(
        split_dir,
        seed=101,
        image_size=(224, 224),
        batch_size=32,
        label_mode='int',
    )
    for split_dir in (train_dir, test_dir, valid_dir)
)

Found 613 files belonging to 4 classes.
Found 315 files belonging to 4 classes.
Found 72 files belonging to 4 classes.


In [None]:
#Run this cell to evaluate model on all three data sets

# Evaluate in the order train, validation, test. Each score is indexed below as
# [0] = loss and [1] = accuracy.
train_score, valid_score, test_score = [
    model.evaluate(split_set, verbose=1)
    for split_set in (training_set, validation_set, testing_set)
]

# Print evaluation results, one loss/accuracy pair per split
print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 7s/step - accuracy: 0.6250 - loss: 0.8627
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4s/step - accuracy: 0.5030 - loss: 0.9682
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 7s/step - accuracy: 0.5497 - loss: 0.9249
Train Loss:  0.8534024357795715
Train Accuracy:  0.6329526901245117
--------------------
Validation Loss:  0.9649490714073181
Validation Accuracy:  0.5138888955116272
--------------------
Test Loss:  0.9231241941452026
Test Accuracy:  0.5333333611488342


In [None]:
# *** Skip this cell if not continuing to train model ***
# Needs `model`, `base_dir`, `training_set` and `validation_set` from the cells above.

from tensorflow.keras.optimizers import Adam

# Number of further epochs to train for. This name was never defined anywhere in the
# notebook, so the fit call below used to fail with a NameError; adjust the value as needed.
remaining_epochs = 12

# Recompile model with same optimizer and loss function
# (a new Adam() instance starts with fresh optimizer state)
optimizer = Adam()  # or whatever optimizer you were using
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Recreate the checkpoint callback here: it does not exist after a kernel restart, and a
# new ModelCheckpoint has no memory of the earlier run. Seeding it with the model's
# current validation accuracy (model.evaluate returns [loss, accuracy]) keeps the first
# resumed epoch from overwriting the saved best model with a worse one.
best_val_accuracy = model.evaluate(validation_set, verbose=0)[1]
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    os.path.join(base_dir, 'best_chained_model_resnet.keras'),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
    initial_value_threshold=best_val_accuracy,
)

# Continue training model
history = model.fit(
    x=training_set,             # your training data
    epochs=remaining_epochs,    # the number of epochs you want to continue training for
    verbose=1,                  # or adjust as per your preference
    validation_data=validation_set,  # your validation data
    callbacks=[checkpoint]      # keeps saving the best model to the same file
)

You don't necessarily need to use test_steps unless your dataset is a generator or if you are working with very large datasets that don't fit entirely in memory.  

train_score = model.evaluate(training_set, steps=test_steps, verbose=1)  
valid_score = model.evaluate(validation_set, steps=test_steps, verbose=1)  
test_score = model.evaluate(testing_set, steps=test_steps, verbose=1)  

---
If you're using a generator or a custom data pipeline, you might want to calculate test_steps.

In the context of training or evaluating a machine learning model, a step refers to a single batch of data being processed by the model. When you specify a step limit or simply steps, you're defining the number of batches the model should process in a single epoch during training or during evaluation.   

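Steps per Epoch: During training, steps per epoch is the number of batches the model processes before declaring one epoch complete. If your dataset has N samples and your batch size is B, then the steps per epoch would typically be N/B rounded up, because a final partial batch still counts as a step (for example, the 613 training images here with a batch size of 32 give 20 steps).  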

Steps During Evaluation: When evaluating a model, steps determine how many batches of the data will be processed to compute the evaluation metrics like loss and accuracy. For instance, if you have a dataset with 1,000 samples and a batch size of 50, you would typically set the number of steps to 20 (1000/50 = 20).  
  
Step Limit:A step limit is essentially a maximum number of steps you allow the model to process in a single epoch or during evaluation. If you have a large dataset and you want to limit the number of batches processed per epoch or evaluation run, you can set a step limit.  
For example, even if your dataset would require 100 steps to cover all the data, you might set a step limit of 50, meaning only half of the data is processed in each epoch.  

When is Step Limit Used?  
Large Datasets: When datasets are large and you want to avoid processing the entire dataset in one go, either to save time or to avoid overfitting.  

Control Over Training: It allows for more control over training or evaluation processes, especially when the computation time or resources are limited.  

Custom Evaluation: In scenarios where you might want to evaluate a model on a subset of the data rather than the entire dataset.    
In your code, the concept of a step limit isn’t explicitly mentioned. However, test_steps is calculated based on the total number of samples and the batch size, and this effectively controls how much of the dataset is evaluated in each evaluation run. The test_batch_size calculation picks the largest divisor of ts_length that is no greater than 80, so every batch is full and test_steps = ts_length // test_batch_size batches cover the dataset exactly (the threshold of 80 caps the batch size, not the number of steps).

In [None]:
#ts_length represents total number of samples in testing set
#ts_length used to determine batch size and number of steps needed for evaluation
#ts_length used to ensure evaluation of model is done efficiently, with optimal batch size and correct number of steps, so entire testing set is processed without exceeding step limit

# calculate ts_length by multiplying length of testing_set (len(testing_set)) by batch size and store calculation in variable 'ts_length'
##ts_length = len(testing_set) * 32                 # Adjust based on actual batch size

# ts_length used to:

#1. Calculate Batch Size, (test_batch_size): helps in dynamically calculating appropriate batch size for evaluating model on dataset
#test_batch_size: calculates batch size as largest divisor of ts_length that is <= 80 (the 80 caps the batch size, not the number of steps)
#this ensures every batch is full, so ts_length // test_batch_size steps cover the entire dataset exactly

#2. Calculate Evaluation Steps (test_steps): crucial for determining number of steps (batches) required to cover entire dataset during evaluation
#test_steps: number of steps calculated by dividing ts_length by test_batch_size
#determines how many batches will be processed during evaluation of dataset

# Determine batch size and steps for evaluation

#calculates test_batch_size for evaluating the testing set ('ts' in ts_length stands for testing set, not time series)
#ts_length represents total number of samples in testing set
##test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length % n == 0 and ts_length / n <= 80]))

##test_steps = ts_length // test_batch_size

In [None]:
#If you did not use a data image generator to create the training, testing, and validation sets of data, and you have the entire datasets loaded into memory,
#it is not necessary to use generators for evaluation. Using generators is beneficial when dealing with large datasets that cannot fit into memory all at once,
#as generators allow you to load data in batches during training and evaluation. When you have the entire dataset in memory, you can directly evaluate the model
#on the full dataset without the need for generators. Therefore, if you have already loaded the training, testing, and validation data into memory and do not need
#to process the data in batches, you can use the simpler method for evaluating the model (rather than the one below)

#ts_length = len(test_df)
#test_batch_size = test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
#test_steps = ts_length // test_batch_size
#train_score = model.evaluate(train_gen, steps= test_steps, verbose= 1)
#valid_score = model.evaluate(valid_gen, steps= test_steps, verbose= 1)
#test_score = model.evaluate(test_gen, steps= test_steps, verbose= 1)

#print("Train Loss: ", train_score[0])
#print("Train Accuracy: ", train_score[1])
#print('-' * 20)
#print("Validation Loss: ", valid_score[0])
#print("Validation Accuracy: ", valid_score[1])
#print('-' * 20)
#print("Test Loss: ", test_score[0])
#print("Test Accuracy: ", test_score[1])

SyntaxError: invalid syntax (<ipython-input-15-cfd9e07a233d>, line 3)

Evaluate Model on training, testing, and validation sets.

Without test_steps (Recommended if using tf.data.Dataset):  
train_score = model.evaluate(training_set, verbose=1)  
valid_score = model.evaluate(validation_set, verbose=1)  
test_score = model.evaluate(testing_set, verbose=1)  
model.evaluate(): This method will automatically go through the entire dataset, as long as training_set, validation_set, and testing_set are properly batched.

training_set, validation_set, testing_set are tf.data.Dataset objects that yield batches of images and labels for evaluation


Data augmentation and rescaling typically occur before the data is fed into the base_model or any other parts of the neural network.  
  
Data Augmentation and Rescaling: These preprocessing steps are essential to ensure that the input data is properly formatted and normalized before it reaches the model. Augmentation helps in generalizing the model by artificially increasing the diversity of the training data, and rescaling normalizes the pixel values, which helps in stabilizing and speeding up the training process.
  
Location in the Pipeline: These steps are applied to the images as they are being loaded and processed, which means they happen before the images are passed to the base_model. The purpose is to ensure that the images are in the right format and scale when they enter the model.  
  
Illustration of process:  
  
Image Loading: Images are loaded from the directory.  
Data Augmentation and Rescaling: Applied to the images as they are being loaded.  
Model Processing: The preprocessed images are then fed into the base_model and subsequently through the custom layers added on top.  

In a model where you are chaining a custom architecture with a pre-trained model like ResNet50, you need to place the data augmentation and rescaling layers appropriately. Here’s how you should approach it:

Steps to Chain a Custom Model with ResNet50
Data Augmentation: Data augmentation should be applied to the input images before they are fed into the base model (ResNet50). This preprocessing helps to increase the diversity of the training data by applying transformations like rotations, flips, etc.

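Rescaling: Rescaling should be applied after data augmentation. This is because the Rescaling layer adjusts the pixel values to the range expected by the model. A range of 0 to 1 is a common choice for a model trained from scratch. When ResNet50 is loaded with pretrained ImageNet weights, however, the preprocessing it was trained with is `tf.keras.applications.resnet50.preprocess_input` (RGB-to-BGR conversion and per-channel zero-centering, without scaling), so in that case use `preprocess_input` instead of a plain 1/255 rescale.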

UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  This warning indicates that you should use an Input layer as the first layer in your Sequential model, rather than specifying input_shape directly in the first layer. This is the preferred method in TensorFlow/Keras when building Sequential models.

  You specified input_shape directly in the RandomFlip layer, which is part of the data augmentation process. This can lead to the warning you received. To address this, you should use an Input layer as the first layer in your Sequential model to define the input shape, and remove the input_shape argument from other layers.

The error you are encountering, AttributeError: 'list' object has no attribute 'shape', is likely due to the way the dataset is being passed to the model for evaluation. The BatchNormalization layer expects a tensor as input, but it seems that a list is being passed instead.

In the context of evaluating the model, the issue arises because the BatchNormalization layer expects a tensor as input, but it is receiving a list instead. Note that training and evaluation run the same forward pass through the layers, so backpropagation does not make a list input acceptable during training. If training succeeded and only evaluation fails, compare exactly how the model and the datasets are passed in each call (TODO: confirm the actual cause against the full traceback).

However, during evaluation, the model is expected to make predictions on new data without updating its weights. This prediction process requires the data to be in the correct tensor format for the layers to process the information correctly.

When the BatchNormalization layer encounters a list instead of a tensor during evaluation, it raises an error because it cannot process the data in that format. This discrepancy in data format between training and evaluation can lead to errors specifically related to the BatchNormalization layer, which requires tensor inputs to perform normalization operations.

Therefore, to ensure successful evaluation of the model, it is crucial to pass the evaluation datasets in the correct tensor format, allowing each layer, including BatchNormalization, to process the data appropriately and make predictions accurately.

BatchNormalization is a layer in neural networks that normalizes the inputs to a layer for each mini-batch. This helps in stabilizing the learning process and dramatically reduces the number of training epochs required to train deep networks.  
  
What Batch Normalization Does:  
Normalization: It normalizes the output of the previous activation layer by subtracting the batch mean and dividing by the batch standard deviation. This centers the data around zero with a standard deviation of one.

Scale and Shift: After normalization, it applies a scaling factor (gamma) and a shift factor (beta), which allows the layer to learn the optimal scale and mean of the inputs. This ensures the layer can represent the identity transformation if necessary.  
  
Why Use Batch Normalization:  
Speed Up Training: By normalizing the inputs, it helps in stabilizing the learning process. This often allows for higher learning rates, leading to faster convergence.  
  
Regularization: It has a slight regularizing effect, reducing the need for other forms of regularization like dropout. This happens because the mini-batch statistics add some noise to each training step.  
  
Reduce Internal Covariate Shift: It mitigates the problem of internal covariate shift, where the distribution of each layer's inputs changes during training. By maintaining the inputs to each layer with a more consistent distribution, it simplifies and accelerates training.  
  
Gradient Propagation: It helps in maintaining gradients in a range that prevents them from vanishing or exploding, making it easier to train deeper networks.  
  
Example of Batch Normalization in the Model:
In your case, BatchNormalization is used after the output of the base model and before the dense layers. This helps in ensuring that the inputs to the dense layers are normalized, improving the training process.










The error message you are encountering indicates that there is an issue with the data being passed to the model.evaluate() method. Note that model.evaluate() accepts NumPy arrays, tensors, and tf.data.Dataset objects (a _PrefetchDataset is a tf.data.Dataset) that yield batches of (inputs, targets), so passing a dataset object is not by itself the problem. Check the structure of the elements the dataset yields first.

If the dataset's element structure cannot be fixed, you can fall back to extracting the data from your dataset object before passing it to the model.evaluate() method. You can do this by iterating over the dataset and converting it to tensors or numpy arrays that can be used for evaluation.