In [21]:
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Flatten
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from sklearn.metrics import classification_report
import tensorflow as tf
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2

In [23]:
# Reset the Keras backend state left over from any earlier model built in this
# kernel, so the layers created below start from a clean session.
K.clear_session()

# Root folder read by the training and validation generators (`directory=dataset`).
# NOTE(review): "Path" is a placeholder — replace it with the real dataset folder.
dataset = "Path"

# Creating Test Dataset

In [27]:
import os
import random
import shutil

# Set your paths
source_dir = "Original Path"
test_data_dir = "Dataset PAth"
test_split_ratio = 0.2  # Define the percentage of data to use for testing
split_seed = 42  # Fixed seed: a fresh run always selects the same files

# List your known class names
class_names = ['class1', 'class2', 'class3', 'class4']

# Create test data directory if it doesn't exist
os.makedirs(test_data_dir, exist_ok=True)

# Move a random `test_split_ratio` share of every class folder from
# `source_dir` into the matching folder under `test_data_dir`.
for class_name in class_names:
    class_source_path = os.path.join(source_dir, class_name)
    class_test_path = os.path.join(test_data_dir, class_name)

    # A missing class folder is reported instead of being skipped silently.
    if not os.path.isdir(class_source_path):
        print(f"Skipping '{class_name}': {class_source_path} is not a directory.")
        continue

    # Create class directory in the test set directory if it doesn't exist
    os.makedirs(class_test_path, exist_ok=True)

    # Idempotency guard: the files are MOVED, so running this cell a second
    # time would carve another 20% out of what is left in the source folder.
    # If the test folder already holds files for this class, leave it alone.
    if any(not entry.startswith('.') for entry in os.listdir(class_test_path)):
        print(f"Skipping '{class_name}': {class_test_path} already contains files.")
        continue

    # Visible regular files only (no hidden files, no sub-directories).
    # Sorting first makes the shuffle independent of os.listdir's arbitrary order.
    file_names = sorted(
        entry for entry in os.listdir(class_source_path)
        if not entry.startswith('.')
        and os.path.isfile(os.path.join(class_source_path, entry))
    )

    # Per-class RNG with a fixed seed: reproducible, independent of which other
    # classes were skipped, and does not disturb the global `random` state.
    random.Random(split_seed).shuffle(file_names)

    # Determine the number of files to move to the test set
    test_size = int(len(file_names) * test_split_ratio)

    # Move files to the test set directory
    for file_name in file_names[:test_size]:
        source_file = os.path.join(class_source_path, file_name)
        dest_file = os.path.join(class_test_path, file_name)
        shutil.move(source_file, dest_file)

print("Test set created successfully.")

Test set created successfully.


In [29]:
import shutil

# Function to manually remove a .DS_Store entry (file or directory) if it exists
def remove_ds_store_directory(directory):
    """Delete the `.DS_Store` entry located directly inside `directory`.

    `.DS_Store` is normally a regular file, so both cases are handled: a
    regular file (or symlink) is unlinked, a real directory is removed
    recursively. Nothing happens when no such entry exists.

    Args:
        directory: Path of the folder to clean. Only this folder itself is
            checked; sub-folders are not visited.
    """
    ds_store_path = os.path.join(directory, '.DS_Store')
    if os.path.isdir(ds_store_path) and not os.path.islink(ds_store_path):
        # Real directory: remove it together with its contents.
        shutil.rmtree(ds_store_path)
    elif os.path.lexists(ds_store_path):
        # Regular file or (possibly dangling) symlink.
        os.remove(ds_store_path)
    else:
        return
    print(f"Removed {ds_store_path}")

# Clean up a stray .DS_Store entry at the top level of the test data directory.
# Only `test_data_dir` itself is checked; the per-class sub-folders are not.
remove_ds_store_directory(test_data_dir)


# Data Augmentation

# Training generator: rescales pixels to [0, 1] and applies random geometric
# augmentation. `validation_split` reserves 20% of each class for validation.
trdata = ImageDataGenerator(
    rescale=1./255,
    rotation_range=33,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.4,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2,
)

# Validation generator: same rescaling and same split fraction, no augmentation.
valdata = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

# Test generator: rescaling only.
tsdata_new = ImageDataGenerator(rescale=1./255)

In [31]:
# Augmented training batches, drawn from the 'training' subset of `dataset`.
train_data = trdata.flow_from_directory(
    directory=dataset,
    subset='training',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',  # one-hot labels
    batch_size=32,
    shuffle=True,
    seed=42,
)

Found 972 images belonging to 4 classes.


In [33]:
# Un-augmented validation batches, drawn from the 'validation' subset of `dataset`.
valid_data = valdata.flow_from_directory(
    directory=dataset,
    subset='validation',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',  # one-hot labels
    batch_size=32,
)

Found 413 images belonging to 4 classes.


In [None]:
# Test data
test_data_new = tsdata_new.flow_from_directory(
    directory=test_data_dir,
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False,  # Test data should not be shuffled
)

# Print class names in the new test set
test_classes_new = test_data_new.class_indices
print("\nClasses detected in the new test set:")
for class_name, class_index in test_classes_new.items():
    print(f"{class_name}: {class_index}")

# Sample counts of the three generators
print(f"Training samples: {train_data.samples}")
print(f"Validation samples: {valid_data.samples}")
print(f"Testing samples: {test_data_new.samples}")


In [None]:
# Pull one (augmented) batch from the training generator
images, labels = next(train_data)

# Invert the generator's name -> index mapping so a predicted/true index can be
# turned back into its class name without relying on dict key order.
index_to_class = {index: name for name, index in train_data.class_indices.items()}

# Plot four random images from the batch
fig, axes = plt.subplots(1, 4, figsize=(15, 5))
for ax in axes:
    # Draw the index from the actual batch length: a batch can hold fewer
    # images than the configured batch size, and a hard-coded upper bound
    # would then raise IndexError.
    random_index = np.random.randint(0, len(images))

    # Plot the image
    ax.imshow(images[random_index])

    # Labels are one-hot vectors; argmax recovers the class index
    label = labels[random_index]
    ax.set_title(index_to_class[int(np.argmax(label))])

plt.show()

# Load the Pre-trained Model

In [None]:
# Loading VGG16 model with weights after being pre-trained on ImageNet
label_map = train_data.class_indices
print(label_map)

# NOTE: `include_top` must be a real boolean. A string such as 'False' is
# truthy, so it silently builds the model WITH its ImageNet classifier head.
# The head is requested explicitly here because the following cell strips the
# trailing classifier layers (flatten, fc1, fc2, predictions) to obtain the
# convolutional base; switching this to False without also changing that cell
# would remove convolutional layers instead.
vggmodel = VGG16(weights="imagenet", include_top=True, input_shape=(224, 224, 3))

In [None]:
mymodel = Sequential()

# Copy VGG16's convolutional base into a fresh Sequential model: every layer
# that comes before the first Flatten layer. Stopping at Flatten (instead of
# slicing off a fixed number of trailing layers) gives the same result when the
# classifier head is present and keeps all convolutional blocks intact when
# VGG16 was built without it.
for layer in vggmodel.layers:
    if isinstance(layer, Flatten):
        break
    print(layer.name)
    mymodel.add(layer)

# The layers now belong to `mymodel`; drop the reference to the source model.
del vggmodel

# Freeze the pre-trained base so that only the new head is trained.
# The flag is printed after it is set, so the log shows the effective state.
for layer in mymodel.layers:
    layer.trainable = False
    print(layer.name, layer.trainable)

# Adding Layers

In [51]:
# Classification head: flatten -> 1024-unit ReLU dense layer (L2 penalty 0.001)
# -> batch normalisation -> 50% dropout -> 4-way softmax output.
# An optional second block (Dense(512) + BatchNormalization + Dropout(0.5))
# is not included.
new_top = Sequential([
    Flatten(),
    Dense(1024, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(4, activation='softmax'),
])


In [None]:
# Show the layer objects that make up the new classification head
for layer in list(new_top.layers):
    print(layer)

In [55]:
# Append the classification head (itself a Sequential model) to the base model
# as a single nested layer, then print the resulting architecture.
mymodel.add(new_top)
mymodel.summary()

# Compile and Run

In [57]:
# Adam with a small learning rate; categorical cross-entropy matches the
# one-hot labels produced by class_mode='categorical'.
mymodel.compile(
    optimizer=optimizers.Adam(learning_rate=0.0002),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has not improved for 10 epochs and roll the model
# back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# One epoch = one full pass over each generator. len(generator) is its number
# of batches, so the step counts follow the data instead of being hard-coded
# (the fixed values 137 / 35 were several times larger than the number of
# batches the generators actually provide).
history = mymodel.fit(train_data,
                      steps_per_epoch=len(train_data),
                      validation_data=valid_data,
                      validation_steps=len(valid_data),
                      epochs=100,
                      verbose=2,
                      callbacks=[early_stopping])

# Save & Load Model

In [None]:
# Persist the full model to a single HDF5 file
mymodel.save(filepath='VGG16.h5')

In [None]:
from tensorflow.keras.models import load_model

# Read the model back from the HDF5 file written by the save step
loaded_model = load_model(filepath='VGG16.h5')



In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by fit()
history_dict = history.history
train_accuracy, val_accuracy = history_dict['accuracy'], history_dict['val_accuracy']
train_loss, val_loss = history_dict['loss'], history_dict['val_loss']

# Two side-by-side panels: accuracy on the left, loss on the right
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Accuracy curves
accuracy_ax.plot(train_accuracy, label='Training Accuracy')
accuracy_ax.plot(val_accuracy, label='Validation Accuracy')
accuracy_ax.set_title('Accuracy')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

# Loss curves
loss_ax.plot(train_loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.show()


In [None]:
# evaluate() returns [loss, accuracy] for the compiled loss and metric
score = mymodel.evaluate(test_data_new, verbose=1)
test_loss_value = score[0]
test_accuracy_value = score[1]
print('Test Loss:', test_loss_value)
print('Test accuracy:', test_accuracy_value*100)

In [None]:
# Loss/accuracy of the final model on the training generator
# (which still applies its random augmentation) ...
train_loss, train_accuracy = mymodel.evaluate(train_data)

# ... and on the validation generator
val_loss, val_accuracy = mymodel.evaluate(valid_data)

# Express both accuracies as percentages
train_accuracy_percent = train_accuracy * 100
val_accuracy_percent = val_accuracy * 100

print(f"Overall Training Accuracy: {train_accuracy_percent:.2f}%")
print(f"Overall Validation Accuracy: {val_accuracy_percent:.2f}%")


In [None]:
import numpy as np
from sklearn.metrics import classification_report,confusion_matrix

# Initialize empty lists to store true and predicted labels
y_true_all = []
y_pred_all = []

# Loop through all batches of the TEST generator (indexing a generator by
# batch number returns that batch's images and one-hot labels)
for i in range(len(test_data_new)):
    x_test, y_test = test_data_new[i]

    # Predict class probabilities for the batch; verbose=0 avoids printing one
    # progress bar per batch
    y_pred_prob = mymodel.predict(x_test, verbose=0)

    # argmax turns probabilities / one-hot vectors into class indices
    y_pred_all.extend(np.argmax(y_pred_prob, axis=1))
    y_true_all.extend(np.argmax(y_test, axis=1))

# Convert lists to numpy arrays
y_true_all = np.array(y_true_all)
y_pred_all = np.array(y_pred_all)

# Class names must come from the generator that produced the true labels (the
# test generator), ordered by class index rather than by dict insertion order.
class_index_pairs = sorted(test_data_new.class_indices.items(), key=lambda pair: pair[1])
class_labels = [name for name, _ in class_index_pairs]
label_ids = [index for _, index in class_index_pairs]

# The model's output indices follow the training generator's mapping; if the
# test folder maps classes differently, the report below is not meaningful.
if test_data_new.class_indices != train_data.class_indices:
    print("Warning: test class indices differ from training class indices:",
          test_data_new.class_indices, "vs", train_data.class_indices)

# Generate classification report. Passing `labels` keeps the report aligned
# with `target_names` even when a class never occurs in y_true / y_pred.
report = classification_report(y_true_all, y_pred_all,
                               labels=label_ids,
                               target_names=class_labels,
                               zero_division=1)

In [None]:
# Show the per-class precision / recall / F1 table computed in the previous cell
print(report)