In [2]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import layers, Model
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, Callback
from tensorflow.keras.applications.vgg16 import VGG16

# Define the path to the dataset
dataset_path = r"D://SEM-4//PROJECTS//ML//DATASET//Archive"

# Get file paths and labels from folder structure
file_paths = []
labels = []

for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if os.path.isdir(label_path):
        for img_file in os.listdir(label_path):
            file_paths.append(os.path.join(label_path, img_file))
            labels.append(label)

# Convert labels to numerical format
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(file_paths, labels_encoded, test_size=0.2, random_state=42)

# Function to preprocess images
def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Data generator with augmentation
def augmented_data_generator(file_paths, labels, batch_size=32):
    while True:
        for i in range(0, len(file_paths), batch_size):
            batch_paths = file_paths[i:i + batch_size]
            batch_labels = labels[i:i + batch_size]
            batch_images = np.vstack([preprocess_image(path) for path in batch_paths])
            augmented_images = next(datagen.flow(batch_images, batch_size=batch_size, shuffle=False))
            yield augmented_images, np.array(batch_labels)

# Create a Vision Transformer model with VGG16 base
def create_vision_transformer():
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    for layer in base_model.layers[-10:]:
        layer.trainable = True  # Unfreeze the last 10 layers

    inputs = tf.keras.Input(shape=(224, 224, 3))
    x = base_model(inputs, training=True)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(len(label_encoder.classes_), activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

# Custom callback for early stopping based on training and validation accuracy difference
class CustomEarlyStopping(Callback):
    def on_epoch_end(self, epoch, logs=None):
        train_acc = logs.get("accuracy")
        val_acc = logs.get("val_accuracy")
        if train_acc and val_acc and (train_acc - val_acc > 0.05):
            print(f"\nStopping early: Training accuracy {train_acc:.4f} exceeds validation accuracy {val_acc:.4f} by more than 5%.")
            self.model.stop_training = True

vit_model = create_vision_transformer()
vit_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

batch_size = 84
epochs = 50

train_generator = augmented_data_generator(X_train, y_train, batch_size=batch_size)
test_generator = augmented_data_generator(X_test, y_test, batch_size=batch_size)

# Callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
custom_early_stopping = CustomEarlyStopping()

# Train the model
vit_model.fit(train_generator, steps_per_epoch=len(X_train)//batch_size,
              epochs=epochs, validation_data=test_generator,
              validation_steps=len(X_test)//batch_size, callbacks=[reduce_lr, early_stopping, custom_early_stopping])

# Evaluate the model
test_loss, test_acc = vit_model.evaluate(test_generator, steps=len(X_test)//batch_size)
print(f'Test accuracy: {test_acc}')

# Predict and evaluate results
y_pred = vit_model.predict(test_generator, steps=len(X_test)//batch_size)
y_pred_classes = np.argmax(y_pred, axis=1)

print(classification_report(y_test[:len(y_pred_classes)], y_pred_classes))
print(confusion_matrix(y_test[:len(y_pred_classes)], y_pred_classes))


Epoch 1/50
[1m 66/110[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m12:01[0m 16s/step - accuracy: 0.0848 - loss: 3.0011



[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1911s[0m 17s/step - accuracy: 0.0911 - loss: 2.8470 - val_accuracy: 0.1662 - val_loss: 2.5037 - learning_rate: 1.0000e-04
Epoch 2/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1901s[0m 17s/step - accuracy: 0.1313 - loss: 2.4911 - val_accuracy: 0.1830 - val_loss: 2.4118 - learning_rate: 1.0000e-04
Epoch 3/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1924s[0m 17s/step - accuracy: 0.1623 - loss: 2.4088 - val_accuracy: 0.2507 - val_loss: 2.2570 - learning_rate: 1.0000e-04
Epoch 4/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1917s[0m 17s/step - accuracy: 0.1978 - loss: 2.3085 - val_accuracy: 0.2493 - val_loss: 2.2154 - learning_rate: 1.0000e-04
Epoch 5/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1918s[0m 17s/step - accuracy: 0.2424 - loss: 2.2223 - val_accuracy: 0.3001 - val_loss: 2.0338 - learning_rate: 1.0000e-04
Epoch 6/50
[1m110/110[0m [32m━━━━━━━━━━━━━━