In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import layers, Model
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, Callback

# Define the path to the dataset
dataset_path = r"E:\ML EXTENSION\Archive_2"

# Get file paths and labels from folder structure
file_paths = []
labels = []

for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if os.path.isdir(label_path):
        for img_file in os.listdir(label_path):
            file_paths.append(os.path.join(label_path, img_file))
            labels.append(label)

# Convert labels to numerical format
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(file_paths, labels_encoded, test_size=0.2, random_state=42)

# Function to preprocess images
def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Data generator with augmentation
def augmented_data_generator(file_paths, labels, batch_size=32):
    while True:
        for i in range(0, len(file_paths), batch_size):
            batch_paths = file_paths[i:i + batch_size]
            batch_labels = labels[i:i + batch_size]
            batch_images = np.vstack([preprocess_image(path) for path in batch_paths])
            augmented_images = next(datagen.flow(batch_images, batch_size=batch_size, shuffle=False))
            yield augmented_images, np.array(batch_labels)

# Create a Vision Transformer model with fine-tuning using MobileNetV2
def create_vision_transformer():
    base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    for layer in base_model.layers[-10:]:
        layer.trainable = True  # Unfreeze the last 10 layers

    inputs = tf.keras.Input(shape=(224, 224, 3))
    x = base_model(inputs, training=True)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.5)(x)  # Dropout 0.5
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.5)(x)  # Dropout 0.5
    outputs = layers.Dense(len(label_encoder.classes_), activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

# Custom callback for early stopping based on training and validation accuracy difference
class CustomEarlyStopping(Callback):
    def on_epoch_end(self, epoch, logs=None):
        train_acc = logs.get("accuracy")
        val_acc = logs.get("val_accuracy")
        if train_acc and val_acc and (train_acc - val_acc > 0.05):
            print(f"\nStopping early: Training accuracy {train_acc:.4f} exceeds validation accuracy {val_acc:.4f} by more than 5%.")
            self.model.stop_training = True

vit_model = create_vision_transformer()
vit_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

batch_size = 16
epochs = 50

train_generator = augmented_data_generator(X_train, y_train, batch_size=batch_size)
test_generator = augmented_data_generator(X_test, y_test, batch_size=batch_size)

# Callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
custom_early_stopping = CustomEarlyStopping()

# Train the model
vit_model.fit(train_generator, steps_per_epoch=len(X_train)//batch_size,
              epochs=epochs, validation_data=test_generator,
              validation_steps=len(X_test)//batch_size, callbacks=[reduce_lr, early_stopping, custom_early_stopping])

# Evaluate the model
test_loss, test_acc = vit_model.evaluate(test_generator, steps=len(X_test)//batch_size)
print(f'Test accuracy: {test_acc}')

# Predict and evaluate results
y_pred = vit_model.predict(test_generator, steps=len(X_test)//batch_size)
y_pred_classes = np.argmax(y_pred, axis=1)

print(classification_report(y_test[:len(y_pred_classes)], y_pred_classes))
print(confusion_matrix(y_test[:len(y_pred_classes)], y_pred_classes))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1us/step
Epoch 1/50
[1m131/650[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m11:24[0m 1s/step - accuracy: 0.0810 - loss: 2.9303



[1m150/650[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m10:57[0m 1s/step - accuracy: 0.0837 - loss: 2.9021



[1m528/650[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m2:27[0m 1s/step - accuracy: 0.1255 - loss: 2.6430



[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m878s[0m 1s/step - accuracy: 0.1368 - loss: 2.5985 - val_accuracy: 0.3503 - val_loss: 1.9237 - learning_rate: 1.0000e-04
Epoch 2/50
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1020s[0m 2s/step - accuracy: 0.3146 - loss: 2.0626 - val_accuracy: 0.3847 - val_loss: 1.8269 - learning_rate: 1.0000e-04
Epoch 3/50
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m937s[0m 1s/step - accuracy: 0.4087 - loss: 1.8239 - val_accuracy: 0.4760 - val_loss: 1.6563 - learning_rate: 1.0000e-04
Epoch 4/50
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m983s[0m 2s/step - accuracy: 0.4761 - loss: 1.6506 - val_accuracy: 0.4567 - val_loss: 1.9385 - learning_rate: 1.0000e-04
Epoch 5/50
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5161 - loss: 1.5331
Stopping early: Training accuracy 0.5256 exceeds validation accuracy 0.4659 by more than 5%.
[1m650/650[0m [32m━━━━━━━━━━━━━