Test 3 VGG16

In [None]:
import warnings

# Ignore warnings
warnings.filterwarnings("ignore")

Loading data into NumPy arrays


The code below loads the image data from the train, test, and validation folders into NumPy arrays, resizes every image to 224×224, and normalizes each pixel value to the range [0, 1].

In [None]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

In [None]:
# Root folder that contains the 'train', 'test' and 'validation' sub-folders.
# Set the THESIS_DATA_DIR environment variable to point the notebook at a
# different location; the original local path is kept as the default.
data_dir = os.environ.get("THESIS_DATA_DIR", "F:/thesis/data")

In [None]:
def load_data(dataset_dir, image_size=(224, 224)):
    """Load a labelled image folder into NumPy arrays.

    ``dataset_dir`` must contain one sub-folder per class, and each sub-folder
    name must be the integer class label (it is passed to ``int``). Every entry
    of a class folder is opened with PIL, converted to RGB, resized to
    ``image_size`` and scaled from 0-255 to the range [0, 1].

    Args:
        dataset_dir: Path of the split to load (e.g. ``<data_dir>/train``).
        image_size: ``(width, height)`` handed to ``Image.resize``.
            Defaults to ``(224, 224)``.

    Returns:
        A tuple ``(images, labels)``. For a non-empty split, ``images`` is a
        float32 array of shape ``(n, height, width, 3)`` and ``labels`` is an
        integer array of shape ``(n,)``. Samples are ordered by sorted class
        folder name, then by sorted file name.

    Raises:
        ValueError: If a class folder that contains files is not named with
            an integer.
    """
    # Collect the work list first so the progress-bar total matches exactly
    # the files that are going to be loaded (top-level stray files and
    # anything nested deeper are never visited by the loop below).
    samples = []
    for class_folder in sorted(os.listdir(dataset_dir)):
        class_dir = os.path.join(dataset_dir, class_folder)
        if not os.path.isdir(class_dir):
            continue
        for image_file in sorted(os.listdir(class_dir)):
            samples.append((os.path.join(class_dir, image_file), class_folder))

    images = []
    labels = []
    for image_path, class_folder in tqdm(samples, desc=f'Loading {dataset_dir}', unit='image'):
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the converted image.
        with Image.open(image_path) as image:
            # Force three channels: grayscale, palette or RGBA files would
            # otherwise produce arrays of a different shape and break stacking.
            rgb_image = image.convert('RGB').resize(image_size)
        # float32 is what the network consumes and needs half the memory of
        # the float64 array that a plain ``np.array(image) / 255.0`` creates.
        images.append(np.asarray(rgb_image, dtype=np.float32) / 255.0)
        labels.append(int(class_folder))

    return np.array(images), np.array(labels)

# Each split lives in its own sub-folder of data_dir
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')
val_dir = os.path.join(data_dir, 'validation')

# Training split
train_images, train_labels = load_data(train_dir)

# Held-out test split
test_images, test_labels = load_data(test_dir)

# Validation split
val_images, val_labels = load_data(val_dir)

In [None]:
# Sanity-check the loaded splits: image and label counts must line up.
print("Shape of train images array:", train_images.shape)
print("Shape of train labels array:", train_labels.shape)
print("Shape of test images array:", test_images.shape)
print("Shape of test labels array:", test_labels.shape)
print("Shape of validation images array:", val_images.shape)
print("Shape of validation labels array:", val_labels.shape)

# Summarise the training pixels instead of dumping the raw array: dtype and
# value range are enough to confirm that the normalisation was applied.
print("Train images dtype:", train_images.dtype)
if train_images.size:
    print("Train images value range:", train_images.min(), "to", train_images.max())
print(type(train_images))

Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [None]:
# Number of output classes; the final Dense layer gets one unit per class.
num_classes = 38  # Change this according to the number of classes in your dataset

# Initialize the CNN model: two Conv/MaxPool stages followed by a dense classifier head
model = Sequential()

# Convolutional layer with 32 filters, 3x3 kernel and ReLU; expects 224x224 RGB input
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))

# Max pooling layer with pool size of 2x2
model.add(MaxPooling2D((2, 2)))

# Second convolutional layer with 64 filters, 3x3 kernel and ReLU
model.add(Conv2D(64, (3, 3), activation='relu'))

# Second max pooling layer with pool size of 2x2
model.add(MaxPooling2D((2, 2)))

# Flatten the feature maps into a vector for the dense layers
model.add(Flatten())

# Fully connected (dense) layer with 128 neurons and ReLU activation
model.add(Dense(128, activation='relu'))

# Dropout with rate 0.5 to reduce overfitting
model.add(Dropout(0.5))

# Output layer with softmax activation for multi-class classification
model.add(Dense(num_classes, activation='softmax'))

# Compile the model.
# load_data returns integer class labels (not one-hot vectors) and those are
# passed directly to fit/evaluate, so the sparse variant of the loss is
# required; 'categorical_crossentropy' expects one-hot targets and fails on
# integer labels of shape (n,).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()


Train

In [None]:
# Requires train_images/train_labels, val_images/val_labels and
# test_images/test_labels from the data-loading cells above.

# Fit for 50 epochs in mini-batches of 32, tracking validation metrics after every epoch
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(val_images, val_labels),
)

# Score the trained model on the held-out test split (no progress output)
test_loss, test_accuracy = model.evaluate(x=test_images, y=test_labels, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

Evaluation

In [None]:
from sklearn.metrics import classification_report

# Turn the per-class probabilities predicted for the validation set into
# hard class indices by taking the most probable class of every sample
val_predictions = model.predict(val_images).argmax(axis=1)

# Per-class precision / recall / F1 against the true validation labels
report = classification_report(val_labels, val_predictions)

# Show the report
print("Classification Report:\n", report)

Model testing

In [None]:
# Loss and accuracy of the trained model on the held-out test split
test_loss, test_accuracy = model.evaluate(x=test_images, y=test_labels, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


AUC ROC


In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

# Convert labels to one-hot encoded format
from tensorflow.keras.utils import to_categorical
# Pin the width to num_classes: without it the matrix only gets
# max(test_labels) + 1 columns, so indexing column i below fails whenever the
# highest-numbered classes are missing from the test split.
test_labels_one_hot = to_categorical(test_labels, num_classes=num_classes)

# Predict probabilities for each class
test_probabilities = model.predict(test_images)

# Compute one-vs-rest AUC-ROC for each class
auc_roc_scores = []
for i in range(num_classes):
    class_truth = test_labels_one_hot[:, i]
    if np.unique(class_truth).size < 2:
        # roc_auc_score raises when the ground truth holds a single value
        # (class absent from, or the only class in, the test split); the
        # score is undefined there, so record NaN and keep going.
        auc = float("nan")
    else:
        auc = roc_auc_score(class_truth, test_probabilities[:, i])
    auc_roc_scores.append(auc)

# Print AUC-ROC scores for each class ("nan" marks an undefined score)
for i, auc in enumerate(auc_roc_scores):
    print(f"Class {i}: AUC-ROC = {auc:.4f}")