In [None]:
import os
import urllib.request
# 


In [None]:
# Download the Caltech-101 archive (about 137 MB).
# Skip this cell if you already have an n-class dataset on disk.
# NOTE(review): no visible cell extracts this archive, yet the split cell reads
# the '101_ObjectCategories' folder -- presumably it is unpacked by hand; confirm.
ARCHIVE_URL = "https://data.caltech.edu/records/mzrjq-6wc02/files/caltech-101.zip?download=1"
ARCHIVE_PATH = "objects_101.zip"

if os.path.exists(ARCHIVE_PATH):
    # Re-running the notebook should not fetch the archive again.
    print(f"'{ARCHIVE_PATH}' already exists, skipping download")
else:
    # Download to a temporary name first so an interrupted transfer never
    # leaves a truncated file under the final name.
    partial_path = ARCHIVE_PATH + ".part"
    urllib.request.urlretrieve(ARCHIVE_URL, partial_path)
    os.replace(partial_path, ARCHIVE_PATH)

In [None]:
import splitfolders
import os,shutil
# Split the '101_ObjectCategories' folder into the 'data' output folder using an
# 80% / 10% / 10% ratio and a fixed seed. The loading cell further down reads the
# 'train', 'val' and 'test' sub-folders from that output.
# Note: a previously sketched cleanup step (removing and recreating a
# 'split_data' folder before splitting) is disabled, so nothing is deleted here.
splitfolders.ratio(
    '101_ObjectCategories',
    output='data',
    ratio=(0.8, 0.1, 0.1),
    seed=42,
)

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np

In [None]:
# Load the train / validation / test splits as batched image datasets.
def _load_split(split_name):
    """Build an image dataset from the `data/<split_name>` directory.

    Uses a 128x128 `image_size` and a batch size of 32, the same settings for
    every split.
    """
    # Forward slashes work on Windows, Linux and macOS alike; the previous
    # backslash-separated literals only resolved on Windows.
    return tf.keras.preprocessing.image_dataset_from_directory(
        f"data/{split_name}",
        image_size=(128, 128),
        batch_size=32,
    )


train_dataset = _load_split("train")
val_dataset = _load_split("val")
test_dataset = _load_split("test")


In [None]:
# Read the class labels exposed by the training dataset, show them, and keep
# their count (used to size the model's output layer).
class_names = train_dataset.class_names
print(f"Class names: {class_names}")
num_classes = len(class_names)

In [None]:
# Visualize up to nine images from the first training batch in a 3x3 grid,
# each titled with its class name and class index.
plt.figure(figsize=(10, 10))
for images, labels in train_dataset.take(1):
    # Never index past the end of the batch if it holds fewer than nine images.
    for i in range(min(9, images.shape[0])):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        # Convert the scalar label tensor to a plain int so the title shows the
        # index itself rather than the tensor's repr (same conversion as the
        # prediction-display cell at the end of the notebook).
        label_index = int(labels[i])
        plt.title(f"{class_names[label_index]} {label_index}")
        plt.axis("off")
plt.show()


In [None]:
# Define the model for n-ary classification.
# Add more layers if the test accuracy is low.
model = tf.keras.Sequential([
    # Declare the input explicitly instead of passing `input_shape` to the first
    # layer, which current Keras versions warn against for Sequential models.
    tf.keras.Input(shape=(128, 128, 3)),
    # Scale pixel values by 1/255 before the convolutional stack.
    tf.keras.layers.Rescaling(1./255),
    # Three convolution + max-pooling stages with 32, 64 and 128 filters.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    # Classifier head: flatten, one hidden dense layer, then one softmax unit
    # per class for multi-class classification.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

# Model summary
model.summary()


In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss and
# accuracy as the reported metric.
# Switch the loss to categorical_crossentropy if the labels are one-hot encoded.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# #Picking only valid batches

# BATCH_SIZE=32
# def full_batch_generator(data_generator):
#     while True:
#         x_batch, y_batch = next(data_generator)
#         if x_batch.shape[0]==BATCH_SIZE:
#             yield x_batch,y_batch


# train_generator_full = full_batch_generator(train_dataset)
# validation_generator_full = full_batch_generator(val_dataset)

In [None]:
# Fit the model for 10 epochs on the training split, validating against the
# validation split; the returned history feeds the plots below.
history = model.fit(x=train_dataset, epochs=10, validation_data=val_dataset)


In [None]:
# Plot training history: loss (left) and accuracy (right) side by side.
# Both panels are drawn in a single cell on one explicit figure. With the inline
# backend each cell renders its own figure, so creating the figure in one cell
# and the subplots in later cells never produced the intended 12x4 layout.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Loss
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel("Epoch")
loss_ax.legend()
loss_ax.set_title("Loss")

# Accuracy
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel("Epoch")
acc_ax.legend()
acc_ax.set_title("Accuracy")

plt.show()

In [None]:
# Measure loss and accuracy on the held-out test split and report both to four
# decimal places.
test_loss, test_accuracy = model.evaluate(x=test_dataset)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Generate predictions for the confusion matrix.
# Images and labels are consumed together, batch by batch, so every prediction
# stays paired with its true label.
y_pred, y_true = [], []
for images, labels in test_dataset:
    # predict_on_batch runs a single forward pass on the batch; calling
    # model.predict() inside a Python loop adds per-call overhead and prints a
    # progress bar for every batch.
    batch_probabilities = model.predict_on_batch(images)
    y_pred.extend(np.argmax(batch_probabilities, axis=1))  # class index with the highest score
    y_true.extend(labels.numpy())


In [None]:
# Confusion Matrix
# Pass every class index explicitly so the matrix always has one row/column per
# entry of class_names, even when a class appears in neither y_true nor y_pred;
# otherwise the matrix size can differ from the number of display labels.
all_class_indices = list(range(len(class_names)))
cm = confusion_matrix(y_true, y_pred, labels=all_class_indices)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

# Grow the figure with the number of classes and rotate the x tick labels so
# they remain legible when there are many classes.
figure_side = max(8, 0.25 * len(class_names))
fig, ax = plt.subplots(figsize=(figure_side, figure_side))
disp.plot(cmap=plt.cm.Blues, ax=ax, xticks_rotation="vertical")
plt.show()


In [None]:
# Show the first nine images of one test batch in a 3x3 grid, each titled with
# the predicted and the actual class name.
for images, labels in test_dataset.take(1):
    true_labels = labels.numpy()
    # Predicted class = index of the highest score in each prediction row.
    pred_labels = np.argmax(model.predict(images), axis=1)

    plt.figure(figsize=(12, 12))
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        # Cast the numpy integers to plain ints before indexing the name list.
        pred_label, true_label = (
            class_names[int(pred_labels[i])],
            class_names[int(true_labels[i])],
        )
        plt.title(f"Predicted: {pred_label}, Actual: {true_label}")
        plt.axis("off")
    plt.show()