In [2]:
import kagglehub
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, classification_report

In [3]:
# Fetch the dataset through kagglehub and report where the files landed
path = kagglehub.dataset_download("ayush1220/cifar10")
print(f"Path to dataset files: {path}")

Path to dataset files: /kaggle/input/cifar10


In [4]:
# Peek at the top-level entries of the downloaded dataset
print(f"Contents: {os.listdir(path)}")

Contents: ['cifar10']


In [5]:
# Show every top-level entry together with what it contains.
# Entries are sorted so the listing is stable across filesystems.
subdirs = sorted(os.listdir(path))
for sub in subdirs:
    sub_path = os.path.join(path, sub)
    if os.path.isdir(sub_path):
        print(sub, "=>", sorted(os.listdir(sub_path)))
    else:
        # A stray file (README, archive, ...) would make os.listdir raise
        # NotADirectoryError, so report it instead of descending into it.
        print(sub, "=> (file)")

cifar10 => ['test', 'train']


In [6]:
# Dataset layout: <path>/cifar10/{train,test}
base_dir = os.path.join(path, 'cifar10')
train_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'test')
)

In [7]:
# Hyperparameters shared by the data pipeline and the training loop
img_height = img_width = 32   # target size images are resized to on load
batch_size, epochs = 64, 30   # images per step / maximum training epochs

In [8]:
# Training pipeline: scale pixels to [0, 1] and apply random augmentation
train_datagen = ImageDataGenerator(
    # pixel scaling
    rescale=1.0 / 255,
    # geometric jitter
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    # how pixels exposed by a transform are filled in
    fill_mode='nearest',
)

In [9]:
# Evaluation pipeline: pixel scaling only, no augmentation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [10]:
# Directory-backed batch iterators for the train and test splits
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# shuffle=False keeps a fixed file order so predictions can later be
# matched against test_generator.classes
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, **flow_options
)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.


In [11]:
# CNN Model with dropout

# Seed the Python, NumPy and TensorFlow RNGs before any weights are created
# so that initialisation, dropout and batch shuffling are repeatable
# (some GPU kernels may still be nondeterministic).
tf.keras.utils.set_random_seed(42)

model = models.Sequential([
    # Declare the input with an explicit Input layer; passing input_shape to
    # the first Conv2D is what triggers the Keras UserWarning.
    layers.Input(shape=(img_height, img_width, 3)),

    # Block 1: 32 filters -> downsample -> dropout
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Block 2: 64 filters -> downsample -> dropout
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Block 3: 128 filters, then flatten for the dense head
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),

    # Classifier head: one softmax output per class (10 classes)
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

# categorical_crossentropy matches the one-hot labels produced by
# class_mode='categorical' in the data generators.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Training callbacks, both driven by validation loss
# Stop once val_loss has not improved for 3 epochs and roll back to the best weights
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)
# Halve the learning rate after 2 epochs without val_loss improvement
lr_reduce = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    verbose=1,
)

In [13]:
# Fit the network, validating after every epoch
# NOTE(review): validation_data is the test generator, and both early stopping
# (restore_best_weights=True) and the LR schedule monitor val_loss, so the
# test split influences model selection. The later evaluate() on the same
# generator is therefore optimistic -- consider holding out part of the
# training data for validation instead.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=test_generator,
    callbacks=[early_stop, lr_reduce],
)

  self._warn_if_super_not_called()


Epoch 1/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 306ms/step - accuracy: 0.2528 - loss: 1.9875 - val_accuracy: 0.4715 - val_loss: 1.4410 - learning_rate: 0.0010
Epoch 2/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 100ms/step - accuracy: 0.4452 - loss: 1.5440 - val_accuracy: 0.5201 - val_loss: 1.3152 - learning_rate: 0.0010
Epoch 3/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 96ms/step - accuracy: 0.5027 - loss: 1.4084 - val_accuracy: 0.5833 - val_loss: 1.1607 - learning_rate: 0.0010
Epoch 4/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 96ms/step - accuracy: 0.5308 - loss: 1.3255 - val_accuracy: 0.6195 - val_loss: 1.0665 - learning_rate: 0.0010
Epoch 5/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 97ms/step - accuracy: 0.5544 - loss: 1.2704 - val_accuracy: 0.6431 - val_loss: 1.0173 - learning_rate: 0.0010
Epoch 6/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [26]:
# Write the accuracy and loss curves to training_history.png (not shown inline)
fig, panels = plt.subplots(1, 2, figsize=(20, 8))

# (history key, short legend suffix, full axis/title name) for each panel
curve_specs = (
    ('accuracy', 'Acc', 'Accuracy'),
    ('loss', 'Loss', 'Loss'),
)

for ax, (key, short_name, full_name) in zip(panels, curve_specs):
    ax.plot(history.history[key], label=f'Train {short_name}')
    ax.plot(history.history[f'val_{key}'], label=f'Val {short_name}')
    ax.set_title(f'Model {full_name}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(full_name)
    ax.legend()

fig.tight_layout()
fig.savefig('training_history.png')
plt.close(fig)

In [14]:
# Score the trained model on the test generator (returns loss, then accuracy)
loss, accuracy = model.evaluate(test_generator)
print("Test Accuracy: {:.2f}".format(accuracy))

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 49ms/step - accuracy: 0.7080 - loss: 0.8418
Test Accuracy: 0.73


In [15]:
# Ground truth and label names come straight from the (unshuffled) generator
y_true = test_generator.classes
class_labels = list(test_generator.class_indices)

# Class probabilities for every test image, reduced to the most likely class
y_pred = model.predict(test_generator)
y_pred_classes = y_pred.argmax(axis=1)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 52ms/step


In [22]:
# Confusion matrix, written to confusion_matrix.png (not shown inline)
cm = confusion_matrix(y_true, y_pred_classes)

fig, ax = plt.subplots(figsize=(20, 16))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
fig.savefig("confusion_matrix.png")
plt.close(fig)

In [17]:
# Per-class precision / recall / F1 on the test set
report = classification_report(
    y_true,
    y_pred_classes,
    target_names=class_labels,
)
print("Classification Report:\n")
print(report)

Classification Report:

              precision    recall  f1-score   support

    airplane       0.79      0.77      0.78      1000
  automobile       0.83      0.90      0.86      1000
        bird       0.75      0.49      0.59      1000
         cat       0.58      0.46      0.51      1000
        deer       0.71      0.67      0.69      1000
         dog       0.71      0.54      0.61      1000
        frog       0.60      0.92      0.73      1000
       horse       0.74      0.80      0.77      1000
        ship       0.82      0.89      0.85      1000
       truck       0.78      0.86      0.81      1000

    accuracy                           0.73     10000
   macro avg       0.73      0.73      0.72     10000
weighted avg       0.73      0.73      0.72     10000



In [23]:
import random

In [24]:
# Sample predictions visualization
# Load the whole test split as one batch. The batch size is taken from the
# existing generator instead of a hard-coded 10000 so a differently sized
# test set is not silently truncated. shuffle=False keeps a fixed file order.
x_test, y_test_true = next(test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=test_generator.samples,  # load all images
    class_mode='categorical',
    shuffle=False
))

# One-hot labels / class probabilities -> integer class ids
y_test_true_classes = np.argmax(y_test_true, axis=1)
y_test_pred_classes = np.argmax(model.predict(x_test), axis=1)

# Up to 16 images, drawn with a dedicated fixed-seed RNG so the saved figure
# is the same on every run; capping at len(x_test) avoids the ValueError
# random.sample raises when asked for more items than exist.
n_samples = min(16, len(x_test))
indices = random.Random(42).sample(range(len(x_test)), n_samples)

plt.figure(figsize=(12, 12))
for i, idx in enumerate(indices):
    plt.subplot(4, 4, i + 1)
    plt.imshow(x_test[idx])
    plt.title(f'True: {class_labels[y_test_true_classes[idx]]}\nPred: {class_labels[y_test_pred_classes[idx]]}')
    plt.axis('off')

plt.tight_layout()
plt.savefig('sample_predictions.png')
plt.close()

Found 10000 images belonging to 10 classes.
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
