## 1. Data Preprocessing

In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

# Load dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize pixel values to range between 0 and 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convert class labels into one-hot encoded format
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step


b. Data Augmentation (Optional)

Data augmentation can improve the generalization of the model. We'll apply random flips, rotations, and shifts.:

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
datagen.fit(x_train)


## 2.NETWORK ARCHITRCTURE DESIGN

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

model = Sequential()

# Input Layer + Convolutional Layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the data for the Dense Layers
model.add(Flatten())

# Fully Connected Layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

# Output Layer
model.add(Dense(10, activation='softmax'))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Justification:


Conv2D layers capture spatial features from the images.

MaxPooling reduces dimensionality, preventing overfitting.

Dropout is used to regularize the model.

Softmax in the output layer ensures proper multi-class classification.

## 3. Activation Functions
We'll use ReLU for hidden layers because:

ReLU accelerates convergence in deeper networks.
It helps to avoid vanishing gradient problems.
The softmax activation in the output layer ensures multi-class classification.

##4. Loss Function and Optimizer

Since this is a multi-class classification problem, we’ll use categorical crossentropy. Additionally, we can compare the performance of Mean Squared Error (MSE) and Hinge Loss.

In [5]:
model.compile(
    loss='categorical_crossentropy',  # Main loss function
    optimizer='adam',  # Adam optimizer
    metrics=['accuracy'])


Justification for Adam Optimizer:

Adam combines the benefits of SGD with momentum and RMSprop.
It dynamically adjusts the learning rate based on gradients.
Learning Rate: If the model is not converging, reducing the learning rate can help stabilize training.


## 5. Training the Model

In [6]:
# Training the model
history = model.fit(datagen.flow(x_train, y_train, batch_size=64),
                    epochs=5,validation_data=(x_test, y_test))

Epoch 1/5


  self._warn_if_super_not_called()


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 121ms/step - accuracy: 0.2433 - loss: 2.0168 - val_accuracy: 0.4539 - val_loss: 1.4956
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 115ms/step - accuracy: 0.3945 - loss: 1.6631 - val_accuracy: 0.4838 - val_loss: 1.4268
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 113ms/step - accuracy: 0.4304 - loss: 1.5623 - val_accuracy: 0.5284 - val_loss: 1.2965
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 112ms/step - accuracy: 0.4630 - loss: 1.4904 - val_accuracy: 0.5484 - val_loss: 1.2466
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 114ms/step - accuracy: 0.4800 - loss: 1.4442 - val_accuracy: 0.5781 - val_loss: 1.1992


## 6. Model Evaluation

In [7]:
# Evaluate on test data
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5801 - loss: 1.2049
Test accuracy: 0.5781000256538391


In [8]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

y_pred = np.argmax(model.predict(x_test), axis=-1)
y_true = np.argmax(y_test, axis=-1)

# Precision, Recall, F1-Score
print(classification_report(y_true, y_pred))

# Confusion Matrix
print(confusion_matrix(y_true, y_pred))


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step
              precision    recall  f1-score   support

           0       0.74      0.56      0.64      1000
           1       0.62      0.79      0.69      1000
           2       0.60      0.22      0.32      1000
           3       0.45      0.32      0.37      1000
           4       0.54      0.46      0.49      1000
           5       0.51      0.55      0.53      1000
           6       0.56      0.75      0.65      1000
           7       0.59      0.69      0.63      1000
           8       0.75      0.68      0.71      1000
           9       0.49      0.78      0.60      1000

    accuracy                           0.58     10000
   macro avg       0.59      0.58      0.56     10000
weighted avg       0.59      0.58      0.56     10000

[[555  73  46  13  20   6   9  21 114 143]
 [  2 794   0   2   1   2   8   1   5 185]
 [ 73  28 221  65 183 122 166  66  30  46]
 [  8  42  30 318  46 249 132  78  2

7. Optimization Strategies

Early Stopping can be implemented to prevent overfitting by halting training when the validation loss stops improving.

Learning Rate Scheduling helps reduce the learning rate progressively to fine-tune the model towards the end of training.

Weight Initialization: Efficient weight initialization (like Xavier initialization) ensures the model starts with optimal weight distributions, avoiding vanishing or exploding gradients.

In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Learning rate scheduling
lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)

history = model.fit(datagen.flow(x_train, y_train, batch_size=64),
                    epochs=5,
                    validation_data=(x_test, y_test),
                    callbacks=[early_stopping, lr_schedule])


Epoch 1/5


  self._warn_if_super_not_called()


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 116ms/step - accuracy: 0.4933 - loss: 1.4156 - val_accuracy: 0.5918 - val_loss: 1.1445 - learning_rate: 0.0010
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 113ms/step - accuracy: 0.5070 - loss: 1.3822 - val_accuracy: 0.6046 - val_loss: 1.0969 - learning_rate: 0.0010
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 113ms/step - accuracy: 0.5090 - loss: 1.3544 - val_accuracy: 0.6020 - val_loss: 1.1220 - learning_rate: 0.0010
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 112ms/step - accuracy: 0.5139 - loss: 1.3480 - val_accuracy: 0.6098 - val_loss: 1.0858 - learning_rate: 0.0010
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 111ms/step - accuracy: 0.5288 - loss: 1.3171 - val_accuracy: 0.6077 - val_loss: 1.1046 - learning_rate: 0.0010
