In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.callbacks import EarlyStopping

### Import the CIFAR-10 dataset

In [16]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

### Normlize the set

In [17]:
x_train = x_train.astype(float)/255.0
x_test = x_test.astype(float)/255.0

In [18]:
print(x_train.shape)
print(y_train.shape)

(50000, 32, 32, 3)
(50000, 1)


### Define the model

In [19]:
model = keras.Sequential()
model.add(layers.Flatten(input_shape=(32, 32, 3)))
model.add(layers.Dense(1024, activation='relu'))
layers.Dropout(0.1),
model.add(layers.Dense(512, activation='relu'))
layers.Dropout(0.1),
model.add(layers.Dense(256, activation='relu'))
layers.Dropout(0.1),
model.add(layers.Dense(10, activation='softmax'))

  super().__init__(**kwargs)


### Load the optimizer

In [20]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Early stopping

In [21]:
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

### Trian the model

In [23]:
legend = model.fit(x_train, y_train, epochs=50, batch_size=128, validation_split=0.1, callbacks=[es])

Epoch 1/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 57ms/step - accuracy: 0.3917 - loss: 1.6927 - val_accuracy: 0.3958 - val_loss: 1.6786
Epoch 2/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 59ms/step - accuracy: 0.4232 - loss: 1.6078 - val_accuracy: 0.4272 - val_loss: 1.5905
Epoch 3/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 67ms/step - accuracy: 0.4420 - loss: 1.5513 - val_accuracy: 0.4368 - val_loss: 1.5816
Epoch 4/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 62ms/step - accuracy: 0.4630 - loss: 1.5044 - val_accuracy: 0.4598 - val_loss: 1.5141
Epoch 5/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 60ms/step - accuracy: 0.4776 - loss: 1.4628 - val_accuracy: 0.4586 - val_loss: 1.5093
Epoch 6/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 70ms/step - accuracy: 0.4937 - loss: 1.4195 - val_accuracy: 0.4722 - val_loss: 1.4776
Epoch 7/50
[1m3

### Test the model

In [24]:
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


### evaluation metrics

In [32]:
print("accuracy: ", accuracy_score(y_test, y_pred_classes))
print("presision: ", precision_score(y_test, y_pred_classes, average="macro"))
print("recall: ", recall_score(y_test, y_pred_classes, average="macro"))
print("f1: ", f1_score(y_test, y_pred_classes, average="macro"))
print("validation accuracy:", legend.history['val_accuracy'][-1])

accuracy:  0.5132
presision:  0.5160608367036572
recall:  0.5132000000000001
f1:  0.5076159744128278
validation accuracy: 0.5249999761581421


### Define the model (Regularization)

In [11]:
# Original (kept as comment):
# model = keras.Sequential()
# model.add(layers.Flatten(input_shape=(32, 32, 3)))
# model.add(layers.Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)))
# model.add(layers.Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)))
# model.add(layers.Dense(10, activation='sigmoid'))

# Regularized model at the end section: add Dropout and use softmax
model = keras.Sequential([
    layers.Flatten(input_shape=(32, 32, 3)),
    layers.Dense(1024, activation='relu', kernel_regularizer=keras.regularizers.l2(1e-4)),
    layers.Dropout(0.1),
    layers.Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l2(1e-4)),
    layers.Dropout(0.1),
    layers.Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(1e-4)),
    layers.Dropout(0.1),
    layers.Dense(10, activation='softmax')
])

  super().__init__(**kwargs)


### Load optimizer

In [12]:
# Original (kept as comment):
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.compile(optimizer=keras.optimizers.Adam(1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

### Train the model

In [13]:
# Original (kept as comment):
# model.fit(x_train, y_train, epochs=20, batch_size=256, validation_split=0.1)
history = model.fit(
    x_train, y_train,
    epochs=50,
    batch_size=128,
    validation_split=0.1,
    callbacks=[es]
)

Epoch 1/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 75ms/step - accuracy: 0.2873 - loss: 2.1316 - val_accuracy: 0.3500 - val_loss: 1.9138
Epoch 2/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 64ms/step - accuracy: 0.3697 - loss: 1.8602 - val_accuracy: 0.4008 - val_loss: 1.7657
Epoch 3/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 69ms/step - accuracy: 0.3961 - loss: 1.7620 - val_accuracy: 0.4098 - val_loss: 1.7076
Epoch 4/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 79ms/step - accuracy: 0.4171 - loss: 1.6934 - val_accuracy: 0.4420 - val_loss: 1.6355
Epoch 5/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 78ms/step - accuracy: 0.4280 - loss: 1.6528 - val_accuracy: 0.4322 - val_loss: 1.6429
Epoch 6/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 79ms/step - accuracy: 0.4412 - loss: 1.6191 - val_accuracy: 0.4474 - val_loss: 1.6024
Epoch 7/50
[1m3

### Test the model

In [14]:
# Original (kept as comment):
# print("accuracy: ", accuracy_score(y_test, y_pred_classes))
# print("presision: ", precision_score(y_test, y_pred_classes, average="macro"))
# print("recall: ", recall_score(y_test, y_pred_classes, average="macro"))
# print("f1: ", f1_score(y_test, y_pred_classes, average="macro"))

# Re-evaluate after training the regularized model
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_flat = y_test.ravel()

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step


### Evaluate the model

In [27]:
print("accuracy:", accuracy_score(y_test_flat, y_pred_classes))
print("precision:", precision_score(y_test_flat, y_pred_classes, average="macro", zero_division=0))
print("recall:", recall_score(y_test_flat, y_pred_classes, average="macro", zero_division=0))
print("f1:", f1_score(y_test_flat, y_pred_classes, average="macro", zero_division=0))
print("validation accuracy:", history.history['val_accuracy'][-1])

accuracy: 0.5132
precision: 0.5160608367036572
recall: 0.5132000000000001
f1: 0.5076159744128278
validation accuracy: 0.5027999877929688


## How does adding dropout layers affect training vs validation accuracy?

#### The accuracy slightly decreases but validiation accuracy is almost same.

## Does early stopping prevent wasted training time?

#### YES

## Can L2 weight regularization improve generalization?

#### YES

## How does model depth affect overfitting on CIFAR-10?

#### It allows the model to learn more complex patterns but also increases the risk of overfitting.