In [13]:
# https://www.kaggle.com/code/guesejustin/object-recognition-mlp-cnn-efficientnet-on-cifar

import tensorflow as tf
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.utils import to_categorical # FIX ask if this is allowed

In [None]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
assert x_train.shape == (50000, 32, 32, 3)
assert x_test.shape == (10000, 32, 32, 3)
assert y_train.shape == (50000, 1)
assert y_test.shape == (10000, 1)

In [None]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

In [None]:
#from keras.utils import np_utils
import keras
# one-hot encode the labels
num_classes = len(np.unique(y_train))
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# break training set into training and validation sets
(x_train, x_valid) = x_train[5000:], x_train[:5000]
(y_train, y_valid) = y_train[5000:], y_train[:5000]

# print shape of training set
print('x_train shape:', x_train.shape)

# print number of training, validation, and test images
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(x_valid.shape[0], 'validation samples')

In [None]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

In [None]:
# let us first construct a basic mlp model, feel free to play around!
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

# define the model
model = Sequential()
model.add(Flatten(input_shape = x_train.shape[1:]))
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

In [None]:
# compile the model
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', 
                  metrics=['accuracy'])

In [None]:
from keras.callbacks import ModelCheckpoint   
import time
# train our model and save the best results in the file: MLP.best_weights
# additionally count the time how long it took

mlp_start = time.time() # for stopwatch

hist = model.fit(x_train, y_train, batch_size=32, epochs=20,
          validation_data=(x_valid, y_valid), verbose=2, shuffle=True)

mlp_end = time.time()
mlp_took = mlp_end -mlp_start
print("took %s seconds"%(mlp_took))

---

In [5]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
assert X_train.shape == (50000, 32, 32, 3)
assert X_test.shape == (10000, 32, 32, 3)
assert y_train.shape == (50000, 1)
assert y_test.shape == (10000, 1)

In [7]:
# 2.1 - Split the new train and validation (test) set - 80/20 ratio
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=1)

In [8]:
def build_mlp_model(kernel_initializer,learning_rate,dropout_rate):
    '''
    Parameters for optimal:
    > kernel_initializer = 'glorot_uniform'(default),'glorot_normal','he_uniform','he_normal'           
    > learning_rate = 0.001, 0.01(default), 0.1, ...
    > dropout_rate = 0.1, 0.2(default), 0.3, ...
    '''
    # Build the model
    mlp_model = Sequential([
        # Flatten the Input
        keras.layers.Flatten(input_shape=(32,32,3),name='Flatten'),
        # First Hidden Layer（less neurons） - ReLU activation
        keras.layers.Dense(512, activation='relu',name='Hidden_1st'),
        # Second Hidden Layer (less neurons) - ReLU activation
        keras.layers.Dense(256, activation='relu',name='Hidden_2nd'),
        # Output Layer - Softmax activation
        keras.layers.Dense(10, activation='softmax',name='Output_Softmax') 
    ],name='MLP_model')
    
    # Compile the model (learning_rate used here)
    mlp_model.compile(optimizer='sgd',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    return mlp_model

In [14]:
# 3.2 - Callbacks the Learning rate scheduling

# Learning rate - Exponential scheduler
def Exponential_scheduler(epoch,lr):
    if epoch < 10:
        return lr
    else:
        return lr*tf.math.exp(-0.1*epoch)
    
# Callbacks learning rate scheduling and early stopping
lrs = LearningRateScheduler(Exponential_scheduler)
es = EarlyStopping('val_loss',patience=3,restore_best_weights=True)
callbacks = [es,lrs]

In [16]:
mlp_model = build_mlp_model(kernel_initializer='he_normal',learning_rate=0.1,dropout_rate=0.2)
mlp_model.summary() # Display its architucture


Model: "MLP_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Flatten (Flatten)           (None, 3072)              0         
                                                                 
 Hidden_1st (Dense)          (None, 512)               1573376   
                                                                 
 Hidden_2nd (Dense)          (None, 256)               131328    
                                                                 
 Output_Softmax (Dense)      (None, 10)                2570      
                                                                 
Total params: 1707274 (6.51 MB)
Trainable params: 1707274 (6.51 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [17]:
HundredEpochs_history = mlp_model.fit(X_train,y_train,epochs=100,callbacks=callbacks, validation_data=(X_valid,y_valid),verbose=2)

Epoch 1/100


2023-08-25 23:10:36.847237: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-08-25 23:10:45.541222: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


1250/1250 - 11s - loss: nan - accuracy: 0.1003 - val_loss: nan - val_accuracy: 0.0986 - lr: 0.0100 - 11s/epoch - 8ms/step
Epoch 2/100
1250/1250 - 8s - loss: nan - accuracy: 0.1003 - val_loss: nan - val_accuracy: 0.0986 - lr: 0.0100 - 8s/epoch - 6ms/step
Epoch 3/100
1250/1250 - 7s - loss: nan - accuracy: 0.1003 - val_loss: nan - val_accuracy: 0.0986 - lr: 0.0100 - 7s/epoch - 6ms/step
