In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, activations, losses, metrics, optimizers
import numpy as np

In [2]:
def create_model(input_shape=(224,224,3), num_of_classes = 1000):
    inputs = tf.keras.Input(shape=input_shape)
    x = inputs
    if input_shape[0] > 200:  
        # The following layer is the original AlexNet first conv layer. 
        # Removed since we are dealing with cifar100 which is 32X32X3.
        # Alexnet is designed for imagenet which is 224X224X3
        x = layers.Conv2D(96, 11, strides=(4,4), activation="relu")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(pool_size=(3,3), strides=(2,2), padding='same')(x)
    x = layers.Conv2D(256, 5, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=(3,3), strides=(2,2), padding='same')(x)
    x = layers.Conv2D(384, 3, activation="relu")(x)
    x = layers.Conv2D(384, 3, activation="relu")(x)
    x = layers.Conv2D(256, 3, activation="relu")(x)
    x = layers.MaxPooling2D(pool_size=(3,3), strides=(2,2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(4096, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    predictions = layers.Dense(num_of_classes, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    return model

# model = create_model((224,224,3)) # for imagenet
model = create_model((32,32,3), 100) # for cifar100
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 256)       19456     
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 256)       1024      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 256)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 12, 12, 384)       885120    
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 384)       1327488   
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 8, 256)         884992

In [3]:
import tensorflow_addons as tfa

# step = tf.Variable(0, trainable=False)
# schedule = tf.optimizers.schedules.PiecewiseConstantDecay(
#     [10000, 15000], [1e-0, 1e-1, 1e-2])
# lr and wd can be a function or a tensor
# lr = 1e-1 * schedule(step)
# wd = lambda: 5e-4 * schedule(step)

lr=0.01
momentum=0.9
wd=5e-4

optimizer = tfa.optimizers.SGDW(learning_rate=lr, weight_decay=wd, momentum=momentum)
# optimizer = tf.optimizers.SGD(learning_rate=lr, momentum=momentum)


In [4]:
model.compile(
#     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    loss = keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer = optimizer,
    metrics=["accuracy"],
)


In [5]:
from tensorflow.python.keras.utils.np_utils import to_categorical

(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

y_train = to_categorical(y_train, num_classes=100)
y_test = to_categorical(y_test, num_classes=100)

x_train.shape, y_train.shape, x_test.shape, y_test.shape

((50000, 32, 32, 3), (50000, 100), (10000, 32, 32, 3), (10000, 100))

In [6]:
# datagen = keras.preprocessing.image.ImageDataGenerator(
#     featurewise_center=True,
#     featurewise_std_normalization=True,
#     rotation_range=20,
#     width_shift_range=0.4,
#     height_shift_range=0.4,
#     vertical_flip=True,
#     horizontal_flip=True,
#     validation_split=0.2)
# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
# datagen.fit(x_train)


In [7]:
history = model.fit(x_train, y_train, batch_size=128, epochs=100, validation_split=0.3)

# fits the model on batches with real-time data augmentation:
# history = model.fit(datagen.flow(x_train, y_train, batch_size=128,
#          subset='training'),
#          validation_data=datagen.flow(x_train, y_train, batch_size=32, subset='validation'),
#          epochs=20)

test_scores = model.evaluate(x_test, y_test)
print("Test loss:", test_scores[0])
print("Test accuracy:", test_scores[1])# summarize history for accuracy

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100

KeyboardInterrupt: 

In [8]:
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

NameError: name 'history' is not defined