<a href="https://colab.research.google.com/github/szeshiang/Advanced-Computer-Vision/blob/main/070624_AlexNet_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**AlexNet Architecture**

In [1]:
# import the library
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,AveragePooling2D,Flatten,Dense,MaxPool2D, BatchNormalization,Dropout
from tensorflow.keras.datasets import mnist
print('library imported')

library imported


In [2]:
# load and preprocess the mnist dataset
(X_train,y_train),(X_test,y_test)=mnist.load_data()

In [3]:
X_train.shape

(60000, 28, 28)

In [4]:
X_train=X_train.reshape(-1,28,28,1).astype('float32')/255.0
X_test=X_test.reshape(-1,28,28,1).astype('float32')/255.0

In [5]:
X_train.shape

(60000, 28, 28, 1)

In [6]:
y_train[0:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [7]:
from tensorflow.keras.utils import to_categorical
y_train=to_categorical(y_train,10)
y_test=to_categorical(y_test,10)

In [8]:
y_train[0:5]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [9]:
# Define the AlexNet-like model for MNIST
def alexnet_mnist():
    model=Sequential()


    # Conv1
    model.add(Conv2D(96, kernel_size=(3, 3), strides=(1, 1), activation='relu', input_shape=(28, 28, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))


    # Conv2
    model.add(Conv2D(256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))


    # Conv3
    model.add(Conv2D(384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'))


    # Conv4
    model.add(Conv2D(384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'))


    # Conv5
    model.add(Conv2D(256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))


    # Flatten and Fully Connected Layers
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))


    # Output Layer
    model.add(Dense(10, activation='softmax'))

    return model

In [10]:
# Call the function to create the model
model = alexnet_mnist()
model.summary() # Moved outside the function

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 96)        960       
                                                                 
 batch_normalization (Batch  (None, 28, 28, 96)        384       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 96)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 256)       221440    
                                                                 
 batch_normalization_1 (Bat  (None, 14, 14, 256)       1024      
 chNormalization)                                                
                                                        

In [11]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
print('model is compiled')

model is compiled


In [12]:
def lr_schedule(epoch,lr):
  if epoch<5:
    return lr
  else:
    return lr*tf.math.exp(-0.1)

from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
lr_scheduler=LearningRateScheduler(lr_schedule)


In [13]:
# Data Augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen=ImageDataGenerator(rotation_range=10,zoom_range=0.1,width_shift_range=0.1,height_shift_range=0.1)

In [14]:
# use modelcheckpoint
checkpoint_callback=ModelCheckpoint(filepath='alexnet_model.h5',
                                    monitor='val_accuracy',
                                    save_best_only=True,
                                    mode='max',
                                    save_freq='epoch')

In [15]:
# Train the model

history=model.fit(datagen.flow(X_train,y_train,batch_size=64),epochs=5,
                  validation_data=(X_test,y_test),
                  callbacks=[lr_scheduler,checkpoint_callback])

Epoch 1/5

  saving_api.save_model(


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [16]:
# evaluate the model
test_loss,test_acc=model.evaluate(X_test,y_test)
print('Test Accuracy',test_acc)
print('Test Loss',test_loss)

Test Accuracy 0.9843000173568726
Test Loss 0.06796978414058685


In [17]:
# load the model
import tensorflow as tf
best_model=tf.keras.models.load_model('/content/alexnet_model.h5')
test_loss,test_acc=best_model.evaluate(X_test,y_test)
print('Test Accuracy',test_acc)
print('Test Loss',test_loss)

Test Accuracy 0.9843000173568726
Test Loss 0.06796978414058685


**VGG16 Architecture**

In [18]:
# load dataset
from tensorflow.keras.datasets import cifar10
(X_train,y_train),(X_test,y_test)=cifar10.load_data()

In [19]:
X_train=X_train.astype('float32')/255.0
X_test=X_test.astype('float32')/255.0

In [20]:
y_train=to_categorical(y_train,10)
y_test=to_categorical(y_test,10)

In [21]:
from tensorflow.keras.layers import MaxPooling2D
# Define the VGG-16 model
def vgg16():
    model = Sequential()


    # Block 1
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))


    # Block 2
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))


    # Block 3
    model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))


    # Block 4
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))


    # Block 5
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))


    # Fully Connected Layers
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))


    # Output Layer
    model.add(Dense(10, activation='softmax'))


    return model


In [22]:
model_vgg=vgg16()
model_vgg.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_5 (Conv2D)           (None, 32, 32, 64)        1792      
                                                                 
 conv2d_6 (Conv2D)           (None, 32, 32, 64)        36928     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 16, 16, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 16, 16, 128)       73856     
                                                                 
 conv2d_8 (Conv2D)           (None, 16, 16, 128)       147584    
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 8, 8, 128)         0         
 g2D)                                                 

In [23]:
model_vgg.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
print('model is compiled')


history=model_vgg.fit(datagen.flow(X_train,y_train,batch_size=64),epochs=5,
                  validation_data=(X_test,y_test),
                  callbacks=[lr_scheduler,checkpoint_callback])

model is compiled
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


**Transfer Learning**

1. Load base model VGG16 for feature extraction (from "imagenet")
2. Classification layer will be my own layer

In [24]:
# base model (no training, just load)
base_model=tf.keras.applications.VGG16(include_top=False,weights="imagenet",input_tensor=None, input_shape=(32,32,3),pooling=None,classes=1000,classifier_activation="softmax",)

In [25]:
# freeze the base model
base_model.trainable=False

In [26]:
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout

In [27]:
# create Architecture
model_transfer_vgg=Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(512,activation='relu'),
    Dropout(0.5),
    Dense(10,activation='softmax')

])

model_transfer_vgg.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 1, 1, 512)         14714688  
                                                                 
 global_average_pooling2d (  (None, 512)               0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense_6 (Dense)             (None, 512)               262656    
                                                                 
 dropout_4 (Dropout)         (None, 512)               0         
                                                                 
 dense_7 (Dense)             (None, 10)                5130      
                                                                 
Total params: 14982474 (57.15 MB)
Trainable params: 267786 (1.02 MB)
Non-trainable params: 14714688 (56.13 MB)
_________

In [28]:
model_transfer_vgg.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
print('model is compiled')

model is compiled


In [29]:
history=model_transfer_vgg.fit(datagen.flow(X_train,y_train,batch_size=64),epochs=5,
                  validation_data=(X_test,y_test),
                  callbacks=[lr_scheduler,checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


**GoogleNet/Inception V3**

In [30]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets,callbacks, preprocessing
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
# Load CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

In [31]:
# Normalize the images
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

In [32]:
# Inception module
def inception_module(x, filters):
    f1, f3_r, f3, f5_r, f5, pool_proj = filters


    # 1x1 convolution branch
    conv_1x1 = layers.Conv2D(f1, (1, 1), padding='same', activation='relu')(x)


    # 1x1 convolution followed by 3x3 convolution branch
    conv_3x3 = layers.Conv2D(f3_r, (1, 1), padding='same', activation='relu')(x)
    conv_3x3 = layers.Conv2D(f3, (3, 3), padding='same', activation='relu')(conv_3x3)


    # 1x1 convolution followed by 5x5 convolution branch
    conv_5x5 = layers.Conv2D(f5_r, (1, 1), padding='same', activation='relu')(x)
    conv_5x5 = layers.Conv2D(f5, (5, 5), padding='same', activation='relu')(conv_5x5)


    # 3x3 max pooling followed by 1x1 convolution branch
    pool_proj_layer = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    pool_proj_out = layers.Conv2D(pool_proj, (1, 1), padding='same', activation='relu')(pool_proj_layer)  # Use pool_proj from filters list as number of filters


    # Concatenate the outputs of the four branches
    output = layers.concatenate([conv_1x1, conv_3x3, conv_5x5, pool_proj_out], axis=-1)
    return output

In [33]:
# Googlenet, the complete CNN architecture

def googlenet(input_shape=(32, 32, 3), num_classes=10):
    input_layer = layers.Input(shape=input_shape)


    # Initial layers (stem of the network)
    x = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu')(input_layer)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.Conv2D(64, (1, 1), padding='same', activation='relu')(x)
    x = layers.Conv2D(192, (3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)


    # Inception modules
    x = inception_module(x, [64, 96, 128, 16, 32, 32])
    x = inception_module(x, [128, 128, 192, 32, 96, 64])
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = inception_module(x, [192, 96, 208, 16, 48, 64])
    x = inception_module(x, [160, 112, 224, 24, 64, 64])
    x = inception_module(x, [128, 128, 256, 24, 64, 64])
    x = inception_module(x, [112, 144, 288, 32, 64, 64])
    x = inception_module(x, [256, 160, 320, 32, 128, 128])
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = inception_module(x, [256, 160, 320, 32, 128, 128])
    x = inception_module(x, [384, 192, 384, 48, 128, 128])


    # Average pooling layer
    # Use a smaller pool size or add padding to handle smaller feature maps
    x = layers.AveragePooling2D((2, 2), strides=(1, 1), padding='same')(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Flatten()(x)
    output_layer = layers.Dense(num_classes, activation='softmax')(x)


    model = models.Model(input_layer, output_layer)
    return model


In [34]:
model_gnet = googlenet()
model_gnet.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 conv2d_18 (Conv2D)          (None, 16, 16, 64)           9472      ['input_2[0][0]']             
                                                                                                  
 max_pooling2d_8 (MaxPoolin  (None, 8, 8, 64)             0         ['conv2d_18[0][0]']           
 g2D)                                                                                             
                                                                                                  
 conv2d_19 (Conv2D)          (None, 8, 8, 64)             4160      ['max_pooling2d_8[0][0]'] 

In [35]:
train_labels=to_categorical(train_labels,10)
test_labels=to_categorical(test_labels,10)

In [36]:
# Compile the model
model_gnet.compile(optimizer='adam',  # Choose an optimizer like Adam
              loss='categorical_crossentropy',  # Use categorical crossentropy for multi-class classification
              metrics=['accuracy'])

In [37]:
history=model_gnet.fit(train_images,train_labels,batch_size=64,epochs=5,
                  validation_data=(test_images,test_labels),
                  callbacks=[lr_scheduler,checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


**Transfer Learning using InceptionV3**

In [38]:
# base model
import keras
base_incepv3 = keras.applications.InceptionV3(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
)

In [39]:
# freeze the base model
for layer in base_incepv3.layers:
    layer.trainable = False

In [40]:
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout

In [41]:
# create Architecture
model_transfer_incepv3=Sequential([
    base_incepv3,
    GlobalAveragePooling2D(),
    Dense(512,activation='relu'),
    Dropout(0.5),
    Dense(10,activation='softmax')

])

model_transfer_incepv3.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inception_v3 (Functional)   (None, None, None, 2048   21802784  
                             )                                   
                                                                 
 global_average_pooling2d_1  (None, 2048)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_9 (Dense)             (None, 512)               1049088   
                                                                 
 dropout_6 (Dropout)         (None, 512)               0         
                                                                 
 dense_10 (Dense)            (None, 10)                5130      
                                                                 
Total params: 22857002 (87.19 MB)
Trainable params: 10

In [42]:
model_transfer_incepv3.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
print('model is compiled')

model is compiled


In [52]:
# One-hot encode the labels
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

In [56]:
test_images.shape

(10000, 32, 32, 3)

In [53]:
train_images.shape

(50000, 32, 32, 3)

In [62]:
# Check the shape of your model's output
print(model_transfer_incepv3.output_shape)

# Check the number of classes in your one-hot encoded labels
print(test_labels.shape[1])

(None, 10)
10


In [64]:
history=model_transfer_incepv3.fit(datagen.flow(train_images,train_labels,batch_size=64,target_size=(299,299)),epochs=5,
                  validation_data=(test_images,test_labels),
                  callbacks=[lr_scheduler,checkpoint_callback])

TypeError: ImageDataGenerator.flow() got an unexpected keyword argument 'target_size'

**ResNet
Identity Block**

In [46]:
def identity_block(X, filters):
    F1, F2 = filters

    # Save the input value
    X_shortcut = X

    # First component of main path
    X = layers.Conv2D(F1, (3, 3), padding='same')(X)
    X = layers.BatchNormalization()(X)
    X = layers.Activation('relu')(X)

    # Second component of main path
    X = layers.Conv2D(F2, (3, 3), padding='same')(X)
    X = layers.BatchNormalization()(X)

    # Add shortcut value to main path
    X = layers.Add()([X, X_shortcut])
    X = layers.Activation('relu')(X)

    return X


In [47]:
def convolutional_block(X, filters, strides=(2, 2)):
    F1, F2 = filters

    # Save the input value
    X_shortcut = X


    # First component of main path
    X = layers.Conv2D(F1, (3, 3), strides=strides, padding='same')(X)
    X = layers.BatchNormalization()(X)
    X = layers.Activation('relu')(X)

    # Second component of main path
    X = layers.Conv2D(F2, (3, 3), padding='same')(X)
    X = layers.BatchNormalization()(X)

    # Shortcut path
    X_shortcut = layers.Conv2D(F2, (1, 1), strides=strides, padding='same')(X_shortcut)
    X_shortcut = layers.BatchNormalization()(X_shortcut)

    # Add shortcut value to main path
    X = layers.Add()([X, X_shortcut])
    X = layers.Activation('relu')(X)

    return X


In [48]:
def ResNet(input_shape=(32, 32, 3), classes=10):
    # Define the input as a tensor with shape input_shape
    X_input = layers.Input(input_shape)


    # Zero-Padding
    X = layers.ZeroPadding2D((3, 3))(X_input)

    # Stage 1
    X = layers.Conv2D(64, (3, 3), strides=(1, 1), padding='same')(X)
    X = layers.BatchNormalization()(X)
    X = layers.Activation('relu')(X)
    X = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(X)


    # Stage 2
    X = convolutional_block(X, filters=[64, 64], strides=(1, 1))
    X = identity_block(X, filters=[64, 64])


    # Stage 3
    X = convolutional_block(X, filters=[128, 128], strides=(2, 2))
    X = identity_block(X, filters=[128, 128])


    # Stage 4
    X = convolutional_block(X, filters=[256, 256], strides=(2, 2))
    X = identity_block(X, filters=[256, 256])


    # Stage 5
    X = convolutional_block(X, filters=[512, 512], strides=(2, 2))
    X = identity_block(X, filters=[512, 512])


    # Average Pooling
    X = layers.AveragePooling2D(pool_size=(2, 2))(X)

    # Output layer
    X = layers.Flatten()(X)
    X = layers.Dense(classes, activation='softmax')(X)

    # Create model
    model = models.Model(inputs=X_input, outputs=X, name='ResNet')


    return model


In [49]:
# Create the model
model = ResNet(input_shape=(32, 32, 3), classes=10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


Model: "ResNet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 zero_padding2d (ZeroPaddin  (None, 38, 38, 3)            0         ['input_4[0][0]']             
 g2D)                                                                                             
                                                                                                  
 conv2d_169 (Conv2D)         (None, 38, 38, 64)           1792      ['zero_padding2d[0][0]']      
                                                                                                  
 batch_normalization_96 (Ba  (None, 38, 38, 64)           256       ['conv2d_169[0][0]']     