In [10]:
# Download data
!wget http://ufldl.stanford.edu/housenumbers/train_32x32.mat
!wget http://ufldl.stanford.edu/housenumbers/test_32x32.mat

--2024-03-27 14:49:10--  http://ufldl.stanford.edu/housenumbers/train_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 182040794 (174M) [text/plain]
Saving to: ‘train_32x32.mat.1’


2024-03-27 14:49:21 (16.4 MB/s) - ‘train_32x32.mat.1’ saved [182040794/182040794]

--2024-03-27 14:49:21--  http://ufldl.stanford.edu/housenumbers/test_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64275384 (61M) [text/plain]
Saving to: ‘test_32x32.mat.1’


2024-03-27 14:49:24 (18.9 MB/s) - ‘test_32x32.mat.1’ saved [64275384/64275384]



In [22]:
from scipy.io import loadmat
import numpy as np
import keras
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from PIL import Image
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing.image import img_to_array, load_img

In [12]:
# Parse both MATLAB archives, then pull the arrays stored under the 'X' (images) and
# 'y' (labels) keys out of each one.
train, test = loadmat('train_32x32.mat'), loadmat('test_32x32.mat')
(X_train, y_train), (X_test, y_test) = (
    (archive['X'], archive['y']) for archive in (train, test)
)

In [13]:
# Carve a 25,000-sample working set out of the training archive (samples live on the
# last axis of X): the first 22,500 samples are used for fitting, the next 2,500 are
# held out for evaluation.
# NOTE(review): the "test" subset is sliced from X_train/y_train; the X_test/y_test
# arrays loaded from test_32x32.mat are not used here — confirm this is intentional.
X_train_subset, y_train_subset = X_train[..., :22500], y_train[:22500]
X_test_subset, y_test_subset = X_train[..., 22500:25000], y_train[22500:25000]

In [14]:
# Flatten the label arrays into 1-D vectors.
# reshape(-1) infers the length from the data, so this cell does not have to repeat the
# 22500/2500 split sizes and keeps working if the slice bounds of the split are changed.
y_train_subset = y_train_subset.reshape(-1)
y_test_subset = y_test_subset.reshape(-1)

In [16]:
# Sanity check: report the image and label array shapes of both splits.
print('X_train has a shape of {}, y_train has a shape of {}'.format(
    *(array.shape for array in (X_train_subset, y_train_subset))
))
print('X_test has a shape of {}, y_test has a shape of {}'.format(
    *(array.shape for array in (X_test_subset, y_test_subset))
))

X_train has a shape of (32, 32, 3, 22500), y_train has a shape of (22500,)
X_test has a shape of (32, 32, 3, 2500), y_test has a shape of (2500,)


In [15]:
# The dataset labels the digit 0 as 10, so map 10 back to 0 to get classes 0-9.
# np.where performs the remap in one vectorized pass instead of a Python-level loop over
# every label; the explicit cast pins the label dtype to int64 regardless of how the
# labels were stored in the .mat file.
y_train_subset = np.where(y_train_subset == 10, 0, y_train_subset).astype('int64')
y_test_subset = np.where(y_test_subset == 10, 0, y_test_subset).astype('int64')
# Display the distinct classes as a quick check that 10 is gone.
np.unique(y_train_subset)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [16]:
# Scale the pixels of both splits to the range [0, 1] for faster convergence.
# The integer-dtype check makes this cell idempotent: once an array has been converted
# to float32 it is left alone, so re-running the cell cannot divide by 255 a second time.
# Assumes the raw pixels load as an integer dtype in 0-255 — TODO confirm.
if np.issubdtype(X_train_subset.dtype, np.integer):
    X_train_subset = X_train_subset.astype('float32') / 255

if np.issubdtype(X_test_subset.dtype, np.integer):
    X_test_subset = X_test_subset.astype('float32') / 255

In [19]:
# Empty container; the backbone and the classification head are added in later cells.
model=Sequential()
# ImageNet-pretrained ResNet50 used as a feature extractor: include_top=False drops the
# original ImageNet classifier and pooling='avg' global-average-pools the last feature
# map into one flat feature vector per image.
# `classes` is deliberately not passed: Keras only uses it when include_top=True and no
# pretrained weights are loaded, so it would be ignored here. The 10-way output comes
# from the Dense head added to `model`.
# NOTE(review): the ImageNet weights expect inputs run through
# keras.applications.resnet50.preprocess_input (applied to 0-255 pixels), whereas this
# notebook feeds pixels scaled to [0, 1] — confirm and align the preprocessing.
train_ds=keras.applications.ResNet50(
    input_shape=(32,32,3),
    include_top=False,
    weights="imagenet",
    pooling='avg',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [21]:
# Freeze every layer of the pretrained backbone so that fitting leaves its weights untouched.
for backbone_layer in train_ds.layers:
    backbone_layer.trainable = False

In [22]:
# Stack the classifier: pretrained feature extractor first, then a 10-way softmax head.
for stage in (train_ds, Dense(10, activation='softmax')):
    model.add(stage)

In [23]:
# Print the layer overview (output shapes, trainable vs. non-trainable parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 dense (Dense)               (None, 10)                20490     
                                                                 
Total params: 23608202 (90.06 MB)
Trainable params: 20490 (80.04 KB)
Non-trainable params: 23587712 (89.98 MB)
_________________________________________________________________


In [24]:
# Labels are integer class ids, hence the sparse categorical cross-entropy loss;
# optimise with Adam and report accuracy.
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [19]:
# Move the sample axis to the front:
# (height, width, channels, samples) -> (samples, height, width, channels).
# The trailing-axis check makes this cell idempotent: once the 3-channel axis is last the
# array is already batch-first, and permuting it again on a re-run would scramble the axes.
# (Assumes a split never contains exactly 3 samples.)
if X_train_subset.shape[-1] != 3:
    X_train_subset = X_train_subset.transpose((3, 0, 1, 2))
if X_test_subset.shape[-1] != 3:
    X_test_subset = X_test_subset.transpose((3, 0, 1, 2))

In [26]:
# Confirm that both splits are now laid out batch-first.
train_dims, test_dims = X_train_subset.shape, X_test_subset.shape
print("X_train_subset shape:", train_dims)
print("X_test_subset shape:", test_dims)

X_train_subset shape: (22500, 32, 32, 3)
X_test_subset shape: (2500, 32, 32, 3)


In [27]:
# One epoch over the training split in mini-batches of 64; the held-out split is scored
# at the end of the epoch.
history = model.fit(
    x=X_train_subset,
    y=y_train_subset,
    batch_size=64,
    epochs=1,
    validation_data=(X_test_subset, y_test_subset),
)



In [28]:
# evaluate() yields the loss followed by the compiled metric (accuracy).
resnet50_scores = model.evaluate(x=X_test_subset, y=y_test_subset)
test_loss, test_acc = resnet50_scores
print('Test accuracy:', test_acc)

Test accuracy: 0.20399999618530273


In [31]:
# Transfer-learning model: frozen ImageNet VGG16 backbone plus a 10-way softmax head.
model2=Sequential()
# include_top=False drops the original ImageNet classifier; pooling='avg' global-average-
# pools the last feature map into a flat feature vector per image.
# `classes` is deliberately not passed: Keras only uses it when include_top=True and no
# pretrained weights are loaded, so it would be ignored here.
# NOTE(review): the ImageNet weights expect inputs run through
# keras.applications.vgg16.preprocess_input (applied to 0-255 pixels), whereas this
# notebook feeds pixels scaled to [0, 1] — confirm and align the preprocessing.
train_ds_2=keras.applications.VGG16(
    input_shape=(32,32,3),
    include_top=False,
    weights="imagenet",
    pooling='avg',
)

# Freeze the pretrained layers so only the new Dense head is trained.
for layer in train_ds_2.layers:
    layer.trainable=False

model2.add(train_ds_2)
model2.add(Dense(10, activation='softmax'))

model2.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 dense_2 (Dense)             (None, 10)                5130      
                                                                 
Total params: 14719818 (56.15 MB)
Trainable params: 5130 (20.04 KB)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________


In [32]:
# Compile, train for a single epoch (batch size 64) and score the VGG16-based model.
model2.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model2.fit(
    x=X_train_subset,
    y=y_train_subset,
    batch_size=64,
    epochs=1,
    validation_data=(X_test_subset, y_test_subset),
)
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model2.evaluate(x=X_test_subset, y=y_test_subset)
print('Test accuracy:', test_acc)

Test accuracy: 0.4020000100135803


In [33]:
# Transfer-learning model: frozen ImageNet ResNet101 backbone plus a 10-way softmax head.
model3=Sequential()
# include_top=False drops the original ImageNet classifier; pooling='avg' global-average-
# pools the last feature map into a flat feature vector per image.
# `classes` is deliberately not passed: Keras only uses it when include_top=True and no
# pretrained weights are loaded, so it would be ignored here.
# NOTE(review): the ImageNet weights expect inputs run through
# keras.applications.resnet.preprocess_input (applied to 0-255 pixels), whereas this
# notebook feeds pixels scaled to [0, 1] — confirm and align the preprocessing.
train_ds_3=keras.applications.ResNet101(
    input_shape=(32,32,3),
    include_top=False,
    weights="imagenet",
    pooling='avg',
)

# Freeze the pretrained layers so only the new Dense head is trained.
for layer in train_ds_3.layers:
    layer.trainable=False

model3.add(train_ds_3)
model3.add(Dense(10, activation='softmax'))

model3.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet101 (Functional)      (None, 2048)              42658176  
                                                                 
 dense_3 (Dense)             (None, 10)                20490     
                                                                 
Total params: 42678666 (162.81 MB)
Trainable params: 20490 (80.04 KB)
Non-trainable params: 42658176 (162.73 MB)
_________________________________________________________________


In [34]:
# Compile, train for a single epoch (batch size 64) and score the ResNet101-based model.
model3.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model3.fit(
    x=X_train_subset,
    y=y_train_subset,
    batch_size=64,
    epochs=1,
    validation_data=(X_test_subset, y_test_subset),
)
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model3.evaluate(x=X_test_subset, y=y_test_subset)
print('Test accuracy:', test_acc)

Test accuracy: 0.1687999963760376


In [17]:
# LeNet-style CNN trained from scratch on 32x32 RGB inputs: two 5x5 convolution +
# 2x2 max-pooling stages, then dense layers of 120 and 84 units and a 10-way softmax.
model4 = Sequential([
    Conv2D(6, (5, 5), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(16, (5, 5), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(120, activation='relu'),
    Dense(84, activation='relu'),
    Dense(10, activation='softmax'),
])

model4.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 28, 28, 6)         456       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 6)         0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 5, 5, 16)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 400)               0         
                                                                 
 dense_3 (Dense)             (None, 120)              

In [20]:
# Compile, train for a single epoch (batch size 64) and score the LeNet model.
model4.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model4.fit(
    x=X_train_subset,
    y=y_train_subset,
    batch_size=64,
    epochs=1,
    validation_data=(X_test_subset, y_test_subset),
)
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model4.evaluate(x=X_test_subset, y=y_test_subset)
print('Test accuracy:', test_acc)

Test accuracy: 0.7603999972343445


In [23]:
# AlexNet-style CNN declared for 224x224 RGB inputs: five convolution layers with three
# overlapping 3x3/stride-2 max-pools, then two 4096-unit dense layers with 50% dropout
# and a 10-way softmax.
model5 = Sequential([
    # Convolution stages 1 and 2, each followed by pooling
    Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    # Three stacked 3x3 convolutions, then the final pooling
    Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    # Classifier
    Flatten(),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model5.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 54, 54, 96)        34944     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 26, 26, 96)        0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 26, 26, 256)       614656    
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 12, 12, 256)       0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 12, 12, 384)       885120    
                                                                 
 conv2d_12 (Conv2D)          (None, 12, 12, 384)      

In [None]:
import tensorflow as tf

# Upsample both splits to the 224x224 input size that model5 was declared with.
# NOTE(review): this resizes every image eagerly and rebinds the *_subset names, so
#   (a) the training split alone becomes 22,500 x 224 x 224 x 3 values — roughly 13.5 GB
#       assuming 4-byte floats — which may not fit in memory; consider resizing per batch;
#   (b) the 32x32 arrays used by the earlier models are no longer available under these
#       names after this cell runs.
X_train_subset, X_test_subset = (
    tf.image.resize(images, (224, 224)) for images in (X_train_subset, X_test_subset)
)

In [1]:
# Compile, train for a single epoch (batch size 64) and score the AlexNet model.
model5.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model5.fit(
    x=X_train_subset,
    y=y_train_subset,
    batch_size=64,
    epochs=1,
    validation_data=(X_test_subset, y_test_subset),
)
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model5.evaluate(x=X_test_subset, y=y_test_subset)
print('Test accuracy:', test_acc)

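Thus, from the above results, the best accuracy on the SVHN dataset was obtained with the LeNet model: about 76% test accuracy after one epoch, compared with about 40% for VGG16, 20% for ResNet50 and 17% for ResNet101. No result is recorded above for the AlexNet model.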