In this project, we will classify brands of cars using deep learning techniques, specifically the ResNet50 from tensorflow.  
We will reuse the algorithm weights of ResNet50 to create our own model, a kind of transfer learning.
We have 3 outputs: Audi, Lamborghini, and Mercedes.
We will cut the last layer and 3 nodes at the end of that particular layer. 

In [4]:
#import necessary libraries and packages
from tensorflow.keras.layers import Input, Dense, Flatten, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

In [6]:
import zipfile as zf
files = zf.ZipFile("Datasets.zip", 'r')
files.extractall('/Users/palakprashant/Car_Brand_Classification_Deep_Learning')
files.close()

In [7]:
#re-size all the images to this
image_size = [224, 224]
train_path = '/Users/palakprashant/Car_Brand_Classification_Deep_Learning/Train'
test_path = '/Users/palakprashant/Car_Brand_Classification_Deep_Learning/Test'

#adding [3] to input shape because we are adding  RGB channel
#Add preprocessing layer to the front of Resnet 50 
resnet = ResNet50(input_shape = image_size + [3], weights = 'imagenet', include_top = False)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step


In [9]:
#Let's view the resnet summary:
resnet.summary()

In [10]:
#Reuse existing layers, we will only retrain the final layer
for layer in resnet.layers:
    layer.trainable = False

In [11]:
#get number of output classes
folders = glob('Datasets/Train/*')
folders

['Datasets/Train/lamborghini',
 'Datasets/Train/mercedes',
 'Datasets/Train/audi']

In [13]:
#flatten resnet output after downloading it
#Flattening is required before connecting to a fully connected neural network - so we make it a single number of features
x = Flatten()(resnet.output)

In [14]:
#use the Dense model to use the length of folders as our output
folder_length = len(folders)
#softmax is like a sigmoid function, which would be able to classify multiple categories.
prediction = Dense(len(folders), activation = 'softmax')(x)
#create model object
model = Model(inputs = resnet.input, outputs = prediction)

In [15]:
#View model structure
model.summary()

In [16]:
#the last layer now has 3 nodes as our output

#Now, we will tell the model what cost and optimization method to use
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)

In [19]:
#Use the image data generator to import images from the dataset
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#We will now do data augmentation to create variations of existing images
train_datagenerator = ImageDataGenerator(rescale = 1./255,
                                        shear_range = 0.2,
                                        zoom_range = 0.2,
                                        horizontal_flip = True)
test_datagenerator = ImageDataGenerator(rescale = 1./255)

In [20]:
#Make sure you provide the same target size as initiated for the image size
#Read all images from training data  
training_data = train_datagenerator.flow_from_directory('Datasets/Train',
                                                       target_size = (224, 224),
                                                       batch_size = 32,
                                                       class_mode = 'categorical')


Found 64 images belonging to 3 classes.


We should never perform data augmentation on test data! 

In [21]:
testing_data = test_datagenerator.flow_from_directory('Datasets/Test',
                                                       target_size = (224, 224),
                                                       batch_size = 32,
                                                       class_mode = 'categorical')

Found 58 images belonging to 3 classes.


In [39]:
#Fit the model 
resnet = model.fit(
    training_data,
    validation_data = testing_data,
    epochs = 50,
    steps_per_epoch = len(training_data),
    validation_steps = len(testing_data)
)

Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 1.0000 - loss: 0.1206 - val_accuracy: 0.7241 - val_loss: 0.9269
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 0.9792 - loss: 0.1226 - val_accuracy: 0.7931 - val_loss: 0.8805
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 0.9792 - loss: 0.1246 - val_accuracy: 0.7414 - val_loss: 0.9023
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 1.0000 - loss: 0.1376 - val_accuracy: 0.7241 - val_loss: 0.9678


2025-02-13 12:28:44.526405: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 1.0000 - loss: 0.1099 - val_accuracy: 0.7586 - val_loss: 0.8935
Epoch 10/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 11/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 1.0000 - loss: 0.1285 - val_accuracy: 0.7931 - val_loss: 0.8771
Epoch 12/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 13/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step - accuracy: 0.9896 - loss: 0.1208 - val_accuracy: 0.7069 - val_loss: 0.9818
Epoch 14/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 15/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step - accuracy: 0.9792 - loss: 0.1190 - val_accuracy: 0.7414 - val_loss: 0.8886
Epoc

Accuracy is around 93% after model fitting (first round).
This model will also not lead to overfitting. If the accuracy is not high enough, add more images to the dataset using data augmentation.

In [43]:
#save model
from tensorflow.keras.models import load_model
model.save('model_resnet50.h5')



In [44]:
#get predictions
y_pred = model.predict(testing_data)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step


In [46]:
y_pred
#0th index belongs to audi class, 1st belongs to lamborghini class, 2nd belongs to mercedes class.

array([[2.54192054e-02, 8.89311492e-01, 8.52693021e-02],
       [3.36062647e-02, 6.36570603e-02, 9.02736604e-01],
       [2.58009344e-01, 5.15953720e-01, 2.26036951e-01],
       [3.65952752e-03, 9.96232867e-01, 1.07766224e-04],
       [7.34971252e-07, 5.21239068e-04, 9.99478042e-01],
       [2.47645319e-01, 7.33660519e-01, 1.86942425e-02],
       [1.07061099e-02, 9.09337425e-04, 9.88384485e-01],
       [8.22716579e-03, 9.69504714e-01, 2.22681798e-02],
       [2.35014521e-02, 7.12485671e-01, 2.64012814e-01],
       [3.57970119e-01, 3.63214128e-02, 6.05708420e-01],
       [5.88955171e-02, 4.27505141e-03, 9.36829448e-01],
       [1.79456286e-02, 2.15814188e-01, 7.66240239e-01],
       [7.13922307e-02, 5.92082560e-01, 3.36525172e-01],
       [7.17023671e-01, 1.34354249e-01, 1.48622036e-01],
       [9.62459505e-01, 2.55829450e-02, 1.19576287e-02],
       [2.25500436e-03, 4.63875234e-01, 5.33869743e-01],
       [6.01441343e-06, 2.20897811e-04, 9.99773085e-01],
       [1.49839465e-02, 4.21719

In [47]:
import numpy as np
y_pred = np.argmax(y_pred, axis = 1)

In [49]:
y_pred #take indices with highest probabilities

array([1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 1, 1, 1, 2, 1, 1, 0,
       1, 1, 2, 1, 2, 1, 2, 1, 0, 2, 1, 2, 2, 1])

In [50]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
model = load_model('model_resnet50.h5')



In [52]:
#read 1 image in the test data
img = image.load_img('Datasets/Test/lamborghini/10.jpg', target_size = (224, 224))

In [53]:
image

<module 'tensorflow.keras.preprocessing.image' from '/opt/anaconda3/lib/python3.12/site-packages/keras/_tf_keras/keras/preprocessing/image/__init__.py'>

In [54]:
#convert image into image array. This step is very important.
x = image.img_to_array(img)
x

array([[[ 17.,   7.,   0.],
        [ 17.,   7.,   0.],
        [ 17.,   7.,   0.],
        ...,
        [ 13.,   2.,   0.],
        [ 14.,   4.,   3.],
        [ 14.,   4.,   3.]],

       [[ 17.,   7.,   0.],
        [ 17.,   7.,   0.],
        [ 18.,   8.,   0.],
        ...,
        [ 14.,   3.,   1.],
        [ 14.,   4.,   3.],
        [ 14.,   4.,   3.]],

       [[ 18.,   8.,   0.],
        [ 18.,   8.,   0.],
        [ 19.,   9.,   0.],
        ...,
        [ 14.,   3.,   1.],
        [ 14.,   4.,   3.],
        [ 14.,   4.,   3.]],

       ...,

       [[209., 129.,  92.],
        [210., 130.,  93.],
        [212., 132.,  95.],
        ...,
        [216., 132.,  96.],
        [214., 129.,  92.],
        [213., 129.,  92.]],

       [[208., 128.,  91.],
        [208., 128.,  91.],
        [211., 131.,  94.],
        ...,
        [215., 131.,  95.],
        [209., 129.,  94.],
        [207., 129.,  93.]],

       [[206., 128.,  90.],
        [206., 128.,  90.],
        [209., 1

In [55]:
x.shape

(224, 224, 3)

In [56]:
x = x/255 #this step is required because in the test data, we did the rescaling by dividing by 255. so, the new data must also be divided by 255.

In [58]:
x = np.expand_dims(x, axis = 0)
image_data = preprocess_input(x)
image_data.shape

(1, 224, 224, 3)

In [60]:
model.predict(image_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step


array([[0.07764878, 0.2537732 , 0.66857797]], dtype=float32)

In [62]:
a = np.argmax(model.predict(image_data), axis = 1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step


In [64]:
a==2

array([ True])