In [1]:
#importing required packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
from keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.layers import *
from keras.optimizers import Adam

In [109]:
# Load the training metadata CSV (image file names, insurance fields and labels)
# into a pandas DataFrame, then print a per-column summary of dtypes and nulls.
df = pd.read_csv('dataset/train.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1399 entries, 0 to 1398
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Image_path         1399 non-null   object 
 1   Insurance_company  1399 non-null   object 
 2   Cost_of_vehicle    1310 non-null   float64
 3   Min_coverage       1310 non-null   float64
 4   Expiry_date        1399 non-null   object 
 5   Max_coverage       1310 non-null   float64
 6   Condition          1399 non-null   int64  
 7   Amount             1388 non-null   float64
dtypes: float64(4), int64(1), object(3)
memory usage: 87.6+ KB


In [110]:
# Preview the first rows of the training metadata.
df.head()

Unnamed: 0,Image_path,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage,Condition,Amount
0,img_4513976.jpg,BQ,41500.0,1037.5,2026-12-03,36142.68,0,0.0
1,img_7764995.jpg,BQ,50700.0,1267.5,2025-07-10,12753.0,1,6194.0
2,img_451308.jpg,A,49500.0,1237.5,2022-08-11,43102.68,0,0.0
3,img_7768372.jpg,A,33500.0,837.5,2022-08-02,8453.0,1,7699.0
4,img_7765274.jpg,AC,27600.0,690.0,2026-05-01,6978.0,1,8849.0


In [111]:
# Keep only the two columns needed for image classification:
# the image file name and the binary Condition label.
X = df[['Image_path', 'Condition']].copy()

In [112]:
# Preview the reduced frame (file name + label).
X.head()

Unnamed: 0,Image_path,Condition
0,img_4513976.jpg,0
1,img_7764995.jpg,1
2,img_451308.jpg,0
3,img_7768372.jpg,1
4,img_7765274.jpg,1


In [165]:
# Container that will hold the loaded image arrays.
img_list = list()

In [166]:
# Load every training image listed in X, resized to 224x224 (the input size the
# ResNet50 below is built with), and convert each one to a float array.
# The list is rebuilt from scratch, so re-running this cell cannot append the
# same images a second time.
# NOTE(review): pixel values are used exactly as loaded. Keras' ImageNet ResNet50
# weights are normally paired with keras.applications.resnet50.preprocess_input;
# confirm whether that step should be applied (identically to train and test images).
base_path = 'dataset/trainImages/'
img_list = [
    image.img_to_array(image.load_img(base_path + file_name, target_size=(224, 224)))
    for file_name in X['Image_path']
]

In [167]:
# Stack the per-image arrays into a single NumPy array (one image per row).
img_array = np.asarray(img_list)

In [168]:
# Sanity check: expect (number of images, 224, 224, 3).
img_array.shape

(1399, 224, 224, 3)

In [117]:
# Target vector: the Condition label of every image as a NumPy array.
y = X['Condition'].to_numpy(copy=True)
y.shape

(1399,)

In [118]:
# Number of samples per Condition class: label 0 first, then label 1.
print((y == 0).sum())
print((y == 1).sum())

99
1300


The data is heavily imbalanced with respect to **Condition**: only 99 of the 1,399 images have label 0, while 1,300 have label 1.

In [119]:
# Hold out 20% of the images for validation. stratify=y keeps the class ratio
# the same in both splits, which matters because label 0 is rare.
# A fixed random_state makes the split (and every metric computed from it)
# reproducible across kernel restarts.
x_train, x_val, y_train, y_val = train_test_split(
    img_array, y, test_size=0.2, stratify=y, random_state=42
)

In [120]:
# Print (features shape, labels shape) for the training split, then the validation split.
for features, labels in ((x_train, y_train), (x_val, y_val)):
    print(features.shape, labels.shape)

(1119, 224, 224, 3) (1119,)
(280, 224, 224, 3) (280,)


We will use the pretrained ResNet50 model as a feature extractor and train our own classifier on top of it. We will also fine-tune some of the top layers of the feature extractor so that it learns features that distinguish damaged from non-damaged cars.

In [121]:
# Build ResNet50 without its classification head (include_top=False), initialised
# with ImageNet weights, for 224x224 RGB inputs; then print the layer summary.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [122]:
# Classification head stacked on top of the ResNet50 output.
# Global average pooling over the ResNet50 output.
avg = GlobalAveragePooling2D()(model.output)
# Hidden fully connected layer with 256 ReLU units.
fc1 = Dense(256, activation='relu')(avg)
# Dropout with rate 0.5 between the two dense layers.
d1 = Dropout(0.5)(fc1)
# Single sigmoid unit: the output is a probability for the binary Condition label.
fc2 = Dense(1, activation = 'sigmoid')(d1)

In [123]:
# Assemble the final model: it takes the ResNet50 input and ends at the
# sigmoid output of the new head. Then print its layer summary.
model_new = Model(inputs=model.input,outputs=fc2)
model_new.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
____________________________________________________________________________________________

conv3_block1_3_bn (BatchNormali (None, 28, 28, 512)  2048        conv3_block1_3_conv[0][0]        
__________________________________________________________________________________________________
conv3_block1_add (Add)          (None, 28, 28, 512)  0           conv3_block1_0_bn[0][0]          
                                                                 conv3_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_out (Activation)   (None, 28, 28, 512)  0           conv3_block1_add[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block1_out[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block2_1_conv[0][0]        
__________

In [124]:
# Print every layer together with its index, so layers can later be
# selected by position (e.g. when choosing which ones to freeze).
for ix, layer in enumerate(model_new.layers):
    print(ix, layer)

0 <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x000002A8127C6820>
1 <tensorflow.python.keras.layers.convolutional.ZeroPadding2D object at 0x000002A8127A3A90>
2 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A83EFE3700>
3 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x000002A83F7FCD30>
4 <tensorflow.python.keras.layers.core.Activation object at 0x000002A83F7E5400>
5 <tensorflow.python.keras.layers.convolutional.ZeroPadding2D object at 0x000002A83F81F7F0>
6 <tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x000002A8127E0F70>
7 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A8127DCE20>
8 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x000002A8127F3070>
9 <tensorflow.python.keras.layers.core.Activation object at 0x000002A8127F3E50>
10 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A8127D50D0>
11 <tensorflow.python.keras.

123 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A812A393D0>
124 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x000002A812A39460>
125 <tensorflow.python.keras.layers.core.Activation object at 0x000002A812A65760>
126 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A812A5A250>
127 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x000002A812A56C70>
128 <tensorflow.python.keras.layers.core.Activation object at 0x000002A812A4E0D0>
129 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A812A65D00>
130 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x000002A812A56490>
131 <tensorflow.python.keras.layers.merge.Add object at 0x000002A812A14E20>
132 <tensorflow.python.keras.layers.core.Activation object at 0x000002A812A41E80>
133 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000002A8129A4310>
134 <tensorflow.python.k

I tried fine-tuning the last and the second-to-last convolutional layers of the feature extractor (i.e. the first and second counting from the end), but fine-tuning only the last convolutional layer gives good results.

In [125]:
# Freeze the first 171 layers (indices 0-170) so their weights are not updated
# during training. Everything from index 171 onwards stays trainable: per the
# author's notes this is the last convolutional layer of the feature extractor
# (the part being fine-tuned), followed by the new classification head.
N_FROZEN_LAYERS = 171
for layer in model_new.layers[:N_FROZEN_LAYERS]:
    layer.trainable = False

In [126]:
# Print the summary again to check the trainable / non-trainable parameter
# counts now that the early layers are frozen.
model_new.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
____________________________________________________________________________________________

__________________________________________________________________________________________________
conv3_block1_add (Add)          (None, 28, 28, 512)  0           conv3_block1_0_bn[0][0]          
                                                                 conv3_block1_3_bn[0][0]          
__________________________________________________________________________________________________
conv3_block1_out (Activation)   (None, 28, 28, 512)  0           conv3_block1_add[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_conv (Conv2D)    (None, 28, 28, 128)  65664       conv3_block1_out[0][0]           
__________________________________________________________________________________________________
conv3_block2_1_bn (BatchNormali (None, 28, 28, 128)  512         conv3_block2_1_conv[0][0]        
__________________________________________________________________________________________________
conv3_bloc

Trainable params: 1,579,521
Non-trainable params: 22,532,992
__________________________________________________________________________________________________


In [127]:
# Adam optimizer with a learning rate of 3e-5.
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by newer Keras releases.
adam = Adam(learning_rate=0.00003)
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# reported alongside the loss.
model_new.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

In [128]:
# Fit on the training split for 15 epochs in mini-batches of 32, evaluating on
# the validation split during training; `hist` holds the object returned by fit().
hist = model_new.fit(
    x_train,
    y_train,
    epochs=15,
    batch_size=32,
    validation_data=(x_val, y_val),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [169]:
# Predicted probabilities for every image in img_array.
# NOTE(review): img_array is the array that was split into x_train / x_val, so
# metrics computed from these predictions include the training samples.
pred = model_new.predict(img_array)

In [170]:
# Collapse the model output to a 1-D vector with one probability per image.
pred = pred.flatten()
pred.shape

(1399,)

In [171]:
# Threshold the probabilities at 0.5: strictly above -> label 1, otherwise label 0.
classes = (pred > 0.5).astype(int)
classes.shape

(1399,)

In [172]:
# Accuracy: fraction of predicted labels that equal the true labels in y.
np.mean(classes == y)

0.9807005003573981

In [173]:
from sklearn.metrics import classification_report, f1_score

In [174]:
# Per-class precision / recall / F1 against the true labels in y.
print(classification_report(y, classes))

              precision    recall  f1-score   support

           0       1.00      0.73      0.84        99
           1       0.98      1.00      0.99      1300

    accuracy                           0.98      1399
   macro avg       0.99      0.86      0.92      1399
weighted avg       0.98      0.98      0.98      1399



In [175]:
# F1 score with sklearn's default settings; the value matches the label-1 row
# of the classification report, i.e. it describes the majority class.
f1_score(y, classes)

0.9897221164826799

In [176]:
# Predicted probabilities for the held-out validation split only.
pred = model_new.predict(x_val)

In [177]:
# Collapse the model output to a 1-D vector with one probability per validation image.
pred = pred.flatten()
pred.shape

(280,)

In [178]:
# Threshold the validation probabilities at 0.5: strictly above -> label 1, otherwise label 0.
classes = (pred > 0.5).astype(int)
classes.shape

(280,)

In [179]:
# Validation accuracy: fraction of predicted labels that equal y_val.
np.mean(classes == y_val)

0.95

In [180]:
# Per-class precision / recall / F1 on the validation split.
print(classification_report(y_val, classes))

              precision    recall  f1-score   support

           0       1.00      0.30      0.46        20
           1       0.95      1.00      0.97       260

    accuracy                           0.95       280
   macro avg       0.97      0.65      0.72       280
weighted avg       0.95      0.95      0.94       280



In [181]:
# F1 score on the validation split with sklearn's default settings; the value
# matches the label-1 row of the report, i.e. it describes the majority class.
f1_score(y_val, classes)

0.9737827715355805

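The problem is that every damaged car (class 1) is predicted as damaged, but many non-damaged cars (class 0) are also predicted as damaged: on the validation set, recall for class 0 is only 0.30. This problem persists because the dataset is imbalanced.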

In [140]:
# Load the test metadata CSV into a DataFrame and print a per-column summary.
# Unlike the training file, it has no Condition or Amount column.
df_test = pd.read_csv('dataset/test.csv')
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Image_path         600 non-null    object 
 1   Insurance_company  600 non-null    object 
 2   Cost_of_vehicle    600 non-null    int64  
 3   Min_coverage       600 non-null    float64
 4   Expiry_date        600 non-null    object 
 5   Max_coverage       600 non-null    float64
dtypes: float64(2), int64(1), object(3)
memory usage: 28.2+ KB


In [141]:
# Start a fresh list for the test images (the name is reused from the training stage).
img_list = list()

In [143]:
# Load every test image listed in df_test, resized to 224x224, and convert
# each one to a float array. The list is rebuilt from scratch, so re-running
# this cell cannot append the same images a second time.
base_path = 'dataset/testImages/'
img_list = [
    image.img_to_array(image.load_img(base_path + file_name, target_size=(224, 224)))
    for file_name in df_test['Image_path']
]

In [144]:
# Stack the test images into a single NumPy array (this rebinds img_array,
# which previously held the training images).
img_array = np.asarray(img_list)

In [145]:
# Sanity check: expect (number of test images, 224, 224, 3).
img_array.shape

(600, 224, 224, 3)

In [150]:
# Predicted probabilities for the test images.
pred = model_new.predict(img_array)

In [151]:
# The model returns one column per output unit, hence shape (n_images, 1).
pred.shape

(600, 1)

In [152]:
# Collapse the (n_images, 1) output to a 1-D vector of probabilities.
pred = pred.flatten()
pred.shape

(600,)

In [153]:
# Threshold the test probabilities at 0.5: strictly above -> label 1, otherwise label 0.
classes = (pred > 0.5).astype(int)
classes.shape

(600,)

In [155]:
# Display the predicted labels for the test images.
classes

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [154]:
# Attach the predicted labels to the test metadata as a new Condition column
# (this modifies df_test in place), then preview the result.
df_test['Condition'] = classes
df_test.head()

Unnamed: 0,Image_path,Insurance_company,Cost_of_vehicle,Min_coverage,Expiry_date,Max_coverage,Condition
0,img_4538519.jpg,B,23600,590.0,2025-04-12,5978.0,1
1,img_7766002.jpg,C,28300,707.5,2028-08-24,7153.0,1
2,img_4637390.jpg,AC,43700,1092.5,2023-11-28,11003.0,1
3,img_4516108.jpg,BB,46100,1152.5,2028-02-04,11603.0,1
4,img_4517008.jpg,BB,40700,1017.5,2022-01-03,10253.0,1


In [156]:
# Write the test metadata with the predicted Condition column to CSV,
# without the DataFrame index.
df_test.to_csv('test_c.csv', index=False)