In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten

In [2]:
# Location of the index CSV that lists every image together with its class.
# Written with a forward slash so it resolves on every OS; the earlier
# backslash form relied on the invalid escape sequence "\i" and only worked
# on Windows.
labels_path = "preprocessed/index.csv"
labels = pd.read_csv(labels_path)

## Validate the data

In [3]:
# File names actually present in the image folder.
image_names = os.listdir('preprocessed/Image')
# Set copy used only for O(1) membership tests; `image_names` stays a list.
available_images = set(image_names)

# Check that every image path in the index is unique and has a file on disk.
# `repeat` collects the distinct paths in first-seen order; `seen_paths`
# mirrors it as a set so the duplicate test does not rescan a list each time.
repeat = []
seen_paths = set()
for image_path in labels['image']:

    if image_path in seen_paths:
        print(image_path)  # path is listed more than once in the index
    else:
        seen_paths.add(image_path)
        repeat.append(image_path)

    # Index entries look like "image/<file name>"; compare the file name only.
    if os.path.basename(image_path) not in available_images:
        print(image_path)  # no matching file in the image folder

# The two counts are equal when the index contains no duplicates.
print(len(labels['image']))
print(len(repeat))

1512
1512


## Frequency of each label

In [4]:
# Number of rows per class, then the label table itself as the cell output.
class_counts = labels['class'].value_counts()
print(class_counts)
labels

unknown           549
door_dent         192
door_scratch      154
glass_shatter     137
tail_lamp         136
head_lamp         133
bumper_dent       129
bumper_scratch     82
Name: class, dtype: int64


Unnamed: 0,image,class,subset
0,image/0.jpeg,unknown,T
1,image/1.jpeg,head_lamp,T
2,image/2.jpeg,door_scratch,T
3,image/3.jpeg,head_lamp,T
4,image/4.jpeg,unknown,T
...,...,...,...
1507,image/1507.jpeg,unknown,V
1508,image/1508.jpeg,glass_shatter,V
1509,image/1509.jpeg,unknown,V
1510,image/1510.jpeg,unknown,V


## Create one-hot encoding for the labels

In [5]:
# Distinct class names in order of first appearance; the position of a name in
# this array is the column it occupies in the one-hot matrix.
label_names = labels['class'].unique()
print(label_names)

# Compare every row's class against every class name in one broadcast:
# (n_rows, 1) == (1, n_classes) -> (n_rows, n_classes) booleans, stored as 0/1.
row_classes = labels['class'].to_numpy()
y = (row_classes[:, None] == np.asarray(label_names)[None, :]).astype(int)

print(y[:5]) # check if working correctly
# Plain nested lists, as consumed by the dataset-loading cell.
y = y.tolist()

['unknown' 'head_lamp' 'door_scratch' 'glass_shatter' 'tail_lamp'
 'bumper_dent' 'door_dent' 'bumper_scratch']
[[1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0]]


## Model architecture (VGG16)

In [6]:
# VGG16-style network assembled from a table of convolutional stages.
# Each entry is (number of filters, number of stacked 3x3 convolutions);
# every stage is followed by a 2x2 max-pooling layer.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
conv_options = dict(kernel_size=(3,3), padding="same", activation="relu")

model = Sequential()
for stage_index, (n_filters, n_convs) in enumerate(conv_stages):
    for conv_index in range(n_convs):
        # Only the very first layer declares the input image shape.
        is_first_layer = stage_index == 0 and conv_index == 0
        input_spec = {"input_shape": (224,224,3)} if is_first_layer else {}
        model.add(Conv2D(filters=n_filters, **input_spec, **conv_options))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))

# Classifier head: two 4096-unit layers, then one softmax unit per class.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(units=4096, activation="relu"))
model.add(Dense(units=len(label_names), activation="softmax"))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 64)      1792      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 112, 112, 128)     147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 56, 56, 256)       2

## Import images

In [7]:
# Keras matches an explicit `labels` list to the image files in alphanumeric
# order of their paths ("0.jpeg", "1.jpeg", "10.jpeg", "100.jpeg", ...), whereas
# `y` follows the row order of the index CSV (0, 1, 2, 3, ...). Passing `y`
# unchanged pairs most images with another image's label, so the label rows are
# reordered here to the file order first.
# NOTE(review): assumes the files on disk correspond one-to-one to the index
# rows, as the consistency check earlier in the notebook suggests.
index_file_names = [os.path.basename(path) for path in labels['image']]
file_order = sorted(range(len(index_file_names)), key=index_file_names.__getitem__)
y_by_file = [y[row] for row in file_order]


def load_image_subset(subset):
    """Load one side of the 70/30 split of the images under 'preprocessed'.

    Both subsets must be created with the same seed and validation_split so
    that they do not overlap; keeping the call in one place guarantees that.

    Args:
        subset: 'training' or 'validation'.

    Returns:
        The dataset returned by `image_dataset_from_directory`: batches of 32
        RGB images resized to 224x224 together with their label rows.
    """
    # label_mode='int' is kept from the original call: the labels are supplied
    # as ready-made one-hot rows (presumably passed through as-is -- confirm
    # for the installed Keras version).
    return tf.keras.preprocessing.image_dataset_from_directory(
        'preprocessed', labels=y_by_file, label_mode='int', color_mode='rgb',
        batch_size=32, image_size=(224, 224), shuffle=True, seed=42,
        validation_split=0.3, subset=subset
    )


train_dataset = load_image_subset('training')

val_dataset = load_image_subset('validation')

Found 1512 files belonging to 1 classes.
Using 1059 files for training.
Found 1512 files belonging to 1 classes.
Using 453 files for validation.


## Compile and train the model

In [8]:
# The network ends in a softmax layer, so the loss receives probabilities
# rather than logits.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train for 10 epochs, evaluating on the validation set after each one;
# `history` keeps the per-epoch metrics for the plot below.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

KeyboardInterrupt: 

In [None]:
# Training vs. validation accuracy per epoch.
# Drawn through pandas (already imported as `pd`): this notebook never imports
# matplotlib.pyplot, so the previous `plt.*` calls failed with NameError on a
# fresh kernel. Requires `history` from a completed `model.fit` run.
accuracy_curves = pd.DataFrame(history.history)[['accuracy', 'val_accuracy']]
ax = accuracy_curves.plot()
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
# Optional zoom on the upper accuracy range: ax.set_ylim([0.5, 1])
ax.legend(loc='lower right');