<a id = 'cnn'></a>
### **<font color ='black'>IDENTIFICATION OF AUDI/MERCEDES/BMW WITH CONVULUTIONAL NEURAL NETWORK </font>**

<a id = 'drive'></a>
<font color ='black'>Mount the dataset from drive: Using standford car dataset (contains images from Audi, BMW and Mercedes)</font>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<a id = 'zip'></a>
<font color ='black'>Extract the zip file content</font>

In [None]:
import zipfile
zip_ref = zipfile.ZipFile("/content/drive/MyDrive/Input/car_data.zip", 'r')
zip_ref.extractall("/content/drive/MyDrive/Input")
zip_ref.close()

<a id = 'zip'></a>
<font color ='black'>Import deep learning, algebra and dataframe libraries</font>

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from ipykernel import kernelapp as app
import os
import tensorflow as tf
import PIL as  pil 
from PIL import Image
import keras
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense
#!wget 'https://raw.githubusercontent.com/keras-team/keras-preprocessing/d3d58f5c6e2ef8b6270301415738ecb6deee2042/keras_preprocessing/image.py'
from keras.preprocessing.image import ImageDataGenerator

import os
print(os.listdir("/content/drive/MyDrive/Input"))


['car_data.zip', 'anno_test.csv', 'anno_train.csv', 'names.csv', 'car_data']


In [None]:
names = pd.read_csv("/content/drive/MyDrive/Input/names.csv")
names.sample(5)

Unnamed: 0,AM General Hummer SUV 2000
137,Hyundai Elantra Touring Hatchback 2012
55,Chevrolet Corvette Ron Fellows Edition Z06 2007
128,Hyundai Veloster Hatchback 2012
154,Lincoln Town Car Sedan 2011
180,Suzuki Kizashi Sedan 2012


In [None]:
anno_train = pd.read_csv("/content/drive/MyDrive/Input/anno_train.csv")
anno_train.sample(5)

Unnamed: 0,00001.jpg,39,116,569,375,14
845,00847.jpg,56,23,415,334,159
5622,05624.jpg,57,101,416,383,11
4585,04587.jpg,537,329,1214,955,183
587,00589.jpg,41,140,558,325,41
7801,07803.jpg,12,32,265,169,80


In [None]:
# Folder containng all the files for the test set. 
car_test= "/content/drive/MyDrive/Input/car_data/test/"
print('There are', len(os.listdir(car_test)),'folders in the test dataset')


There are 196 folders in the test dataset


In [None]:
# Folder containing all the training data.
car_train = "/content/drive/MyDrive/Input/car_data/train/"
print('There are', len(os.listdir(car_train)),'folders in the training dataset')

There are 196 folders in the training dataset


In [None]:
# Transform training and test dataset into a pandas dataframe
def pd_images(folder, is_training = True):
    data = list()
    for labels in os.listdir(car_train):
        for label in os.listdir(car_train+labels):
            if is_training == True:
                car_add = car_train + labels + '/' + label
            else:
                car_add = car_test+ labels + '/' + label
            car_value= (labels, car_add)                
            if car_value not in data:
                data.append(car_value)   

    pd_images = pd.DataFrame(np.array(data).reshape(8144,2), columns= ["car", "image path"])
    
    return pd_images



In [None]:
train_df = pd_images(car_train, is_training = True)
train_df.sample(10)
train_df.describe()

Unnamed: 0,car,image path
count,8144,8144
unique,196,8144
top,GMC Savana Van 2012,/content/drive/MyDrive/Input/car_data/train/Au...
freq,68,1


In [52]:
test_df = pd_images(car_test, is_training=False)
test_df.sample(10)
test_df.describe()

Unnamed: 0,car,image path
count,8144,8144
unique,196,8144
top,GMC Savana Van 2012,/content/drive/MyDrive/Input/car_data/test/BMW...
freq,68,1


In [53]:
# Label each car according to its category : Audi -0 , BMW-1, Mercedes-2
def cars_to_label(df):
    df = df[df['car'].str.contains('Audi|BMW|Mercedes')]
    df = df[df['car'].notnull()].copy()
    df['car'] = df['car'].str.split(' ').str[0]
    df['car label'] = df.car.astype("category").cat.codes
    return df

In [54]:
train_img = cars_to_label(train_df)
train_img_df =  train_img[['image path', 'car label']].copy()
train_img_df.sample(5)

Unnamed: 0,image path,car label
706,/content/drive/MyDrive/Input/car_data/train/Au...,0
1453,/content/drive/MyDrive/Input/car_data/train/BM...,1
6749,/content/drive/MyDrive/Input/car_data/train/Me...,2
1478,/content/drive/MyDrive/Input/car_data/train/BM...,1
1414,/content/drive/MyDrive/Input/car_data/train/BM...,1


In [55]:
test_img = cars_to_label(test_df)
test_img.sample(10)

Unnamed: 0,car,image path,car label
658,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
6621,Mercedes-Benz,/content/drive/MyDrive/Input/car_data/test/Mer...,2
1687,BMW,/content/drive/MyDrive/Input/car_data/test/BMW...,1
445,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
947,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
652,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
829,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
486,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
561,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0
521,Audi,/content/drive/MyDrive/Input/car_data/test/Aud...,0


<a id = 'cnn'></a>
# <font color ='purple'> Convolutional Neural network setup  </font>#

In [56]:
# CNN model
model = Sequential()
#Convolution
model.add(Conv2D(32, (3, 3), activation="relu", input_shape=(32, 32, 3)))
#Pooling
model.add(MaxPooling2D(pool_size = (2, 2)))
# 2nd Convolution
model.add(Conv2D(32, (3, 3), activation="relu"))
# 2nd Pooling layer
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.25))
#3rd Convolution
model.add(Conv2D(32, (3, 3), activation="relu"))
#Pooling
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.25))
# Flatten the layer
model.add(Flatten())
# Fully Connected Layers
model.add(Dense(activation = 'relu', units = 128))
model.add(Dense(activation = 'sigmoid', units = 3))
# Compile the Neural network
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [57]:
datagen = ImageDataGenerator(rescale=1./255.,validation_split=0.25)

In [58]:
train_generator=datagen.flow_from_dataframe(
dataframe = train_img_df,
directory = None,
x_col="image path",
y_col="car label",
has_ext=True,                                     
subset="training",
batch_size=34,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(32,32))

valid_generator=datagen.flow_from_dataframe(
dataframe=train_img,
directory = None, 
x_col="image path",
y_col="car label",
has_ext=True,
subset="validation",
batch_size=15,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(32,32))

test_datagen=ImageDataGenerator(rescale=1./255.)

test_generator=test_datagen.flow_from_dataframe(
dataframe=train_img,
directory = None,
x_col="image path",
y_col="car label",
has_ext=True,
batch_size=32,
seed=42,
shuffle=False,
class_mode=None,
target_size=(32,32))

--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.6/logging/__init__.py", line 994, in emit
    msg = self.format(record)
  File "/usr/lib/python3.6/logging/__init__.py", line 840, in format
    return fmt.format(record)
  File "/usr/lib/python3.6/logging/__init__.py", line 577, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.6/logging/__init__.py", line 338, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/usr/local/lib/python

TypeError: ignored

In [47]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

In [48]:
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10
)



Epoch 1/10


ValueError: ignored

In [None]:
model.evaluate_generator(generator=valid_generator)



[1.5707783699035645, 0.0]

In [None]:
test_generator.reset()
pred=model.predict_generator(test_generator,verbose=1)





In [None]:
predicted_class_indices=np.argmax(pred,axis=1)
print(predicted_class_indices)

[1 1 1 ... 1 1 1]


In [None]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]


In [None]:
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
                      "Predictions":predictions})
results.to_csv("results.csv",index=False)

In [None]:
results = pd.read_csv('results.csv')
results['Predictions'].unique
results.sample(5)

Unnamed: 0,Filename,Predictions
341,/content/drive/MyDrive/Input/car_data/train/Au...,1
783,/content/drive/MyDrive/Input/car_data/train/BM...,1
1289,/content/drive/MyDrive/Input/car_data/train/Me...,1
568,/content/drive/MyDrive/Input/car_data/train/Au...,1
128,/content/drive/MyDrive/Input/car_data/train/Au...,1


In [None]:
results.sample(10)

Unnamed: 0,Filename,Predictions
1097,/content/drive/MyDrive/Input/car_data/train/BM...,1
1160,/content/drive/MyDrive/Input/car_data/train/Me...,1
726,/content/drive/MyDrive/Input/car_data/train/BM...,1
1328,/content/drive/MyDrive/Input/car_data/train/Me...,1
54,/content/drive/MyDrive/Input/car_data/train/Au...,1
938,/content/drive/MyDrive/Input/car_data/train/BM...,1
116,/content/drive/MyDrive/Input/car_data/train/Au...,1
897,/content/drive/MyDrive/Input/car_data/train/BM...,1
1241,/content/drive/MyDrive/Input/car_data/train/Me...,1
576,/content/drive/MyDrive/Input/car_data/train/Au...,1


In [None]:
merc_df = results[results['Filename'].str.contains('Mercedes')]
print(merc_df['Filename'].count(), 'images were provided for evaluation as Mercedes or not')

261

In [None]:
print('The total number of Mercedes classified correctly are:', (merc_df['Predictions'] ==2).sum()) # 151 
print('The percentage of Mercedes classified correctly is:',((merc_df['Predictions'] ==2).sum())/(merc_df['Filename'].count())*100, 
     '%')

The total number of Mercedes classified correctly are: 6
The percentage of Mercedes classified correctly is: 2.2988505747126435 %


In [None]:
bmw_df = results[results['Filename'].str.contains('BMW')]
print(bmw_df['Filename'].count(), 'images were provided for evaluation as BMW or not')

531 images were provided for evaluation as BMW or not


In [None]:
print('The total number of BMW classified correctly are:', (bmw_df['Predictions'] ==1).sum()) 
print('The percentage of BMW classified correctly is:',((bmw_df['Predictions'] ==1).sum())/(bmw_df['Filename'].count())*100, 
     '%')

The total number of BMW classified correctly are: 529
The percentage of BMW classified correctly is: 99.62335216572504 %


In [None]:
audi = results[results['Filename'].str.contains('Audi')]
print(audi['Filename'].count(), 'images were provided for evaluation as Audi or not')

589 images were provided for evaluation as Audi or not


In [None]:
print('The total number of Audi classified correctly are:', (audi['Predictions'] ==0).sum()) # 151 
print('The percentage of BMW classified correctly is:',((audi['Predictions'] ==0).sum())/(audi['Filename'].count())*100, 
     '%')

The total number of Audi classified correctly are: 0
The percentage of BMW classified correctly is: 0.0 %
