In [None]:
# Install the package that provides `make_generator`, which is imported in the next cell.
!pip install keras-balanced-batch-generator

In [None]:
import os
from os import listdir
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
from keras.preprocessing.image import save_img
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.optimizers import SGD , Adam, RMSprop
from keras import backend
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from numpy import load
from numpy import zeros
from numpy import asarray
from numpy import savez_compressed
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf
#from keras.utils.data_utils import Sequence
from keras_balanced_batch_generator import make_generator
#from imblearn.over_sampling import RandomOverSampler
#from imblearn.keras import balanced_batch_generator
from sklearn.utils import class_weight

In [None]:
# Directory holding the raw car images. It has to be defined here because the
# dataset-loading cell further down passes `car_data` to `load_dataset`;
# the directory itself must exist before that cell is run.
car_data = "./car_image_data/"

In [None]:
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)


## Preparing Dataset

In [None]:
# load all images into memory
def load_dataset(img_folder):
    """Load every image in ``img_folder`` together with its label.

    Each directory entry except ``Thumbs.db`` is read with ``load_img``
    (resized to 224x224) and converted to a ``uint8`` array. The label of an
    image is the part of its file name that precedes the first ``-``.

    Args:
        img_folder: Path of the directory that contains the images. A
            trailing path separator is optional.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a ``uint8`` array stacking all
        images and ``Y`` is an array holding the label of each image, in the
        same order as ``X``.
    """
    photos, targets = list(), list()
    print('Preparing Data ...')
    print(' ')
    # Sort the listing so the sample order (and therefore any seeded split
    # performed later) does not depend on the filesystem's listing order.
    for filename in sorted(listdir(img_folder)):
        if filename == 'Thumbs.db':
            continue
        # os.path.join works whether or not img_folder ends with a separator.
        photo = load_img(os.path.join(img_folder, filename), target_size=(224, 224))
        photos.append(img_to_array(photo, dtype='uint8'))
        # The label is encoded in the file name, before the first '-'.
        targets.append(filename.split('-')[0])
    print('Done!')

    X = asarray(photos, dtype='uint8')
    Y = asarray(targets)
    return X, Y

In [None]:
# Read every image under `car_data` into X (image arrays) and Y (labels taken from the file names).
X,Y = load_dataset(car_data)

In [None]:
# Sanity check: show the shapes of the image array and the label array.
print(X.shape, Y.shape)

In [None]:
# Save the image array and the label array to a single compressed .npz file.
# Both are passed positionally, so they are stored under the default keys
# 'arr_0' (images) and 'arr_1' (labels), which the loading cell below reads.
savez_compressed('compressed_data.npz', X, Y)

### Done

## Creating and Training Model

In [None]:
# Load the dataset from the compressed archive. `load` returns an NpzFile that
# keeps the underlying file open, so the arrays are read inside a `with` block
# to guarantee the handle is closed afterwards.
with load('/kaggle/input/toyotacars/compressed_data.npz') as data:
    # 'arr_0' / 'arr_1' are the keys given to positionally saved arrays.
    x, y = data['arr_0'], data['arr_1']
print(x.shape, y.shape)

In [None]:
# One-hot encoder for the class labels. It is a module-level object: the
# training function fits it, and the prediction cells reuse it to decode outputs.
ohe = preprocessing.OneHotEncoder()

In [None]:
# Separate the dataset into training and test subsets.

def split_dataset(X, Y):
    """Split samples and labels into train and test parts.

    Ten percent of the data goes to the test part; the random state is fixed
    so repeated calls on the same data give the same split.

    Returns:
        Tuple ``(train_x, test_x, train_y, test_y)``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        X,
        Y,
        test_size=0.1,
        random_state=3,
    )
    return x_train, x_test, y_train, y_test

In [None]:
def image_model(input_shape=(224,224,3), output_shape=None):
    """Build and compile the convolutional classifier.

    The network is three stages of (Conv2D, Conv2D, MaxPooling2D, Dropout)
    with 32, 64 and 128 filters, followed by Flatten, two 256-unit Dense
    layers, Dropout and a softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        output_shape: Number of units of the softmax output layer.

    Returns:
        The model, compiled with Adam, categorical cross-entropy loss and
        the accuracy metric.
    """
    # Settings shared by every convolution layer.
    conv_settings = dict(activation='relu', kernel_initializer='he_uniform', padding='same')

    model = Sequential()
    for stage, filters in enumerate((32, 64, 128)):
        if stage == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(filters, (3,3), input_shape=input_shape, **conv_settings))
        else:
            model.add(Conv2D(filters, (3,3), **conv_settings))
        model.add(Conv2D(filters, (3,3), **conv_settings))
        model.add(MaxPooling2D((2,2)))
        model.add(Dropout(0.2))

    # Classification head.
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.2))
    model.add(Dense(output_shape, activation='softmax'))

    # Alternative optimizer kept for reference: SGD(lr=0.03, momentum=0.9)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Plot the learning curves: one panel for the loss, one for the accuracy.

def summary(info):
    """Draw training and validation curves from a training history.

    Args:
        info: Object whose ``history`` mapping holds the per-epoch series
            'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    # (subplot position, title, training key, validation key, legend entries)
    panels = (
        (211, 'Cross Entropy Loss', 'loss', 'val_loss',
         ["Loss","Validation Loss"]),
        (212, 'Classification Accuracy', 'accuracy', 'val_accuracy',
         ["Accuracy","Validation Accuracy"]),
    )
    for position, title, train_key, val_key, legend_entries in panels:
        plt.subplot(position)
        plt.title(title)
        plt.plot(info.history[train_key], color='blue', label='train')
        plt.plot(info.history[val_key], color='orange', label='test')
        plt.legend(legend_entries)
    

In [None]:
def run_learning_sequence(X,Y):
    """Train, evaluate, save and plot the image classifier.

    Steps: split the data, one-hot encode the labels with the module-level
    ``ohe`` encoder (which is fitted here on the training labels), train the
    model from ``image_model`` for 50 epochs, print the loss/accuracy on the
    held-out part, save the model to 'toyota_model_1.h5' and plot the
    learning curves with ``summary``.

    Args:
        X: Array of images.
        Y: Array of labels, one per image.
    """
    batch_size = 32

    # split dataset
    train_x, test_x, train_y, test_y = split_dataset(X, Y)

    # create data generators: both rescale pixels by 1/255; only the training
    # one adds random flips and rotations
    train_datagen = ImageDataGenerator(rescale=1.0/255.0, horizontal_flip=True,
                                       vertical_flip=True, rotation_range=90)
    test_datagen = ImageDataGenerator(rescale=1.0/255.0)

    # one-hot encode the labels; the encoder is fitted on the training labels only
    ohe.fit(train_y.reshape(-1,1))
    y_encoded_train = ohe.transform(train_y.reshape(-1,1)).toarray()
    y_encoded_test = ohe.transform(test_y.reshape(-1,1)).toarray()

    # validation / evaluation iterator
    test = test_datagen.flow(test_x, y_encoded_test, batch_size=batch_size)

    # define the model
    model = image_model(output_shape=y_encoded_train.shape[1])

    # handles the unbalanced image dataset
    balanced_batches = make_generator(train_x, y_encoded_train, batch_size=batch_size)

    def augmented_balanced_batches():
        """Yield the balanced batches after passing them through train_datagen.

        Feeding the raw batches to the model would skip both the 1/255
        rescaling and the augmentation, so the model would be trained on
        0-255 inputs while being validated on rescaled inputs.
        """
        for batch_x, batch_y in balanced_batches:
            # One full-size, unshuffled pass keeps images aligned with labels.
            yield next(train_datagen.flow(batch_x, batch_y,
                                          batch_size=len(batch_x), shuffle=False))

    # One epoch covers the training set once (ceiling division).
    steps_per_epoch = (len(train_x) + batch_size - 1) // batch_size

    # `fit` / `evaluate` accept generators directly; the *_generator variants
    # are deprecated and no longer exist in current Keras releases.
    info = model.fit(augmented_balanced_batches(), steps_per_epoch=steps_per_epoch,
                     validation_data=test, validation_steps=len(test),
                     epochs=50, verbose=1)

    # evaluate the model
    loss, accuracy = model.evaluate(test, steps=len(test), verbose=1)
    print('> loss = %.3f, accuracy=%.3f' %(loss, accuracy))

    # saving the model
    model.save('toyota_model_1.h5')

    # display learning curves
    summary(info)

In [None]:
# Train on the arrays loaded from the compressed dataset; this also saves the model and plots the curves.
run_learning_sequence(x,y)

### Done

## Testing the model

In [None]:
# Rebuild the split (split_dataset uses a fixed random_state, so repeated calls
# on the same data give the same split) and fit the encoder on the training
# labels so that model outputs can be mapped back to label names.
train_x, test_x,train_y,test_y = split_dataset(x, y)
ohe.fit(train_y.reshape(-1,1))

In [None]:
# Directory whose images are all classified in the multiple-images test below.
multiple_images = './multi_image_test/'

In [None]:
# Path of the one image classified in the single-image test below.
single_image = './single_image_test/rav4-2129563200.jpg'

In [None]:
def run_prediction_sequence(ohe, dirc=None,single_image=None,condition=False):
    """Predict the label of one image, or of every image in a directory.

    The model is loaded from './toyota_model_1.h5'. Each image is resized to
    224x224 and its pixels are scaled to [0, 1], matching the
    ``rescale=1.0/255.0`` preprocessing that ``run_learning_sequence`` applies
    to the data the model is evaluated on.

    Args:
        ohe: Fitted encoder; its ``inverse_transform`` maps a model output
            back to a label.
        dirc: Directory of images to classify (used when ``condition`` is
            true). Entries named 'Thumbs.db' are skipped. A trailing path
            separator is optional.
        single_image: Path of the image to classify (used when ``condition``
            is false).
        condition: ``True`` to classify every image in ``dirc``, ``False``
            to classify ``single_image`` only.

    Returns:
        A list with one ``ohe.inverse_transform`` result per classified
        image; directory entries are processed in sorted name order.

    Raises:
        ValueError: If the path required by the selected mode is ``None``.
    """
    # Resolve the images to classify first, so bad arguments fail fast and
    # both modes share the same prediction loop.
    if condition:
        if dirc is None:
            raise ValueError('dirc must be given when condition is True')
        image_paths = [os.path.join(dirc, name)
                       for name in sorted(listdir(dirc))
                       if name != 'Thumbs.db']
    else:
        if single_image is None:
            raise ValueError('single_image must be given when condition is False')
        image_paths = [single_image]

    # load model
    model = load_model('./toyota_model_1.h5')

    result = list()
    for path in image_paths:
        photo = load_img(path, target_size=(224, 224))
        # Scale to [0, 1]: same preprocessing as the evaluation data.
        photo = img_to_array(photo, dtype='float32') / 255.0
        # The model expects a batch dimension, even for a single image.
        prediction = model.predict(photo.reshape(1, 224, 224, 3))
        result.append(ohe.inverse_transform(prediction))

    return result

### Multiple images Test

In [None]:
# condition=True selects directory mode: every image under `multiple_images` is classified.
run_prediction_sequence(ohe, dirc=multiple_images,single_image=None,condition=True)

### Single Image Test

In [None]:
# condition=False selects single-image mode: only `single_image` is classified.
run_prediction_sequence(ohe, dirc=None,single_image=single_image,condition=False)

In [None]:
#https://pypi.org/project/keras-balanced-batch-generator/
#https://medium.com/analytics-vidhya/how-to-apply-data-augmentation-to-deal-with-unbalanced-datasets-in-20-lines-of-code-ada8521320c9
#https://www.kaggle.com/occultainsights/toyota-cars-over-20k-labeled-images