# Car Classification

## Imports

In [None]:
!pip install -q efficientnet

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import zipfile
import csv
import sys
import os


import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, EarlyStopping
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.efficientnet import EfficientNetB2
from tensorflow.keras.layers import *
from tensorflow import keras
import efficientnet.tfkeras as efn

import tensorflow.keras.models as M
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K

from sklearn.model_selection import train_test_split, StratifiedKFold

import PIL
from PIL import ImageOps, ImageFilter
# Enlarge the default figure size.
from pylab import rcParams
rcParams['figure.figsize'] = 10, 5
# Figures rendered as SVG look sharper.
%config InlineBackend.figure_format = 'svg' 
%matplotlib inline

# List the competition input files, then report the interpreter and library versions.
print(os.listdir("../input/sf-dl-car-classification"))
version_rows = (
    ('Python       :', sys.version.split('\n')[0]),
    ('Numpy        :', np.__version__),
    ('Tensorflow   :', tf.__version__),
    ('Keras        :', tf.keras.__version__),
)
for row_label, row_value in version_rows:
    print(row_label, row_value)

## Settings

In [None]:
# Core settings are gathered in one place so they are easy to tune later.

EPOCHS               = 5  # number of training epochs
BATCH_SIZE           = 64 # lower the batch size if the network is large, otherwise it will not fit into GPU memory
LR                   = 1e-4 # learning rate passed to the Adam optimizer
VAL_SPLIT            = 0.15 # fraction of the training images held out for validation (15%)

CLASS_NUM            = 10  # number of classes in the task
IMG_SIZE             = 224 # side length of the images fed to the network
IMG_CHANNELS         = 3   # RGB has 3 channels
input_shape          = (IMG_SIZE, IMG_SIZE, IMG_CHANNELS)

RANDOM_SEED = 42
# Seed NumPy and TensorFlow: defining RANDOM_SEED alone does not make the random
# augmentation, dropout and weight initialisation repeatable.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

DATA_PATH = '../input/sf-dl-car-classification/'
PATH = "../working/car/" # working directory

## Upload + EDA

In [None]:
# Read the training labels and the submission template, then preview the labels.
train_df, sample_submission = (
    pd.read_csv(DATA_PATH + csv_name)
    for csv_name in ("train.csv", "sample-submission.csv")
)
train_df.head()

In [None]:
# Number of training images per class.
train_df['Category'].value_counts()

In [None]:
print('Распаковываем картинки')
# Extract both image archives into the working directory so the files can be browsed.
for archive_name in ('train.zip', 'test.zip'):
    archive_path = DATA_PATH + archive_name
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(PATH)

print(os.listdir(PATH))

In [None]:
print('Пример картинок (random sample)')
plt.figure(figsize=(12,8))

# Seed the sample so the preview grid is the same on every run.
random_image = train_df.sample(n=9, random_state=RANDOM_SEED)
random_image_paths = random_image['Id'].values
random_image_cat = random_image['Category'].values

for index, (path, category) in enumerate(zip(random_image_paths, random_image_cat)):
    # The context manager closes the file handle once the pixels have been drawn.
    with PIL.Image.open(PATH+f'train/{category}/{path}') as im:
        plt.subplot(3,3, index+1)
        plt.imshow(im)
    plt.title('Class: '+str(category))
    plt.axis('off')
plt.show()

In [None]:
# Named `sample_image` so the `image` module imported from tensorflow.keras.preprocessing
# is not shadowed. PATH already ends with '/', so no extra slash is needed.
with PIL.Image.open(PATH+'train/0/100380.jpg') as sample_image:
    imgplot = plt.imshow(sample_image)
plt.show()
sample_image.size

### Augmentation

In [None]:
# Augmentation recipe for the labelled images; validation_split reserves part of
# the directory so it can be requested later through subset='validation'.
augmentation_options = dict(
    rescale=1. / 255,
    rotation_range=5,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.1,
    zoom_range=0.1,
    validation_split=VAL_SPLIT,
    horizontal_flip=False,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Submission images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

### Data Generation

In [None]:
# Validation images must not be randomly augmented, otherwise val_accuracy is noisy
# and checkpoint selection is unreliable. A rescale-only generator with the same
# validation_split selects the same validation files as train_datagen would.
val_datagen = ImageDataGenerator(rescale=1. / 255, validation_split=VAL_SPLIT)

# Options shared by the training and validation iterators.
directory_flow_options = dict(
    directory=PATH+'train/',      # directory that contains one sub-folder per class
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=RANDOM_SEED,
)

train_generator = train_datagen.flow_from_directory(
    shuffle=True,
    subset='training',   # training part of the split (augmented)
    **directory_flow_options)

test_generator = val_datagen.flow_from_directory(
    shuffle=False,       # fixed order keeps evaluation deterministic
    subset='validation', # validation part of the split (rescaled only)
    **directory_flow_options)

# Unlabelled images for the submission, in the order of sample_submission.
test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,)

## Xception

### Naive

In [None]:
# ImageNet-pretrained Xception without its classification head, used as the backbone.
base_model = Xception(weights='imagenet', include_top=False, input_shape = input_shape)
# Alternative backbone, disabled (EfficientNetV2 is not imported in this notebook):
#base_model = EfficientNetV2(weights='imagenet', include_top=False, input_shape = input_shape)

In [None]:
# Print the layer-by-layer summary of the backbone.
base_model.summary()

In [None]:
# Classification head: global pooling, one regularised fully-connected block,
# then a softmax over the CLASS_NUM classes.
head_layers = (
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),
)
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
predictions = Dense(CLASS_NUM, activation='softmax')(x)

# The model that will be trained: backbone input -> class probabilities.
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.Adam(learning_rate=LR),
    metrics=["accuracy"],
)

In [None]:
# Print the layer-by-layer summary of the full model (backbone + head).
model.summary()

In [None]:
# `monitor` must be a metric name (a string, not a list), and save_best_only=True is
# required for the file to hold the best epoch rather than simply the last one.
checkpoint = ModelCheckpoint('best_model.hdf5', monitor='val_accuracy', verbose=1,
                             mode='max', save_best_only=True)
callbacks_list = [checkpoint]

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the final-epoch model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/model_last.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored weights on the whole validation split.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Training curves of the naive run: accuracy on the current figure, loss on a second one.
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs = range(len(acc))

curve_panels = (
    (acc, val_acc, 'Training acc', 'Validation acc', 'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
)
for panel_index, (train_values, val_values, train_label, val_label, panel_title) in enumerate(curve_panels):
    if panel_index:
        # Every panel after the first gets its own figure.
        plt.figure()
    plt.plot(epochs, train_values, 'b', label=train_label)
    plt.plot(epochs, val_values, 'r', label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

In [None]:
# Predict class probabilities for the submission images.
# Model.predict replaces the deprecated predict_generator.
test_sub_generator.reset()
predictions = model.predict(test_sub_generator, steps=len(test_sub_generator), verbose=1)
predictions = np.argmax(predictions, axis=-1)  # most probable class index per image
# Invert class_indices (label -> index) to map predicted indices back to folder labels.
label_map = {class_index: label for label, class_index in train_generator.class_indices.items()}
predictions = [label_map[k] for k in predictions]

In [None]:
# Pair every test file name with its predicted label and write the submission file.
submission_columns = ['Id', 'Category']
submission_values = (test_sub_generator.filenames, predictions)
submission = pd.DataFrame(dict(zip(submission_columns, submission_values)), columns=submission_columns)
submission.to_csv('submission_xception_naive.csv', index=False)

Results on 1st submission: ~92%

### Fine Tuning

#### Step 1

In [None]:
# Reset the Keras backend session before building the next model.
K.clear_session()
# Fresh ImageNet-pretrained Xception backbone without its classification head.
base_xception = Xception(weights='imagenet', include_top=False, input_shape = input_shape)

In [None]:
# Print the layer-by-layer summary of the backbone.
base_xception.summary()

In [None]:
# Step 1 trains only the new head: freeze the whole backbone.
base_xception.trainable = False

In [None]:
# Backbone followed by the classification head.
# No in-model rescaling layer: every generator already applies rescale=1/255, so a
# further division by 255 would squash the inputs into [0, 1/255].
model = M.Sequential([
    base_xception,
    L.GlobalAveragePooling2D(),
    L.Dense(512, activation='relu'),
    L.BatchNormalization(),
    L.Dropout(0.25),
    L.Dense(CLASS_NUM, activation='softmax'),
])

In [None]:
# Epoch budget for the frozen-backbone stage (step 1).
EPOCHS=20

In [None]:
# Build from the configured input_shape instead of a hard-coded 224x224x3 so the
# cell stays correct when IMG_SIZE changes.
model.build((None,) + tuple(input_shape))
model.summary()

__Fit__

In [None]:
# Lower learning rate for the fine-tuning stages.
LR=1e-5
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=LR), metrics=["accuracy"])

In [None]:
# `monitor` must be a metric name (a string, not a list), and save_best_only=True is
# required for the file to hold the best epoch rather than simply the last one.
checkpoint = ModelCheckpoint('best_model.hdf5', monitor='val_accuracy', verbose=1,
                             mode='max', save_best_only=True)
# Stop after 5 epochs without val_accuracy improvement and roll back to the best weights.
earlystop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
callbacks_list = [checkpoint, earlystop]

In [None]:
# Quick one-batch sanity check before training.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=1, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the end-of-stage model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/model_step1.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored checkpoint on the whole validation split; one batch is too
# small a sample to compare stages. Model.evaluate replaces evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
def plot_history(history):
    """Plot training/validation accuracy and loss curves, one figure per metric.

    `history` is the object returned by model fitting; its `.history` dict must
    contain the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    metrics = history.history
    train_acc, valid_acc = metrics['accuracy'], metrics['val_accuracy']
    train_loss, valid_loss = metrics['loss'], metrics['val_loss']
    epoch_axis = range(len(train_acc))

    panels = (
        (train_acc, valid_acc, 'Training acc', 'Validation acc', 'Training and validation accuracy'),
        (train_loss, valid_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
    )
    for train_values, valid_values, train_label, valid_label, panel_title in panels:
        plt.figure(figsize=(10,5))
        plt.plot(epoch_axis, train_values, 'b', label=train_label)
        plt.plot(epoch_axis, valid_values, 'g', label=valid_label)
        plt.title(panel_title)
        plt.legend()

    plt.show()

plot_history(history)

#### Step 2

In [None]:
# Count the backbone layers; the fine_tune_at indices used in the next stages refer to this list.
print("Number of layers in the base model: ", len(base_xception.layers))

In [None]:
# Unfreeze the backbone, then re-freeze everything below the cut-off index.
base_xception.trainable = True

# Index of the first backbone layer that stays trainable.
fine_tune_at = 80

for position, backbone_layer in enumerate(base_xception.layers):
    if position < fine_tune_at:
        backbone_layer.trainable = False

In [None]:
# Show the trainable flag of each top-level layer of the model
# (the backbone appears here as a single entry).
for top_level_layer in model.layers:
    print(top_level_layer, top_level_layer.trainable)

In [None]:
# Re-compile after changing the trainable flags.
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=LR), metrics=["accuracy"])

In [None]:
# Print the model summary for this stage.
model.summary()

In [None]:
# Quick one-batch sanity check before training.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=1, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Epoch budget for step 2.
EPOCHS = 25

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the end-of-stage model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/model_step2.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored checkpoint on the whole validation split; one batch is too
# small a sample to compare stages. Model.evaluate replaces evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Accuracy and loss curves for step 2.
plot_history(history)

#### Step 3

In [None]:
# Unfreeze the backbone, then re-freeze everything below the cut-off index.
base_xception.trainable = True

# Index of the first backbone layer that stays trainable.
fine_tune_at = 45

for position, backbone_layer in enumerate(base_xception.layers):
    if position < fine_tune_at:
        backbone_layer.trainable = False

In [None]:
# Re-compile after changing the trainable flags.
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=LR), metrics=["accuracy"])

In [None]:
# Print the model summary for this stage.
model.summary()

In [None]:
# Quick one-batch sanity check before training.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=1, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Epoch budget for step 3.
EPOCHS = 15

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the end-of-stage model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/model_step3.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored checkpoint on the whole validation split; one batch is too
# small a sample to compare stages. Model.evaluate replaces evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Accuracy and loss curves for step 3.
plot_history(history)

#### Step 4

In [None]:
# Unfreeze the backbone, then re-freeze everything below the cut-off index.
base_xception.trainable = True

# Index of the first backbone layer that stays trainable.
fine_tune_at = 25

for position, backbone_layer in enumerate(base_xception.layers):
    if position < fine_tune_at:
        backbone_layer.trainable = False

In [None]:
# Learning rate for step 4 (same value as the previous stages), then re-compile
# so the new trainable flags take effect.
LR = 1e-5
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=LR), metrics=["accuracy"])

In [None]:
# Epoch budget for step 4 (step 5 reuses it, as EPOCHS is not reassigned in between).
EPOCHS = 30

In [None]:
# Print the model summary for this stage.
model.summary()

In [None]:
# Quick one-batch sanity check before training.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=1, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the end-of-stage model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/model_step4.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored checkpoint on the whole validation split; one batch is too
# small a sample to compare stages. Model.evaluate replaces evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Accuracy and loss curves for step 4.
plot_history(history)

#### Step 5

In [None]:
# Step 5: make the whole backbone trainable and re-compile so the change takes effect.
base_xception.trainable = True
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=LR), metrics=["accuracy"])

In [None]:
# Quick one-batch sanity check before training.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=1, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the end-of-stage model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/model_step5.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored checkpoint on the whole validation split; one batch is too
# small a sample to compare stages. Model.evaluate replaces evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Accuracy and loss curves for step 5.
plot_history(history)

In [None]:
# Predict class probabilities for the submission images.
# Model.predict replaces the deprecated predict_generator.
test_sub_generator.reset()
predictions = model.predict(test_sub_generator, steps=len(test_sub_generator), verbose=1)
predictions = np.argmax(predictions, axis=-1)  # most probable class index per image
# Invert class_indices (label -> index) to map predicted indices back to folder labels.
label_map = {class_index: label for label, class_index in train_generator.class_indices.items()}
predictions = [label_map[k] for k in predictions]

In [None]:
# Pair every test file name with its predicted label and write the submission file.
submission_columns = ['Id', 'Category']
submission_values = (test_sub_generator.filenames, predictions)
submission = pd.DataFrame(dict(zip(submission_columns, submission_values)), columns=submission_columns)
submission.to_csv('submission_xception_fine_tuned.csv', index=False)

In [None]:
# NOTE(review): os is already imported at the top of the notebook, so this re-import is redundant.
import os
# Switch the working directory to ../working before creating the link from a relative file name.
os.chdir(r'../working')
from IPython.display import FileLink
# Display a download link for the fine-tuned submission file.
FileLink(r'submission_xception_fine_tuned.csv')

Leaderboard score after submission: 91.64%.

Next, try a larger input image size (`IMG_SIZE`) to improve the score.

In [None]:
# NOTE(review): objects built earlier (generators, model) keep the image size they
# were created with; they must be re-created for the new IMG_SIZE to take effect.
EPOCHS               = 25  # number of training epochs
BATCH_SIZE           = 64 # lower the batch size if the network is large, otherwise it will not fit into GPU memory
LR                   = 1e-5
VAL_SPLIT            = 0.15 # fraction of the training images held out for validation (15%)

CLASS_NUM            = 10  # number of classes in the task
IMG_SIZE             = 350 # side length of the images fed to the network
IMG_CHANNELS         = 3   # RGB has 3 channels
input_shape          = (IMG_SIZE, IMG_SIZE, IMG_CHANNELS)

In [None]:
# Rebuild the input pipeline and the model for the new IMG_SIZE. Creating a new
# backbone is not enough: the existing generators would still emit the old image
# size and `model` would still wrap the old backbone.

# 1) Generators at the new resolution. Validation images are only rescaled.
val_datagen_resized = ImageDataGenerator(rescale=1. / 255, validation_split=VAL_SPLIT)

train_generator = train_datagen.flow_from_directory(
    PATH+'train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='training')

test_generator = val_datagen_resized.flow_from_directory(
    PATH+'train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False, seed=RANDOM_SEED,
    subset='validation')

test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,)

# 2) New fully trainable backbone for the larger input.
base_xception = Xception(weights='imagenet', include_top=False, input_shape = input_shape)
base_xception.trainable = True

# 3) Same layer stack as before, with the old backbone (the only nested Model
#    among the layers) swapped for the new one; the other layers are reused.
previous_model = model
model = M.Sequential()
for stacked_layer in previous_model.layers:
    model.add(base_xception if isinstance(stacked_layer, Model) else stacked_layer)
del previous_model
model.build((None,) + tuple(input_shape))

model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=LR), metrics=["accuracy"])

# 4) Start from the fine-tuned weights; the convolutional kernels do not depend
#    on the spatial input size, so the checkpoint loads into the resized model.
model.load_weights('best_model.hdf5')

In [None]:
# Quick one-batch sanity check before training.
# Model.evaluate replaces the deprecated evaluate_generator.
scores = model.evaluate(test_generator, steps=1, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Model.fit accepts Keras generators directly; fit_generator is deprecated since TF 2.1.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

In [None]:
# Keep the end-of-stage model on disk, then switch to the weights written by the checkpoint callback.
model.save('../working/xception_size350.hdf5')
model.load_weights('best_model.hdf5')

In [None]:
# Score the restored checkpoint on the whole validation split; one batch is too
# small a sample to compare stages. Model.evaluate replaces evaluate_generator.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Recompute the predictions with the current model: the `predictions` list left over
# from the previous section would make this file a copy of the earlier submission.
test_sub_generator.reset()
predictions = model.predict(test_sub_generator, steps=len(test_sub_generator), verbose=1)
predictions = np.argmax(predictions, axis=-1)  # most probable class index per image
# Invert class_indices (label -> index) to map predicted indices back to folder labels.
label_map = {class_index: label for label, class_index in train_generator.class_indices.items()}
predictions = [label_map[k] for k in predictions]

submission = pd.DataFrame({'Id':test_sub_generator.filenames, 'Category':predictions}, columns=['Id', 'Category'])
submission.to_csv('submission_xception_size350.csv', index=False)

### EfficientNetB2

In [None]:
# ImageNet-pretrained EfficientNetB2 backbone from the `efficientnet` package, without
# its classification head; it uses whatever input_shape the latest settings cell defined.
base_effnet = efn.EfficientNetB2(weights='imagenet', include_top=False, input_shape=input_shape)

In [None]:
# NOTE(review): neither `run_history` nor `train` is defined anywhere in the visible
# notebook, so this call presumably raises NameError — confirm, define them, or remove the cell.
run_history(train)