## Unzip the dataset

## Import necessary packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import shutil

import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, Dense, Flatten
from sklearn.model_selection import train_test_split

## Load the data

In [None]:
# Fetch the CIFAR-10 train/test arrays shipped with Keras and read the
# competition's sample submission file.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
test_labels = pd.read_csv('../input/cifar-10/sampleSubmission.csv')

# Cast both image arrays to float32 and divide by 255 so the network sees
# small-magnitude inputs.
X_train, X_test = (images.astype('float32') / 255.0 for images in (X_train, X_test))

In [None]:
# Human-readable name for every class index (class_names[i] names class i).
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]
# Number of target classes, derived from the label list above.
NUM_CLASSES = len(class_names)
# Mini-batch size shared by the training and prediction generators.
BATCH_SIZE = 64

In [None]:
# One-hot encode the training labels so they match the softmax output and
# the categorical cross-entropy loss used by the models below.
train_labels_categories = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)

# Hold out 5% of the training images for validation (fixed seed for a
# repeatable split).
# NOTE(review): this rebinds `y_train` from the raw labels to the one-hot
# training split, so the cell cannot be re-run without reloading the data.
x_train, x_val, y_train, y_val = train_test_split(
    X_train,
    train_labels_categories,
    test_size=0.05,
    random_state=0,
)

In [None]:
# Display the one-hot encoded validation labels produced by the split above.
y_val

## Image Data Generator

In [None]:
# Data augmentation: random transforms applied to training batches on the fly.
# NOTE(review): Keras documents rotation_range in degrees, so 0.3 is a
# near-invisible rotation — confirm whether a larger value was intended.
augmentation_options = dict(
    rotation_range=0.3,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

## Modeling

In [None]:
# Convolutional stages as (filters, dropout rate) pairs. Every stage is
# Conv-BN-Conv-BN-MaxPool-Dropout; filters double and dropout grows with depth.
CONV_STAGES = [(32, 0.2), (64, 0.3), (128, 0.4)]

model = Sequential()
for stage_index, (filters, dropout_rate) in enumerate(CONV_STAGES):
    # Only the very first layer of the network declares the input shape.
    first_layer_kwargs = {'input_shape': (32, 32, 3)} if stage_index == 0 else {}
    model.add(Conv2D(filters, kernel_size=3, activation='relu', padding='same', **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(Conv2D(filters, kernel_size=3, activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D())
    model.add(Dropout(dropout_rate))

# Classifier head: flatten the feature maps, one hidden layer, then a softmax
# over the classes. The output width follows NUM_CLASSES so it always matches
# the one-hot label encoding.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(0.001, amsgrad=True),
              metrics=['accuracy'])
model.summary()

In [None]:
# Multiply the learning rate by 0.6 when val_loss has not improved by at least
# min_delta for 5 epochs. (The keyword was previously misspelled `min_denta`,
# so the intended threshold was never applied.)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.6, min_delta=0.00001)
# Stop after 15 epochs without val_loss improvement and roll back to the best weights.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=1, restore_best_weights=True)

# Train on augmented batches; validation uses the untouched hold-out split.
history = model.fit(datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
                    validation_data=(x_val, y_val),
                    epochs=100, callbacks=[reduce_lr, es])

## Visualize `accuracy` and `loss`

In [None]:
# One panel per metric: training curve in blue, validation curve in red.
# `fig` is bound explicitly instead of `_`, which IPython reserves for the
# last cell output.
fig, ax = plt.subplots(2, 1, sharex=True)
for axis, metric in zip(ax, ('loss', 'accuracy')):
    axis.plot(history.history[metric], color='b', label=metric)
    axis.plot(history.history['val_' + metric], color='r', label='val_' + metric)
    axis.set_ylabel(metric)
    axis.legend(loc='best', shadow=True)

# Label the figure so it can be read on its own.
ax[0].set_title('Training vs. validation')
ax[-1].set_xlabel('epoch')
fig.tight_layout()

## Prediction

In [None]:
!mkdir ./data/
!mv ./test ./data

In [None]:
!pip install py7zr
from py7zr import unpack_7zarchive
import shutil
shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)

In [None]:
# Extract the competition's test images; no extract_dir is given, so the
# archive contents land relative to the current working directory.
shutil.unpack_archive(filename='../input/cifar-10/test.7z')

In [None]:
# Sanity check: count the entries the extraction left in ./test.
# (The variable names say "train" but the directory listed is the test set.)
train_dir = os.listdir('./test')
train_dir_len = len(train_dir)
print('Length :\t', train_dir_len)

In [None]:
!mkdir ./data/
!mv ./test ./data

In [None]:
# Test-time pipeline: only rescale pixels by 1/255, no augmentation.
test_data_generator = ImageDataGenerator(rescale=1./255.)

# Stream the images under ./data in a fixed order (shuffle=False) and without
# labels (class_mode=None), so predictions line up with `filenames`.
test_generator = test_data_generator.flow_from_directory(
    directory='./data',
    target_size=(32,32),
    color_mode='rgb',
    class_mode=None,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Run the trained CNN over every test batch; the cell then shows the shape of
# the resulting prediction array.
prediction = model.predict(x=test_generator)
prediction.shape

In [None]:
# Most probable class index for every test image.
preds = np.argmax(prediction, axis=1)

# Build the frame in one go. The image id is the number in the file name
# (directory parts are stripped first so digits in a folder name cannot leak
# into the id) and is stored as an integer so sorting is numeric rather than
# lexicographic ('10' < '2').
submission = pd.DataFrame({
    'id': [int(''.join(filter(str.isdigit, os.path.basename(name)))) for name in test_generator.filenames],
    'label': [class_names[int(i)] for i in preds],
})

# sort_values returns a new frame, so the result must be assigned back.
submission = submission.sort_values(by='id').reset_index(drop=True)
submission.head()

In [None]:
%rm -rf ./train
%rm -rf ./data

## Submission

In [None]:
# Write the predictions as CSV without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# ResNet50 backbone with ImageNet weights and without its ImageNet classifier,
# sized for 32x32 RGB inputs.
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(32, 32, 3),
)

In [None]:
# Freeze the pretrained backbone so its weights are not updated during the
# first training phase.
base_model.trainable = False

In [None]:
inputs = tf.keras.Input(shape=(32, 32, 3))
# The data pipeline feeds pixels scaled to [0, 1], but the ImageNet ResNet50
# weights expect 0-255 images passed through the model's own preprocessing,
# so undo the scaling and apply it inside the model.
x = tf.keras.applications.resnet50.preprocess_input(inputs * 255.0)
# training=False runs the backbone in inference mode.
x = base_model(x, training=False)
# Convert features of shape `base_model.output_shape[1:]` to vectors
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# Softmax classifier with one unit per class
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)
res_net = tf.keras.Model(inputs, outputs)

In [None]:
# Phase 1: train the classifier on top of the frozen backbone.
res_net.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(amsgrad=True),
    metrics=['accuracy'],
)

# Same augmented training stream and callbacks as the CNN above.
head_training_batches = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)
res_net.fit(
    head_training_batches,
    epochs=20,
    validation_data=(x_val, y_val),
    callbacks=[reduce_lr, es],
)

In [None]:
# Phase 2: make the backbone weights trainable again for fine-tuning.
base_model.trainable = True

# Changes to `trainable` on an inner layer are only taken into account after
# the model is recompiled. A very low learning rate is used for this phase.
fine_tune_optimizer = tf.keras.optimizers.Adam(1e-5, amsgrad=True)
res_net.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

# Train end-to-end for a few epochs; watch the validation metrics and stop
# before the model overfits.
fine_tune_batches = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)
res_net.fit(
    fine_tune_batches,
    epochs=10,
    validation_data=(x_val, y_val),
)