## Import modules

In [1]:
import sys
from keras.utils import np_utils
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from keras.models import load_model
from keras import optimizers

# Report the interpreter and NumPy versions in use.
for label, version in (('Python ', sys.version), ('numpy: ', np.__version__)):
    print(label + version)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


KeyboardInterrupt: 

## Importing the dataset from keras
### Keras includes built-in support for loading several benchmark datasets. One of them is the CIFAR-10 dataset.

In [None]:
from keras.datasets import cifar10

## Load the dataset

#### If you have never used CIFAR-10 with Keras before, the dataset will be downloaded first. The next time you use the dataset, the already downloaded copy will be reused.

In [None]:
# load_data() yields a (train, test) pair; each element is an
# (images, labels) pair that is unpacked below.
train_split, test_split = cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Dimensions of the training image array (bare expression: echoed as the cell result).
X_train.shape

In [None]:
# Dimensions of the training label array (bare expression: echoed as the cell result).
y_train.shape

In [None]:
# Dimensions of the test image array (bare expression: echoed as the cell result).
X_test.shape

In [None]:
# Dimensions of the test label array (bare expression: echoed as the cell result).
y_test.shape

## Visualising the dataset
#### Displaying the first 9 images

In [None]:
# Lay the first nine training images out on a 3x3 grid of subplots.
grid_rows, grid_cols = 3, 3
for position in range(1, grid_rows * grid_cols + 1):
    # Subplot positions are 1-based, image indices 0-based.
    plt.subplot(grid_rows, grid_cols, position)
    plt.imshow(X_train[position - 1])
plt.show()

# Data Preprocessing

## Feature scaling is a must in Deep Learning networks
#### Scaling the dataset features to have values between 0 and 1

In [None]:
# Keep the first image in its unscaled form so it can be displayed later
# as a sanity check.
X_train_0 = X_train[0]

# Feature scaling: map pixel values into the range [0, 1].
# Cast to float32 before dividing: dividing an integer array by 255 would
# produce float64 and double the memory footprint of the dataset.
# NOTE(review): assumes load_data() returned integer pixel values in
# 0-255 -- confirm.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

## One-hot encoding 
#### The labels in the dataset are numerical values, which are inherently ordinal even though the classes themselves have no natural order. This ordinality can skew the weights; e.g., a label of 6 will skew the weights of the ANN more towards itself than the lower values, i.e., 0, 1, 2, 3, 4, ... ANNs work in such a way that this happens, and the result may not be a good model. So we one-hot encode the labels into vectors of 0s and 1s. The length of each such vector is equal to the number of classes, and each vector has a 1 at exactly one position. For example, if the categories are dog, cat and lizard, the one-hot encoding produces one vector per example, each of length 3 (since the number of classes = 3). If an example has the label 'cat', its one-hot encoded vector "may be" [1,0,0]; if it has the label 'dog', the vector "may be" [0,1,0]; and for 'lizard' it "may be" [0,0,1]. I have explicitly written "may be" because the position of the 1 for each category depends on the underlying implementation of the one-hot encoding; what I have shown corresponds to an implementation that assigns one-hot encoded vectors in lexicographical order, and other implementations may assign these vectors differently.

In [None]:
# The network has a 10-way output, so encode every label as a length-10
# one-hot vector.  Passing the class count explicitly (instead of letting
# it be inferred from the largest label in each array) guarantees that the
# train and test encodings always have the same width.
NUM_CLASSES = 10
y_train = np_utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = np_utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [None]:
print(y_train.shape)

# One-hot encoded vector of the first training label (class 6).
first_label = y_train[0]
print(first_label)

# According to the CIFAR-10 dataset webpage, label 6 stands for "frog";
# display the first training image (saved before scaling) to check that.
# Drawing into one cell of a 3x3 grid keeps the 32x32 image small, so it
# looks a bit clearer.
plt.subplot(3, 3, 1)
plt.imshow(X_train_0)
plt.show()

##### It does look like a frog

# Building the All-CNN model
#### Reference used-  All-CNN paper (https://arxiv.org/abs/1412.6806)

Model - C (taken from the paper, out of three models, this model performed the best)
* Input 32 × 32 RGB image
* 3 × 3 conv. 96 ReLU
* 3 × 3 conv. 96 ReLU
* 3 × 3 max-pooling stride 2
* 3 × 3 conv. 192 ReLU
* 3 × 3 conv. 192 ReLU
* 3 × 3 max-pooling stride 2
* 3 × 3 conv. 192 ReLU
* 1 × 1 conv. 192 ReLU
* 1 × 1 conv. 10 ReLU
* global averaging over 6 × 6 spatial dimensions
* 10 or 100-way softmax

### Importing necessary libraries to build layers of the CNN

In [None]:
from keras.models import Sequential
from keras.layers import Dropout # used for regularisation to prevent\
                                # overfitting which is common with ANNs
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import Activation

### Implement the All-CNN function

In [None]:
def allcnn_traditional(weights=None):
    """Build the All-CNN style classifier for 32x32 RGB images.

    Layer stack, in order: two 3x3 convolutions (96 filters), 3x3
    max-pooling with stride 2, two 3x3 convolutions (192 filters), 3x3
    max-pooling with stride 2, one 3x3 convolution (192 filters), a 1x1
    convolution (192 filters), a 1x1 convolution (10 filters), global
    average pooling and a softmax activation.  Every convolution applies
    ReLU to its feature map.

    Args:
        weights: optional path of a weights file.  When truthy, it is
            loaded into the freshly built model with ``load_weights``.

    Returns:
        The ``Sequential`` model.  It is not compiled here.
    """
    def conv_relu(filters, kernel, border, **extra):
        # Convolution and its ReLU are treated as a single step by passing
        # `activation` to the layer.  The two-step equivalent would be a
        # plain Convolution2D followed by a separate Activation('relu').
        return Convolution2D(filters, kernel, padding=border,
                             activation='relu', **extra)

    model = Sequential()

    # Layers are created in the same order in which they are stacked.
    stack = [
        conv_relu(96, (3, 3), 'same', input_shape=(32, 32, 3)),
        conv_relu(96, (3, 3), 'same'),
        MaxPooling2D(pool_size=(3, 3), strides=2, padding='same'),

        conv_relu(192, (3, 3), 'same'),
        conv_relu(192, (3, 3), 'same'),
        # NOTE(review): unlike the first pooling layer, this one does not
        # set `padding`, so the Keras default applies -- confirm intended.
        MaxPooling2D(pool_size=(3, 3), strides=2),

        conv_relu(192, (3, 3), 'same'),
        conv_relu(192, (1, 1), 'valid'),
        conv_relu(10, (1, 1), 'valid'),

        # Average each of the 10 feature maps to one value, then softmax.
        GlobalAveragePooling2D(),
        Activation('softmax'),
    ]
    for layer in stack:
        model.add(layer)

    if weights:
        model.load_weights(weights)

    return model
    

In [None]:
import os

# File the trained model is saved to and, on later runs, loaded from.
MODEL_PATH = 'MyClassifier_traditional.h5'

if os.path.exists(MODEL_PATH):
    # A model saved by a previous run exists: reuse it instead of retraining.
    classifier = load_model(MODEL_PATH)
else:
    classifier = allcnn_traditional()
    # Adam is a stochastic gradient descent algorithm.
    adam = optimizers.Adam(lr = 0.0001)
    classifier.compile(optimizer = adam, loss = 'categorical_crossentropy', metrics = ['accuracy'])
    # summary() prints the layer table itself and returns None, so it must
    # not be wrapped in print() (that emitted a stray "None" line).
    classifier.summary()

    # The first row of the test set is left out of the validation data so
    # that it stays available for checking how well the final model predicts.
    classifier.fit(X_train, y_train, validation_data = (X_test[1:], y_test[1:]), epochs = 1, batch_size = 32)
    classifier.save(MODEL_PATH)

In [None]:
# Pick a single test image and its label to try the classifier on.
sample_index = 2312
X_test_ex, y_test_ex = X_test[sample_index], y_test[sample_index]

In [None]:
# Prepend a length-1 axis so the single image forms a batch of one.
X_test_ex = X_test_ex[np.newaxis, ...]

In [None]:
# Dimensions of the example after the batch axis was added (echoed as the cell result).
X_test_ex.shape

In [None]:
# Alternative kept for reference (presumably returns the class index
# directly -- verify against the installed Keras version):
# y_test_ex_pred = classifier.predict_classes([X_test_ex])
# Run the model on the one-image batch; the result is the network output
# for that image (softmax values, if the model is the one built above).
y_test_ex_pred = classifier.predict([X_test_ex])

In [None]:
# y_test_ex is the true label of the example and y_test_ex_pred is the
# model output for it; label each printed value with its own variable name
# (the ground truth was previously printed as "y_pred_ex").
print('y_test_ex = ' + str(y_test_ex))
print('y_test_ex_pred = ' + str(y_test_ex_pred))

# Reduce both vectors to a class index so they can be compared at a glance.
# NOTE(review): assumes y_test_ex is a one-hot vector and y_test_ex_pred
# holds the scores of a single example -- confirm.
print('true class = ' + str(np.argmax(y_test_ex)))
print('predicted class = ' + str(np.argmax(y_test_ex_pred)))