### CNN on CIFAR Assignment:

1.  Please visit this link to access the state-of-the-art DenseNet code for reference - DenseNet - cifar10 notebook link
2.  You need to create a copy of this and "retrain" this model to achieve 90+ test accuracy. 
3.  You cannot use DropOut layers.
4.  You MUST use Image Augmentation Techniques.
5.  You cannot use an already trained model as a starting point; you have to initialize the model yourself.
6.  You cannot run the program for more than 300 Epochs, and it should be clear from your log, that you have only used 300 Epochs
7.  You cannot use test images for training the model.
8.  You cannot change the general architecture of DenseNet (which means you must use Dense Block, Transition and Output blocks as mentioned in the code)
9.  You are free to change Convolution types (e.g. from 3x3 normal convolution to Depthwise Separable, etc)
10. You cannot have more than 1 Million parameters in total
11. You are free to move the code from Keras to Tensorflow, Pytorch, MXNET etc. 
12. You can use any optimization algorithm you need. 
13. You can checkpoint your model and resume training from that checkpoint, so you do not need to train from scratch if a run is interrupted at some epoch. You can load the saved model directly and continue training from that epoch.

In [1]:
from tensorflow.keras import models, layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

In [2]:
import tensorflow as tf

In [3]:
# Hyperparameters
# NOTE: `l` and `num_filter` are reassigned later in the model-building cell.
batch_size, epochs = 128, 10   # mini-batch size and number of epochs
l, num_filter = 40, 12         # conv layers per dense block, filters per conv layer
compression = 1                # multiplier applied to num_filter inside the blocks

#### Load Data

In [4]:
# Load CIFAR10 Data
train_split, test_split = tf.keras.datasets.cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Image geometry is taken from the trailing three axes of the training array
img_height, img_width, channel = X_train.shape[1:]




Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [5]:

import numpy as np

# Tabulate (label, number of training samples) for every class
labels, label_counts = np.unique(y_train, return_counts=True)
print(np.column_stack((labels, label_counts)))

[[   0 5000]
 [   1 5000]
 [   2 5000]
 [   3 5000]
 [   4 5000]
 [   5 5000]
 [   6 5000]
 [   7 5000]
 [   8 5000]
 [   9 5000]]


In [6]:
num_classes = 10

# Keep a copy of the integer labels; they are used later to compute class weights
y_tr = y_train.copy()

# Convert the labels to one-hot encoding.
# NOTE: y_train / y_test are overwritten here, so this cell must only be run
# once per kernel session (a second run would encode the one-hot rows again).
to_one_hot = tf.keras.utils.to_categorical
y_train = to_one_hot(y_train, num_classes)
y_test = to_one_hot(y_test, num_classes)


In [7]:
# Show the array shapes of the training and test images
for images in (X_train, X_test):
    print(images.shape)

(50000, 32, 32, 3)
(10000, 32, 32, 3)


#### Data Generation and Augmentation

In [8]:
from keras_preprocessing.image import ImageDataGenerator

# Generator used for the training data: random geometric augmentation,
# pixel rescaling by 1/255, and a 25% hold-out declared via validation_split.
datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.25,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [9]:
# Display the one-hot encoded training labels produced by to_categorical
y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)

In [10]:
# Augmented, shuffled training batches drawn from the 'training' subset of
# datagen's validation split.
# The batch size comes from the `batch_size` hyperparameter rather than a
# literal 128, so changing it in the hyperparameter cell actually takes effect.
# Arguments that merely restated flow()'s defaults have been dropped.
train_generator = datagen.flow(
    X_train,
    y_train,
    batch_size=batch_size,
    shuffle=True,
    subset='training',
)


In [11]:
# Validation batches must not be randomly augmented: val_accuracy is monitored
# by the learning-rate and early-stopping callbacks, so it has to be measured
# on undistorted images. A second generator that only rescales is used here.
# Its validation_split must match the one given to `datagen`; flow() picks the
# subset by position in the array (see the ImageDataGenerator.flow docs), so
# the same fraction selects the samples that `datagen` excludes from training.
valid_datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)
valid_generator = valid_datagen.flow(
    X_train,
    y_train,
    batch_size=batch_size,
    shuffle=False,  # fixed order keeps the validation metric reproducible
    subset='validation',
)

In [12]:
# Test data is only rescaled, never augmented.
# The labels are passed along so the generator can be fed to model.evaluate
# (a generator that yields images only cannot produce loss/accuracy), and
# shuffle=False keeps the batches aligned with the order of X_test / y_test.
testDatagen = ImageDataGenerator(rescale=1./255.)
test_generator = testDatagen.flow(
    X_test,
    y_test,
    batch_size=batch_size,
    shuffle=False,
)

#### CallBacks


In [13]:
from tensorflow.keras.callbacks import ModelCheckpoint
%load_ext tensorboard
from keras.models import Sequential
from tensorflow.keras.callbacks import LearningRateScheduler
#Decaying Learning rate using epoch
def changeLearningRate(epoch, lr):
    """Return the learning rate to use for the given epoch.

    When ``epoch + 1`` is a multiple of 5 the incoming rate ``lr`` is
    multiplied by 0.90; for every other epoch it is returned unchanged.
    """
    is_decay_epoch = (epoch + 1) % 5 == 0
    return lr * 0.90 if is_decay_epoch else lr
# Step decay driven by changeLearningRate; verbose=1 prints the rate each epoch
lrschedule = LearningRateScheduler(schedule=changeLearningRate, verbose=1)

from tensorflow.keras.callbacks import ReduceLROnPlateau
# Additional plateau-based decay keyed on validation accuracy
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.9,
    patience=5,
)

from tensorflow.keras.callbacks import TerminateOnNaN
# Abort training as soon as the loss becomes NaN
loss_terminate = TerminateOnNaN()


from tensorflow.keras.callbacks import EarlyStopping
# Stop when validation accuracy stops improving by at least min_delta
earlystop = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.0001,
    patience=15,
)

#### Model

In [14]:
# Dense Block
def denseblock(input, num_filter = 12, dropout_rate = 0, num_layers = None):
    """Build one DenseNet dense block on top of ``input``.

    Each layer applies BatchNormalization -> ReLU -> 3x3 SeparableConv2D and
    concatenates its output (on the channel axis) with everything the block
    has produced so far.

    Args:
        input: tensor the block is attached to.
        num_filter: filters per convolution before scaling by the global
            ``compression`` factor.
        dropout_rate: if greater than 0, a Dropout layer with this rate is
            added after every convolution.
        num_layers: number of conv layers in the block. Defaults to the
            module-level ``l`` (read at call time), which is what the block
            always used before this parameter existed.

    Returns:
        The concatenated output tensor of the block.
    """
    if num_layers is None:
        num_layers = l
    # Loop-invariant: every layer in the block emits the same number of channels
    filters_per_layer = int(num_filter*compression)

    temp = input
    for _ in range(num_layers):
        BatchNorm = layers.BatchNormalization()(temp)
        relu = layers.Activation('relu')(BatchNorm)
        Conv2D_3_3 = layers.SeparableConv2D(filters_per_layer, (3,3), use_bias=False, padding='same')(relu)
        if dropout_rate>0:
            Conv2D_3_3 = layers.Dropout(dropout_rate)(Conv2D_3_3)
        # Dense connectivity: the next layer sees all previous feature maps
        temp = layers.Concatenate(axis=-1)([temp, Conv2D_3_3])

    return temp

## transition Block
def transition(input, num_filter = 12, dropout_rate = 0):
    """Build a transition block: BatchNorm -> ReLU -> 1x1 SeparableConv2D -> 2x2 average pooling.

    The convolution emits ``int(num_filter * compression)`` channels, where
    ``compression`` is the module-level factor. If ``dropout_rate`` is greater
    than 0, a Dropout layer is inserted between the convolution and the pooling.
    """
    x = layers.BatchNormalization()(input)
    x = layers.Activation('relu')(x)
    x = layers.SeparableConv2D(int(num_filter*compression), (1,1), use_bias=False, padding='same')(x)
    if dropout_rate > 0:
        x = layers.Dropout(dropout_rate)(x)
    return layers.AveragePooling2D(pool_size=(2,2))(x)

#output layer
def output_layer(input):
    """Build the classification head on top of ``input``.

    Applies BatchNorm -> ReLU -> 2x2 average pooling -> Flatten, followed by a
    softmax Dense layer with ``num_classes`` units (module-level constant).
    """
    x = layers.BatchNormalization()(input)
    x = layers.Activation('relu')(x)
    x = layers.AveragePooling2D(pool_size=(2,2))(x)
    x = layers.Flatten()(x)
    return layers.Dense(num_classes, activation='softmax')(x)

In [15]:
# Model-specific settings. These override the values of `num_filter` and `l`
# from the hyperparameter cell; denseblock reads the global `l` to decide how
# many conv layers each block gets.
num_filter = 46
dropout_rate = 0
l = 12
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the model-creation cell refers to it.
input = layers.Input(shape=(img_height, img_width, channel,))
# Initial separable convolution applied directly to the input images
First_Conv2D = layers.SeparableConv2D(num_filter, (3,3), use_bias=False ,padding='same')(input)

# Dense block 1, followed by a transition (the transition ends in 2x2 average pooling)
First_Block = denseblock(First_Conv2D, num_filter, dropout_rate)
First_Transition = transition(First_Block, num_filter, dropout_rate)

# Dense block 2 + transition
Second_Block = denseblock(First_Transition, num_filter, dropout_rate)
Second_Transition = transition(Second_Block, num_filter, dropout_rate)

# Dense block 3 + transition
Third_Block = denseblock(Second_Transition, num_filter, dropout_rate)
Third_Transition = transition(Third_Block, num_filter, dropout_rate)

# Final dense block feeds the classification head instead of a transition
Last_Block = denseblock(Third_Transition,  num_filter, dropout_rate)
output = output_layer(Last_Block)

2022-01-22 14:01:17.584746: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 14:01:17.686737: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 14:01:17.687466: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 14:01:17.689270: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

##### Summary

In [16]:
# Wrap the layer graph built above (from `input` to `output`) in a Keras Model
# and print its layer-by-layer summary with parameter counts.
model = Model(inputs=[input], outputs=[output])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
separable_conv2d (SeparableConv (None, 32, 32, 46)   165         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 46)   184         separable_conv2d[0][0]           
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 46)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [17]:
# Total number of layers in the assembled model
layer_count = len(model.layers)
print(layer_count)

211


In [18]:
import os
import datetime

In [19]:
#https://datascience.stackexchange.com/questions/13490/how-to-set-class-weights-for-imbalanced-classes-in-keras
from sklearn.utils import class_weight
import numpy as np

# "balanced" weights are computed from the integer labels saved in y_tr
# (first column of the label array) before one-hot encoding.
integer_labels = y_tr[:,0]
class_weights = class_weight.compute_class_weight(
    class_weight = "balanced",
    classes = list(np.unique(y_tr)),
    y = integer_labels,
)
# Map positional index -> weight
class_weight_dict = {
    class_index: weight for class_index, weight in enumerate(class_weights)
}
class_weight_dict



{0: 1.0,
 1: 1.0,
 2: 1.0,
 3: 1.0,
 4: 1.0,
 5: 1.0,
 6: 1.0,
 7: 1.0,
 8: 1.0,
 9: 1.0}

In [20]:
# determine Loss function and Optimizer
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

#Saving the best model
filepath="model1/weights-{epoch:02d}-{val_accuracy:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_accuracy',  verbose=1, save_best_only=True, mode='auto')

#Tensorboard Callback
!rm -rf ./logs/
log_dir = os.path.join("logs",'fits', datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,histogram_freq=1,write_graph=True)

callbacks = [lrschedule,reduce_lr, loss_terminate, earlystop,tensorboard_callback]
model.fit_generator(generator=train_generator,steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10,callbacks=callbacks)

2022-01-22 14:01:22.985839: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-01-22 14:01:22.985900: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2022-01-22 14:01:22.987988: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1614] Profiler found 1 GPUs
2022-01-22 14:01:23.280433: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-01-22 14:01:23.280625: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1748] CUPTI activity buffer flushed
2022-01-22 14:01:23.660077: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10

Epoch 00001: LearningRateScheduler setting learning rate to 0.0010000000474974513.


2022-01-22 14:01:30.228907: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


  1/292 [..............................] - ETA: 1:03:12 - loss: 2.4566 - accuracy: 0.0859

2022-01-22 14:01:36.890660: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2022-01-22 14:01:36.890712: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.


  2/292 [..............................] - ETA: 4:06 - loss: 2.5939 - accuracy: 0.1133   

2022-01-22 14:01:37.589857: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2022-01-22 14:01:37.593241: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1748] CUPTI activity buffer flushed
2022-01-22 14:01:37.782138: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:673]  GpuTracer has collected 1587 callback api events and 1584 activity events. 
2022-01-22 14:01:37.858289: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2022-01-22 14:01:37.945980: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: logs/fits/20220122-140122/train/plugins/profile/2022_01_22_14_01_37

2022-01-22 14:01:37.990652: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to logs/fits/20220122-140122/train/plugins/profile/2022_01_22_14_01_37/893ac80ead6d.trace.json.gz
2022-01-22 14:01:38.085691: I tensorflow/core/profiler/rpc/client/save_profile

Epoch 2/10

Epoch 00002: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 3/10

Epoch 00003: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 4/10

Epoch 00004: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 5/10

Epoch 00005: LearningRateScheduler setting learning rate to 0.0009000000427477062.
Epoch 6/10

Epoch 00006: LearningRateScheduler setting learning rate to 0.0009000000427477062.
Epoch 7/10

Epoch 00007: LearningRateScheduler setting learning rate to 0.0009000000427477062.
Epoch 8/10

Epoch 00008: LearningRateScheduler setting learning rate to 0.0009000000427477062.
Epoch 9/10

Epoch 00009: LearningRateScheduler setting learning rate to 0.0009000000427477062.
Epoch 10/10

Epoch 00010: LearningRateScheduler setting learning rate to 0.0008100000384729356.


<keras.callbacks.History at 0x7f9f54d67fd0>

In [21]:
%tensorboard --logdir logs --bind_all

In [22]:
# Test generator with labels, used for the final evaluation.
# Images are only rescaled (no augmentation). Batches of `batch_size` replace
# the former batch_size=1, which forced one forward pass per test image, and
# shuffle=False keeps the batches in the same order as X_test / y_test.
testDatagen = ImageDataGenerator(rescale=1./255.)
test_generator = testDatagen.flow(
    X_test,
    y_test,
    batch_size=batch_size,
    shuffle=False,
)

In [23]:
# Test the model
score = model.evaluate(test_generator, verbose=1)
# score holds the loss first, then the metrics passed to compile (accuracy)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

Test loss: 0.5558121800422668
Test accuracy: 0.8158000111579895


In [24]:
# Display the one-hot label row at index 5 of the test set
y_test[5]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.], dtype=float32)

In [25]:
# Persist the trained weights to an HDF5 (.h5) file
weights_path = "DNST_model.h5"
model.save_weights(weights_path)
print("Saved model to disk")

Saved model to disk
