# Loading Libraries


In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# tensorflow 2.0.0 , Cudnn10.0.0 and Cuda 10.1 , python 3.8

In [2]:
# List the physical devices TensorFlow can see; a GPU entry is needed for the GPU benchmark cells.
tf.config.experimental.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Check whether the installed TensorFlow build was compiled with CUDA (GPU) support.
tf.test.is_built_with_cuda()

True

# Loading Dataset

60,000 32x32 colour images in 10 classes, with 6,000 images per class.
- 50000 train images 
- 10000 test images

Training uses a fully connected ANN instead of a CNN to keep things simple and understandable.

In [4]:
# Load the CIFAR-10 train/test split as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

# EDA

In [5]:
# Shapes of the train/test image arrays: (n_images, 32, 32, 3), i.e. 32x32 pixels with 3 colour channels.
X_train.shape, X_test.shape

((50000, 32, 32, 3), (10000, 32, 32, 3))

In [6]:
# Shape of a single image: 32x32 pixels with 3 colour channels.
X_train[0].shape

(32, 32, 3)

In [7]:
# First five training labels; each row holds a single integer class id.
y_train[:5]

array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)

# Pre-Processing

In [8]:
# Scale pixel values from 0-255 down to 0-1.
# Cast to float32 first: dividing the integer image arrays directly would
# promote them to float64, doubling the memory of the (50000, 32, 32, 3)
# training set for precision the network never uses.
X_train_scaled = X_train.astype('float32') / 255.0
X_test_scaled = X_test.astype('float32') / 255.0

In [9]:
# One-hot encode the integer labels of both splits into 10-column float32 matrices.
y_train_encoded, y_test_encoded = (
    keras.utils.to_categorical(labels, num_classes=10, dtype='float32')
    for labels in (y_train, y_test)
)

# Model Building
- Flatten : Converts each 32*32*3 image into a single 3072-element vector
- Dense : Units > 3000, activation fn > relu
- Dense : Units > 1000, activation fn > relu
- Dense : Units > 10,   activation fn > sigmoid (output probabilities)

- Optimizer : SGD
- Loss Fn : Categorical Cross Entropy
- Monitor Metric : Accuracy 
        

In [11]:
def get_model(optimizer='SGD', output_activation='softmax'):
    """Build and compile the fully connected CIFAR-10 classifier.

    Architecture: Flatten (32x32x3 -> 3072) -> Dense(3000, relu)
    -> Dense(1000, relu) -> Dense(10, output_activation).

    Parameters
    ----------
    optimizer : str or Keras optimizer, default 'SGD'
        Passed unchanged to ``model.compile``.
    output_activation : str, default 'softmax'
        Activation of the 10-unit output layer. Softmax (rather than
        sigmoid) makes the ten outputs a probability distribution over the
        mutually exclusive classes, which is what categorical
        cross-entropy expects.

    Returns
    -------
    keras.Sequential
        A newly created model compiled with categorical cross-entropy loss
        and the accuracy metric.
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(32, 32, 3)),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(1000, activation='relu'),
        # One unit per class.
        keras.layers.Dense(10, activation=output_activation),
    ])

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [12]:
# Build the network once to inspect its layers and parameter counts.
model = get_model()

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 3072)              0         
_________________________________________________________________
dense (Dense)                (None, 3000)              9219000   
_________________________________________________________________
dense_1 (Dense)              (None, 1000)              3001000   
_________________________________________________________________
dense_2 (Dense)              (None, 10)                10010     
Total params: 12,230,010
Trainable params: 12,230,010
Non-trainable params: 0
_________________________________________________________________


# Benchmarking On 1 Epoch

In [25]:
%%timeit -n1 -r1

# CPU
with tf.device('/CPU:0'):
    model_cpu = get_model()
    
    model_cpu.fit(X_train_scaled, y_train_encoded, epochs = 1)

Train on 50000 samples
1min 29s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [24]:
%%timeit -n1 -r1
# GPU
with tf.device('/GPU:0'):
    model_gpu = get_model()
    
    model_gpu.fit(X_train_scaled, y_train_encoded, epochs = 1)

Train on 50000 samples
21.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Benchmarking On 10 Epochs

In [29]:
%%timeit -n1 -r1

# CPU
with tf.device('/CPU:0'):
    model_cpu = get_model()
    
    model_cpu.fit(X_train_scaled, y_train_encoded, epochs = 10, verbose = 0)

Train on 50000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
14min 9s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [26]:
%%timeit -n1 -r1
# GPU
with tf.device('/GPU:0'):
    model_gpu = get_model()
    
    model_gpu.fit(X_train_scaled, y_train_encoded, epochs = 10, verbose = 0)

Train on 50000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1min 55s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


**Performance Comparison**

| EPOCHS | CPU | GPU  |
|:------|:------|:------|
| 1 | 1min 29sec | 21.7sec  |
| 10 |14min 9sec | 1min 55sec |

