<a href="https://colab.research.google.com/github/ram130849/Deep_Learning_Systems_Assignments/blob/main/TensorFlow/Sushant/DLS_Assignment4_Part1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import timeit
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Flatten
from keras.models import Model
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

### Allocating and Checking the GPU

In [2]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [3]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def cpu():
  with tf.device('/cpu:0'):
    random_image_cpu = tf.random.normal((100, 100, 100, 3))
    net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
    return tf.math.reduce_sum(net_cpu)

def gpu():
  with tf.device('/device:GPU:0'):
    random_image_gpu = tf.random.normal((100, 100, 100, 3))
    net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
    return tf.math.reduce_sum(net_gpu)
  
# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
0.3988690119999774
GPU (s):
0.03919492199997876
GPU speedup over CPU: 10x


### Loading the MNIST Dataset

In [4]:
mnist = tf.keras.datasets.mnist

In [5]:
(train_images, train_labels) , (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
#Lets look at the training data
print("Training Images Shape: ",train_images.shape)
print("Training Labels: ",train_labels)


#Lets look at the testing data
print("Testing Images Shape: ",test_images.shape)
print("Testing Labels: ",test_labels)

Training Images Shape:  (60000, 28, 28)
Training Labels:  [5 0 4 ... 5 6 8]
Testing Images Shape:  (10000, 28, 28)
Testing Labels:  [7 2 1 ... 4 5 6]


### Normalizing the Images

In [7]:
train_images = train_images.astype('float32')
test_images = test_images.astype('float32')
train_images/=255
test_images/=255

### Creating the Baseline Model

In [85]:
baseline = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(1024, activation='relu', name='first_layer'),
    layers.Dense(1024, activation='relu', name='second_layer'),
    layers.Dense(1024, activation='relu', name='third_layer'),
    layers.Dense(1024, activation='relu', name='fourth_layer'),
    layers.Dense(1024, activation='relu', name="fifth_layser"),
    layers.Dense(10, name='output' ,activation='softmax')
])
print(baseline.summary())

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_12 (Flatten)        (None, 784)               0         
                                                                 
 first_layer (Dense)         (None, 1024)              803840    
                                                                 
 second_layer (Dense)        (None, 1024)              1049600   
                                                                 
 third_layer (Dense)         (None, 1024)              1049600   
                                                                 
 fourth_layer (Dense)        (None, 1024)              1049600   
                                                                 
 fifth_layser (Dense)        (None, 1024)              1049600   
                                                                 
 output (Dense)              (None, 10)              

In [86]:
baseline.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [87]:
history_baseline = baseline.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [88]:
evaluation = baseline.evaluate(test_images , test_labels)



### Saving the Baseline Model

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
model_path = '/content/drive/MyDrive/DLS_Assignments/Models/assign4_part1_baseline.h5'

In [91]:
#model_path = '/content/drive/MyDrive/DLS_Assignments/Models/assign4_part1_baseline.h5'
baseline.save(model_path)

### Loading the saved model

In [11]:
baseline = keras.models.load_model(model_path)

### Applying SVD on weight matrices of every layer

In [12]:
layer_1 = baseline.get_layer('output')

In [13]:
layer_1.get_weights()[0].shape

(1024, 10)

In [14]:
len(baseline.layers)

7

In [15]:
baseline.layers[2].get_weights()

[array([[ 0.04147477, -0.04297014,  0.01650734, ..., -0.00344087,
         -0.00032141,  0.04433878],
        [-0.00768789,  0.00075719,  0.04130481, ..., -0.01158604,
          0.06521795, -0.03428532],
        [ 0.03143325, -0.03816259, -0.0555243 , ..., -0.00832305,
          0.03612668, -0.03685714],
        ...,
        [-0.09021768,  0.01226015, -0.09118644, ..., -0.08706104,
          0.11366595, -0.02852068],
        [-0.09176303, -0.01390115, -0.04507038, ..., -0.04366718,
          0.00597748, -0.06752308],
        [-0.05622025, -0.03835107,  0.01046084, ...,  0.07575639,
          0.00369204, -0.02010996]], dtype=float32),
 array([-0.07401229, -0.07578075, -0.04443052, ..., -0.04017141,
        -0.02489184, -0.03327626], dtype=float32)]

In [16]:
s, U, V = tf.linalg.svd(baseline.layers[2].get_weights()[0])
S = tf.linalg.diag(s)
V_T = tf.transpose(V)

In [17]:
S.shape

TensorShape([1024, 1024])

In [18]:
s.shape

TensorShape([1024])

In [19]:
V_T.shape

TensorShape([1024, 1024])

In [20]:
V.shape

TensorShape([1024, 1024])

Performing matrix multiplication for SVD:


In [21]:
A_svd = U@S@V_T

In [22]:
A_svd

<tf.Tensor: shape=(1024, 1024), dtype=float32, numpy=
array([[ 0.0414734 , -0.04295873,  0.0165107 , ..., -0.00341044,
        -0.00030622,  0.04435263],
       [-0.0076798 ,  0.00076753,  0.04131874, ..., -0.01157343,
         0.06523549, -0.03425943],
       [ 0.03143236, -0.03815509, -0.05552064, ..., -0.00831379,
         0.03612494, -0.03687633],
       ...,
       [-0.09020642,  0.01229865, -0.09121983, ..., -0.0870466 ,
         0.11367158, -0.02854278],
       [-0.09170499, -0.01388217, -0.04507461, ..., -0.04368113,
         0.00597113, -0.06751347],
       [-0.05619104, -0.03833483,  0.01044659, ...,  0.07578545,
         0.00372933, -0.02013971]], dtype=float32)>

In [23]:
A_svd.shape

TensorShape([1024, 1024])

In [24]:
bias = baseline.layers[2].get_weights()[1]

In [25]:
bias

array([-0.07401229, -0.07578075, -0.04443052, ..., -0.04017141,
       -0.02489184, -0.03327626], dtype=float32)

In [26]:
#baseline.layers[2].set_weights([A_svd,bias])

In [27]:
#baseline.layers[2].get_weights()

In [28]:
A_svd.shape

TensorShape([1024, 1024])

In [71]:
baseline_full = keras.models.clone_model(baseline)

In [72]:
for i in range(1,len(baseline_full.layers)-1):
  weights = baseline_full.layers[i].get_weights()[0]
  bias = baseline_full.layers[i].get_weights()[1]
  s, U, V = tf.linalg.svd(weights)
  S = tf.linalg.diag(s)
  V_T = tf.transpose(V)
  weights_svd = U@S@V_T
  baseline_full.layers[i].set_weights([weights_svd,bias])

### Performing Low Rank Approximation for different values of D

In [73]:
baseline_10 = keras.models.clone_model(baseline)
baseline_20 = keras.models.clone_model(baseline)
baseline_50 = keras.models.clone_model(baseline)
baseline_100 = keras.models.clone_model(baseline)
baseline_200 = keras.models.clone_model(baseline)

In [74]:
def rank_r_approx(s, U, V, r, verbose=False):
  # Compute the matrices necessary for a rank-r approximation
  s_r, U_r, V_r = s[..., :r], U[..., :, :r], V[..., :, :r] # ... implies any number of extra batch axes
  # Compute the low-rank approximation and its size
  A_r = tf.einsum('...s,...us,...vs->...uv',s_r,U_r,V_r)
  A_r_size = tf.size(U_r) + tf.size(s_r) + tf.size(V_r)
  if verbose:
    print(f"Approximation Size: {A_r_size}")
  return A_r

In [75]:
def change_weights(model,r):
  for i in range(1,len(model.layers)-1):
    weights = model.layers[i].get_weights()[0]
    bias = model.layers[i].get_weights()[1]
    s, U, V = tf.linalg.svd(weights)
    weights_approx = rank_r_approx(s, U, V, r, verbose = False)
    model.layers[i].set_weights([weights_approx,bias])
  return model

#### For D=10

In [76]:
baseline_10 = change_weights(baseline_10,10)

#### For D=20

In [77]:
baseline_20 = change_weights(baseline_20,20)

#### For D=50

In [78]:
baseline_50 = change_weights(baseline_50,50)

#### For D=100

In [79]:
baseline_100 = change_weights(baseline_100,100)

#### For D=200

In [80]:
baseline_200 = change_weights(baseline_200,200)

## Training all the 6 models

#### Baseline Full

In [81]:
baseline_full.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [82]:
history_baseline_full = baseline_full.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [65]:
test_full = baseline_full.evaluate(test_images , test_labels)

 46/313 [===>..........................] - ETA: 0s - loss: 0.1537 - sparse_categorical_accuracy: 0.9762

  return dispatch_target(*args, **kwargs)




#### Baseline 10

In [55]:
baseline_10.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [56]:
history_baseline_10 = baseline_10.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [66]:
test_10 = baseline_10.evaluate(test_images , test_labels)

 24/313 [=>............................] - ETA: 0s - loss: 0.0985 - sparse_categorical_accuracy: 0.9896 

  return dispatch_target(*args, **kwargs)




#### Baseline 20

In [57]:
baseline_20.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [58]:
history_baseline_20 = baseline_20.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [67]:
test_20 = baseline_20.evaluate(test_images , test_labels)

 42/313 [===>..........................] - ETA: 0s - loss: 0.1591 - sparse_categorical_accuracy: 0.9762

  return dispatch_target(*args, **kwargs)




#### Baseline 50

In [59]:
baseline_50.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [60]:
history_baseline_50 = baseline_50.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [68]:
test_50 = baseline_50.evaluate(test_images , test_labels)

 46/313 [===>..........................] - ETA: 0s - loss: 0.1465 - sparse_categorical_accuracy: 0.9769

  return dispatch_target(*args, **kwargs)




#### Baseline 100

In [61]:
baseline_100.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [62]:
history_baseline_100 = baseline_100.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [69]:
test_100 = baseline_100.evaluate(test_images , test_labels)

 43/313 [===>..........................] - ETA: 0s - loss: 0.1733 - sparse_categorical_accuracy: 0.9789

  return dispatch_target(*args, **kwargs)




#### Baseline 200

In [63]:
baseline_200.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [64]:
history_baseline_200 = baseline_200.fit(train_images , train_labels ,epochs = 30, batch_size = 256)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [70]:
test_200 = baseline_200.evaluate(test_images , test_labels)

 23/313 [=>............................] - ETA: 0s - loss: 0.1030 - sparse_categorical_accuracy: 0.9837     

  return dispatch_target(*args, **kwargs)




In [107]:
s, U, V = tf.linalg.svd(baseline.layers[2].get_weights()[0])

In [108]:
tf.size(baseline.layers[2].get_weights()[0])

<tf.Tensor: shape=(), dtype=int32, numpy=1048576>

In [109]:
A_10, A_10_size  = rank_r_approx(s, U, V, 10, verbose = True)

Approximation Size: 20490


In [110]:
A_10.shape

TensorShape([1024, 1024])

In [111]:
A_10

<tf.Tensor: shape=(1024, 1024), dtype=float32, numpy=
array([[ 8.58276710e-03, -1.46560371e-02, -1.68197062e-02, ...,
         2.82411948e-02,  1.07880663e-02,  1.45007642e-02],
       [-1.31638860e-02, -1.20625915e-02, -2.33423319e-02, ...,
        -2.35342290e-02, -1.10373730e-02, -1.59793894e-03],
       [-8.47995543e-05, -7.53484294e-03, -1.41212456e-02, ...,
         3.09202168e-03, -2.77012750e-03, -9.30951163e-03],
       ...,
       [-2.05020532e-02,  1.59101635e-02, -2.70265192e-02, ...,
        -7.23445565e-02,  2.10185628e-02,  4.11602715e-03],
       [-5.37080280e-02, -2.66314633e-02,  7.24346889e-03, ...,
        -7.23579451e-02,  1.36814709e-03, -2.36150771e-02],
       [-2.12653838e-02, -1.22296214e-02,  4.48545180e-02, ...,
         2.44082510e-03, -3.73425148e-02, -2.47329334e-03]], dtype=float32)>

In [112]:
baseline.layers[2].get_weights()[0]

array([[ 0.04147477, -0.04297014,  0.01650734, ..., -0.00344087,
        -0.00032141,  0.04433878],
       [-0.00768789,  0.00075719,  0.04130481, ..., -0.01158604,
         0.06521795, -0.03428532],
       [ 0.03143325, -0.03816259, -0.0555243 , ..., -0.00832305,
         0.03612668, -0.03685714],
       ...,
       [-0.09021768,  0.01226015, -0.09118644, ..., -0.08706104,
         0.11366595, -0.02852068],
       [-0.09176303, -0.01390115, -0.04507038, ..., -0.04366718,
         0.00597748, -0.06752308],
       [-0.05622025, -0.03835107,  0.01046084, ...,  0.07575639,
         0.00369204, -0.02010996]], dtype=float32)