<a href="https://colab.research.google.com/github/pure-water/462deferred/blob/master/GPUvsTPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Why TPUs?

In [1]:
import numpy as np

def main():
    """Round-trip check of the projected quadric matrix QEP.

    Uses the hard-coded 4x4 matrices ``PV`` and ``Q`` to compute
    ``QEP = PV_inv^T @ Q @ PV_inv``, then transforms ``QEP`` back with
    ``PV^T @ QEP @ PV`` and reports whether the result is close to ``Q``
    (``np.allclose`` with ``atol=1e-6``).

    All intermediate matrices and the verdict are printed to stdout;
    the function returns ``None``.
    """
    def show(heading, matrix):
        # Heading on its own line, then the matrix followed by an empty line.
        print(heading)
        print(matrix, "\n")

    # Input matrices, exactly as provided.
    PV = np.array([
        [ 0.839099,  0.000000, -0.000604, -0.000091],
        [-0.000238,  0.771070, -0.330967, -0.126752],
        [-0.000688, -0.410530, -0.956432,  1.249704],
        [-0.000661, -0.394431, -0.918925,  1.592853],
    ])
    # Q is diagonal, so build it from its diagonal entries.
    Q = np.diag([12.755102, 12.755102, 12.755102, -1.000000])

    show("PV:", PV)

    pv_inverse = np.linalg.inv(PV)
    show("PV_inv:", pv_inverse)

    # Sanity check on the inverse: the product should be numerically identity.
    show("PV_inv * PV (should be identity):", pv_inverse.dot(PV))

    # QEP = (PV_inv)^T * Q * (PV_inv)
    QEP = pv_inverse.T @ Q @ pv_inverse
    show("QEP (Projected Quadratic Error Matrix):", QEP)

    # Undo the projection: Q_recon = (PV)^T * QEP * PV
    recovered_q = PV.T @ QEP @ PV
    show("Reconstructed Q (should match original Q):", recovered_q)

    # Report the largest element-wise deviation when the round trip is off.
    if not np.allclose(Q, recovered_q, atol=1e-6):
        worst_error = np.abs(Q - recovered_q).max()
        print("Cross validation failed: Maximum difference =", worst_error)
        return

    print("Cross validation successful: Reconstructed Q is close to the original Q.")

if __name__ == "__main__":
    main()


PV:
[[ 8.390990e-01  0.000000e+00 -6.040000e-04 -9.100000e-05]
 [-2.380000e-04  7.710700e-01 -3.309670e-01 -1.267520e-01]
 [-6.880000e-04 -4.105300e-01 -9.564320e-01  1.249704e+00]
 [-6.610000e-04 -3.944310e-01 -9.189250e-01  1.592853e+00]] 

PV_inv:
[[ 1.19175387e+00 -3.38361268e-04 -2.74197102e-03  2.19242691e-03]
 [-1.38309426e-07  1.09513314e+00 -1.87935053e+00  1.56162696e+00]
 [-8.57295370e-04 -4.70063976e-01 -3.44012420e+00  2.66161117e+00]
 [-5.98878021e-08  4.37817601e-07 -2.45000138e+00  2.55000090e+00]] 

PV_inv * PV (should be identity):
[[ 1.00000000e+00  6.38937659e-20  5.20507609e-19 -1.04672248e-18]
 [ 2.22000659e-19  1.00000000e+00  1.36311600e-16 -4.43943858e-16]
 [ 5.69352994e-19  5.69952542e-17  1.00000000e+00 -1.02536479e-15]
 [ 1.76631954e-19  4.13258087e-17  1.95374835e-16  1.00000000e+00]] 

QEP (Projected Quadratic Error Matrix):
[[ 1.81157911e+01 -5.24423609e-06 -4.05999492e-03  4.21992742e-03]
 [-5.24423609e-06  1.81157761e+01 -5.62575992e+00  5.85537089e+00]

In [2]:
import numpy as np

def main():
    """Compare the QEP computed with NumPy against a pre-calculated QEP.

    Builds the hard-coded 4x4 matrices ``PV`` and ``Q``, inverts ``PV``
    (printing a message and returning early if NumPy reports it as
    singular), computes ``QEP = PV_inv^T @ Q @ PV_inv`` and checks it
    element-wise against the hard-coded reference matrix using
    ``np.allclose`` with ``atol=1e-6``. On a mismatch, the difference
    matrix and its largest absolute entry are printed.

    Everything is written to stdout; the function returns ``None``.
    """
    # Input matrices, exactly as provided.
    PV = np.array([
        [ 0.839099,  0.000000, -0.000604, -0.000091],
        [-0.000238,  0.771070, -0.330967, -0.126752],
        [-0.000688, -0.410530, -0.956432,  1.249704],
        [-0.000661, -0.394431, -0.918925,  1.592853],
    ])
    Q = np.array([
        [12.755102,  0.000000,  0.000000,  0.000000],
        [ 0.000000, 12.755102,  0.000000,  0.000000],
        [ 0.000000,  0.000000, 12.755102,  0.000000],
        [ 0.000000,  0.000000,  0.000000, -1.000000],
    ])

    for heading, matrix in (("PV:", PV), ("Q:", Q)):
        print(heading)
        print(matrix, "\n")

    try:
        pv_inverse = np.linalg.inv(PV)
    except np.linalg.LinAlgError:
        # Singular PV: nothing further can be computed.
        print("PV is not invertible.")
        return

    # Sanity check on the inverse: the product should be numerically identity.
    print("PV_inv * PV (should be identity):")
    print(np.matmul(pv_inverse, PV), "\n")

    # QEP = (PV_inv)^T * Q * (PV_inv)
    QEP_python = pv_inverse.T @ Q @ pv_inverse
    print("QEP computed in Python:")
    print(QEP_python, "\n")

    # Reference QEP calculated outside this notebook. It was reported
    # together with: discriminant = -1312.725220, conic type = Ellipse.
    reference_rows = [
        [18.115772, -0.000000,   -0.004019,    0.004183],
        [-0.000000, 18.115778,   -5.625710,    5.855333],
        [-0.004019, -5.625712,  189.997421, -147.975769],
        [ 0.004183,  5.855332, -147.975754,  114.962334],
    ]
    my_calculated_QEP = np.array(reference_rows)

    print("Your pre-calculated QEP:")
    print(my_calculated_QEP, "\n")

    # Element-by-element comparison of the two QEP matrices.
    if np.allclose(QEP_python, my_calculated_QEP, atol=1e-6):
        print("The Python computed QEP matches your calculated QEP within the tolerance.")
        return

    delta = QEP_python - my_calculated_QEP
    print("The Python computed QEP does NOT match your calculated QEP.")
    print("Difference matrix:")
    print(delta)
    max_diff = np.abs(delta).max()
    print(f"Maximum absolute difference: {max_diff}")

if __name__ == "__main__":
    main()


PV:
[[ 8.390990e-01  0.000000e+00 -6.040000e-04 -9.100000e-05]
 [-2.380000e-04  7.710700e-01 -3.309670e-01 -1.267520e-01]
 [-6.880000e-04 -4.105300e-01 -9.564320e-01  1.249704e+00]
 [-6.610000e-04 -3.944310e-01 -9.189250e-01  1.592853e+00]] 

Q:
[[12.755102  0.        0.        0.      ]
 [ 0.       12.755102  0.        0.      ]
 [ 0.        0.       12.755102  0.      ]
 [ 0.        0.        0.       -1.      ]] 

PV_inv * PV (should be identity):
[[ 1.00000000e+00  6.38937659e-20  5.20507609e-19 -1.04672248e-18]
 [ 2.22000659e-19  1.00000000e+00  1.36311600e-16 -4.43943858e-16]
 [ 5.69352994e-19  5.69952542e-17  1.00000000e+00 -1.02536479e-15]
 [ 1.76631954e-19  4.13258087e-17  1.95374835e-16  1.00000000e+00]] 

QEP computed in Python:
[[ 1.81157911e+01 -5.24423609e-06 -4.05999492e-03  4.21992742e-03]
 [-5.24423609e-06  1.81157761e+01 -5.62575992e+00  5.85537089e+00]
 [-4.05999492e-03 -5.62575992e+00  1.89997753e+02 -1.47976007e+02]
 [ 4.21992742e-03  5.85537089e+00 -1.47976007e+02

TPUs (Tensor Processing Units) are accelerators developed by Google to speed up operations on a TensorFlow graph. Each TPU packs up to 180 teraflops of floating-point performance and 64 GB of high-bandwidth memory onto a single board. Here is a comparison between TPUs and Nvidia GPUs. The y-axis shows the number of images processed per second and the x-axis shows the different models.

<img src="https://cdn-images-1.medium.com/max/800/1*tVHGjJHJrhKaKECT3Z4CIw.png" alt="Drawing" style="width: 150px;"/>

# Experiment

TPUs used to be available only on Google Cloud, but they are now available for free in Colab. Here we compare TPU and GPU performance on Colab using the MNIST dataset, measuring the time per step and per epoch for different batch sizes.

# Download MNIST

In [None]:
import tensorflow as tf
import os
import numpy as np
from tensorflow.keras.utils import to_categorical

def get_data():
  """Load MNIST through ``tf.keras.datasets`` and prepare it for training.

  Image arrays are cast to float32 and divided by 255, then a new axis is
  inserted at position 3. Label arrays are converted with
  ``to_categorical``.

  Returns:
    A tuple ``(x_train, y_train, x_test, y_test)``.
  """
  def prepare_images(images):
    # Cast and rescale first, then add the extra axis at position 3.
    scaled = images.astype('float32') / 255
    return np.expand_dims(scaled, 3)

  # Load the MNIST data set.
  (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

  return (
      prepare_images(train_images),
      to_categorical(train_labels),
      prepare_images(test_images),
      to_categorical(test_labels),
  )

# Basic CNN

Note that running the code on a TPU requires some extra work: we need to specify the address of the TPU and tell TensorFlow to run the model on the TPU cluster.

In [None]:
from tensorflow.contrib.tpu.python.tpu import keras_support

def get_model(tpu = False):
  """Build and compile the CNN, optionally converting it to a TPU model.

  The network is two Conv2D / MaxPooling2D / Dropout stages followed by
  Flatten, a 256-unit ReLU Dense layer, Dropout and a 10-way softmax
  Dense layer. It is compiled with categorical cross-entropy loss, the
  Adam optimizer and the accuracy metric.

  Args:
    tpu: When truthy, the compiled model is converted with
      ``tf.contrib.tpu.keras_to_tpu_model`` using the TPU address read
      from the ``COLAB_TPU_ADDR`` environment variable.

  Returns:
    The compiled Keras model, or its TPU-converted counterpart.

  Raises:
    KeyError: If ``tpu`` is truthy and ``COLAB_TPU_ADDR`` is not set.

  NOTE(review): the TPU path relies on ``tf.contrib`` APIs, which are
  presumably only available on TensorFlow 1.x runtimes - confirm the
  runtime version before using ``tpu=True``.
  """
  layers = tf.keras.layers

  # Layers are listed in the order they are stacked.
  model = tf.keras.Sequential([
      layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1)),
      layers.MaxPooling2D(pool_size=2),
      layers.Dropout(0.3),

      layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
      layers.MaxPooling2D(pool_size=2),
      layers.Dropout(0.3),

      layers.Flatten(),
      layers.Dense(256, activation='relu'),
      layers.Dropout(0.5),
      layers.Dense(10, activation='softmax'),
  ])

  # Compilation happens before any TPU conversion.
  model.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

  # Without the flag, the plain compiled model is returned.
  if not tpu:
    return model

  # Resolve the TPU cluster from the address Colab exposes.
  tpu_address = "grpc://" + os.environ["COLAB_TPU_ADDR"]
  resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_address)

  # Wrap the resolver in a distribution strategy and convert the model.
  tpu_strategy = keras_support.TPUDistributionStrategy(resolver)
  return tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpu_strategy)

# GPU vs TPU


In [None]:
# Load the train/test images and labels once; the cells below reuse these names.
x_train, y_train, x_test, y_test = get_data()

Each time you want to run the model on a TPU, make sure to set the `tpu` flag and change the runtime environment via Edit > Notebook Settings > Hardware Accelerator > TPU, then click Save.

In [None]:
# Build the compiled model. Pass tpu=True to have get_model convert it for
# TPU execution; tpu=False returns the plain compiled Keras model.
model = get_model(tpu = False)

In [None]:
# Train the model, using the test split as validation data.
# NOTE(review): the saved output of this cell lists 3 epochs while epochs=10
# here - re-run the cell to refresh the output.
model.fit(x_train,
         y_train,
         batch_size=1024,
         epochs=10,
         validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


# Benchmarks

Note that TPU setup takes some time while the model is compiled and the data is distributed across the cluster, so the first epoch takes longer. I only report the time for the later epochs, calculated as the average time across those epochs.

### Epoch Time ($s$)

$$\left[\begin{array}{c|c|c}  
 \textbf{Batch Size} & \textbf{GPU} & \textbf{TPU} \\
 256 & 6s & 6s\\  
 512 & 5s & 3s\\
 1024 & 4s & 2s\\
\end{array}\right]$$

### Step Time ($\mu s$)

$$\left[\begin{array}{c|c|c}  
 \textbf{Batch Size} & \textbf{GPU} & \textbf{TPU} \\
 256 & 94 \mu s & 97 \mu s\\  
 512 & 82 \mu  s& 58 \mu s \\
 1024 & 79 \mu s & 37 \mu s\\
\end{array}\right]$$

# References



*   https://qiita.com/koshian2/items/25a6341c035e8a260a01
*   https://medium.com/tensorflow/hello-deep-learning-fashion-mnist-with-keras-50fcff8cd74a
*   https://blog.riseml.com/benchmarking-googles-new-tpuv2-121c03b71384
*   https://cloudplatform.googleblog.com/2018/02/Cloud-TPU-machine-learning-accelerators-now-available-in-beta.html

