Kevin Casey 

Required import of fashion classifier dataset 

In [7]:
#Input the fashion image dataset in order to train the image classifier
import tensorflow as tf 
fashion_mnist = tf.keras.datasets.fashion_mnist

#Split the input into training values and testing values
(x_train, y_train),(x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

Example given to replicate without using the keras library in the next code cell

In [None]:
import os, datetime
def create_model():
  return tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
  ])


def train_model():
  model = create_model()
  model.summary()
  model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

  logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
  tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

  model.fit(x=x_train, 
            y=y_train, 
            epochs=50, 
            validation_data=(x_test, y_test),
            callbacks=[tensorboard_callback])

train_model()


In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs

Recreating the above image classifier without using the keras library


In [5]:
#Kevin Casey 

#the install may be required based on current version
#!pip install tensorflow==2.0.0

import tensorflow as tf
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
import time as time

batch_size = 128
nb_epoch = 50
nb_classes = 10

#initialize the starting weights and biases
w0 = tf.Variable(
    tf.random.normal(
    shape=[784, 512],
    mean=0.0,
    stddev=1.0,
    seed=43,
    dtype=tf.dtypes.float64),
    trainable=True)
b0 = tf.Variable(
    tf.random.normal(
    shape=[batch_size, 512],
    mean=0.0,
    stddev=1.0,
    seed=43,
    dtype=tf.dtypes.float64),
    trainable=True)

w1 = tf.Variable(
    tf.random.normal(
    shape=[512, 10],
    mean=0.0,
   stddev=1.0,
    seed=43,
    dtype=tf.dtypes.float64),
    trainable=True)
b1 = tf.Variable(
    tf.random.normal(
    shape=[batch_size, 10],
    mean=0.0,
    stddev=1.0,
    seed=43,
    dtype=tf.dtypes.float64),
    trainable=True)



def sm_dense(y, w, b):
  return tf.nn.softmax(tf.matmul(y, w) + b)

def relu_dense(y, w, b):
  return tf.nn.relu(tf.matmul(y, w) + b)



(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
#swap all data sets to tensors to work with
x_train = tf.convert_to_tensor(x_train, dtype=tf.float64)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float64)
y_train = tf.convert_to_tensor(y_train, dtype=tf.float64)
y_test = tf.convert_to_tensor(y_test, dtype=tf.float64)

#create the model
def multilayer_perceptron(x, w0, b0, w1, b1):
  x = tf.reshape(x, [-1, 784])
  x = relu_dense(x, w0, b0)
  tf.nn.dropout(x, 0.2)
  x = sm_dense(x, w1, b1)
  return x

#I set the learning rate relatively high as it was running into a substantial
#amount of local minima, getting stuck quickly at 0.1
learning_rate = 0.5

start = time.time()

for epoch in range(nb_epoch):
  epoch_time = time.time()
  #create the shuffled batches here and iterate through them
  for batch_iter in range(x_train.shape[0]//128):
    #iterates through the epoch until there are no more training values
    offset = batch_iter * batch_size
    batch_data = x_train[offset:(offset + batch_size), :] 
    batch_labels = y_train[offset:(offset + batch_size):]

    #creates the model and determines the loss to obtain the gradients
    with tf.GradientTape() as tape:
      tape.watch([w0, w1, b0, b1])
      model = multilayer_perceptron(batch_data, w0, b0, w1, b1)
      batch_labels = tf.cast(batch_labels, tf.int64)
      loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 
                          labels=batch_labels, logits=model)
      cost = tf.reduce_mean(loss)    

    gradsw0, gradsw1, gradsb0, gradsb1 = tape.gradient(cost, [w0,w1,b0,b1])

    #the gradient descent steps for each set of weights and biases.
    #the training level is significantly slower than the keras implementation,
    #yet it still learns until a local minima is hit, feedback on why the 
    #training went as slow as it did would be appreciated
    w0 = tf.Variable(w0 - gradsw0*learning_rate*cost)
    w1 = tf.Variable(w1 - gradsw1*learning_rate*cost)
    b0 = tf.Variable(b0 - gradsb0*learning_rate*cost)
    b1 = tf.Variable(b1 - gradsb1*learning_rate*cost)

  print('iter:', epoch, 'loss:', cost.numpy(), "Epoch time: ", time.time() - epoch_time)

#total training time
print("Training time: ", time.time() - start)

#using the test values to test how well the training worked on a new set of data
for batch_iter in range(x_test.shape[0]//128):
  offset = batch_iter * batch_size
  batch_data = x_test[offset:(offset + batch_size), :] 
  batch_labels = y_test[offset:(offset + batch_size):]
  with tf.GradientTape() as tape:
    tape.watch([w0, w1, b0, b1])
    model = multilayer_perceptron(batch_data, w0, b0, w1, b1)
    batch_labels = tf.cast(batch_labels, tf.int64)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 
                            labels=batch_labels, logits=model)
    cost = tf.reduce_mean(loss)    

print("Verify loss: ", cost.numpy())

iter: 0 loss: 2.156429134379184 Epoch time:  2.5958216190338135
iter: 1 loss: 2.155854388329394 Epoch time:  2.428013801574707
iter: 2 loss: 2.1482726634416114 Epoch time:  2.5278735160827637
iter: 3 loss: 2.1486398851850335 Epoch time:  2.431572198867798
iter: 4 loss: 2.1480707813668616 Epoch time:  2.4691028594970703
iter: 5 loss: 2.060290979679099 Epoch time:  2.453275442123413
iter: 6 loss: 2.05455721835415 Epoch time:  2.5925471782684326
iter: 7 loss: 1.9926709408548242 Epoch time:  2.5873537063598633
iter: 8 loss: 1.9759071909067454 Epoch time:  2.652812957763672
iter: 9 loss: 1.9610042312289282 Epoch time:  2.629941701889038
iter: 10 loss: 1.9609136760582064 Epoch time:  2.4646148681640625
iter: 11 loss: 1.96087225811852 Epoch time:  2.4449496269226074
iter: 12 loss: 1.9767786799889877 Epoch time:  2.4581403732299805
iter: 13 loss: 1.960850530435548 Epoch time:  2.4461209774017334
iter: 14 loss: 1.9767007027960417 Epoch time:  2.456088066101074
iter: 15 loss: 1.9608361386371471 

Comparing and optimizing using different hardware run times for calculating matrix multiplication 

In [6]:
#Kevin Casey 11214791 kjc887

#the install is required and a runtime restart is sometimes required as well

#!pip install tensorflow-gpu==2.0.0
from tabulate import tabulate as table
import tensorflow as tf
import numpy as np
import time as time
import sys as sys

print(tf.config.experimental.list_physical_devices())

matrix_sizes = [500, 1000, 5000, 10000]
cpu_times = []
gpu_times = []

#first iteration through matrix_sizes runs on CPU
with tf.device('/device:CPU:0'):
  for size in matrix_sizes:
    #randomly generated matrices to multiply
    matrixA = tf.random.uniform(shape = [size,size],
                                  minval=0,
                                  maxval=None,
                                  dtype=tf.dtypes.float32,
                                  seed=1337,
                                  name='a')

    matrixB = tf.random.uniform(shape = [size,size],
                                  minval=0,
                                  maxval=None,
                                  dtype=tf.dtypes.float32,
                                  seed=1337,
                                  name='b')

    #starts time right before multiplication runs, ends time after function call and adds to to the CPU time list
    start_time = time.time()

    multcpu = tf.matmul(matrixA, matrixB)

    cpu_times.append(time.time() - start_time)


#this will run the exact same as the cpu, besides the device it is initially run on and the list to add the times to
with tf.device('/device:GPU:0'):
  for size in matrix_sizes:
    matrixA = tf.random.uniform(shape = [size, size],
                                  minval=0,
                                  maxval=None,
                                  dtype=tf.dtypes.float32,
                                  seed=1337,
                                  name='a')

    matrixB = tf.random.uniform(shape = [size, size],
                                  minval=0,
                                  maxval=None,
                                  dtype=tf.dtypes.float32,
                                  seed=1337,
                                  name='b')


    start_time = time.time()

    multgpu = tf.matmul(matrixA, matrixB)

    gpu_times.append(time.time() - start_time)

#prints off the table with all data and headers
print(table([["CPU" , cpu_times[0], cpu_times[1], cpu_times[2], cpu_times[3]], 
             ["GPU", gpu_times[0], gpu_times[1], gpu_times[2], gpu_times[3]]], headers=['Device', '500', '1000', '5000', '10000'],
             tablefmt='orgtbl'))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
| Device   |         500 |        1000 |       5000 |      10000 |
|----------+-------------+-------------+------------+------------|
| CPU      | 0.0497587   | 0.0304668   | 3.57925    | 27.4216    |
| GPU      | 0.000656843 | 0.000158787 | 6.8903e-05 |  0.0132332 |


Finding Jacobian values using 2x2 matrices

In [None]:
#!pip install tensorflow==2.0.0
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

def func(x):
  return (3. * tf.pow(x,2.) + 2.*x + 3)


x1 = tf.Variable([1., 3.], trainable=True)
x2 = tf.Variable([1., 3.], trainable=True)
learning_rate = 0.1

with tf.GradientTape() as tape:
  y1 = func(x1)
jacob = tape.jacobian(y1, x1)
print(jacob)

with tf.GradientTape() as tape:
  y2 = func(x2)
jacob = tape.jacobian(y2, x2)
print(jacob)

#both jacobians combined will work out to [[8, 20], [8, 20]] which results in a
#jacobian value of 0 (finding the determinent of the jacobian matrix)

tf.Tensor(
[[ 8.  0.]
 [ 0. 20.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 8.  0.]
 [ 0. 20.]], shape=(2, 2), dtype=float32)
