## Load MNIST Dataset
### MNIST Dataset was loaded from Tensorflow package from -- https://www.tensorflow.org/datasets/catalog/mnist
Load the MNIST dataset is downloaded using the tensor flow package, with information in the link above. The link given in the assignment is now password protected, so I pulled it directly from the TensorFlow package instead.

In [18]:
import tensorflow as tf
import numpy as np

def get_mnist_dataset():
    """
    Grabs the MNIST dataset from TensorFlow.Keras and returns it in a usable form.
    :return: x_train, y_train, x_test, y_test
    """
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    return x_train, y_train, x_test, y_test


## Soft-max Regression Classifier
This section will define method to use the soft-max regression classifier to classify the MNIST dataset.
The classifier will have optional parameters to use or not use dropout, and to use or not use batch normalization.

In [19]:
class SoftmaxRegressionClassifier:
  def __init__(self, x_train, y_train, dropout=False, batch_norm=False):
    # Constant parameters for training
    self.learning_rate = 0.01
    self.batch_size = 100
    self.training_iterations = 5
    self.dropout_rate = 0.2
    # Store training dataset
    self.x_train = x_train
    self.y_train = y_train
    self.train_dataset = tf.data.Dataset.from_tensor_slices((self.x_train, self.y_train))
    # Store training params
    self.dropout = dropout
    self.batch_norm = batch_norm
    # Initialize weights, biases, and optimizer
    num_images = len(self.x_train)
    image_size = len(self.x_train[0][0])*len(self.x_train[0][1])
    self.weights = tf.Variable(tf.random.normal([image_size, num_images]), name="weights")
    self.biases = tf.Variable(tf.random.normal([num_images]), name="biases")

  def calculate_raw_model_output(self, x_data):
    """
    Calculates the raw output of the model, i.e. the logits or y_predicted. Also optionally applies batch normalization or dropout.
    :return: raw_model_output (logits)
    """
    # !!! Blows up here !!!
    raw_model_output = tf.add(tf.matmul(x_data, self.weights) + self.biases)

    # Optionally apply the batch normalization
    if self.batch_norm:
        raw_model_output = self.batch_norm(raw_model_output)
    # Optionally apply dropout
    if self.dropout:
        raw_model_output = tf.nn.dropout(raw_model_output, rate=self.dropout_rate)
    return raw_model_output

  def train_model(self):
    optimizer = tf.optimizers.Adam(learning_rate=self.learning_rate)
    trainable_variables = [self.weights, self.biases]

    # Iterate through the entire training dataset training_iterations times
    for iterations in range(self.training_iterations):
        with tf.GradientTape() as tape:
            y_pred = self.calculate_raw_model_output(self.x_train)
            # Calculate loss using softmax cross entropy, then reduce mean
            loss = tf.nn.softmax_cross_entropy_with_logits(logits=raw_model_output, labels=trainable_variables)
            loss = tf.reduce_mean(loss)

        gradients = tape.gradient(loss, trainable_variables)
        # Apply gradients
        optimizer.apply_gradients(zip(gradients, trainable_variables))

    # Calculate and display training accuracy every iteration through the dataset
    train_accuracy = self.calculate_accuracy(x_train, y_train)
    print(f"Epoch {epoch + 1}, Training Accuracy: {train_accuracy.numpy()}")

  def accuracy_external(self, x_test_data, y_test_data):
    predictions = self.calculate_raw_model_output(x_test_data)
    predictions = tf.nn.softmax(predictions)
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y_test_data, 1))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)

  def accuracy_internal(self):
    predictions = self.calculate_raw_model_output(self.x_train)
    predictions = tf.nn.softmax(predictions)
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(self.y_train, 1))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)



In [22]:
x_train, y_train, x_test, y_test = get_mnist_dataset()


num_images = len(x_train)
image_size = len(x_train[0][0])*len(x_train[0][1])

print(f"Number of images: {num_images}")
print(f"Image size: {image_size}")
print("test")

softmax = SoftmaxRegressionClassifier(x_train, y_train, dropout=False, batch_norm=False)
softmax.train_model()
train_accuracy = softmax.accuracy_internal()
ext_accuracy = softmax.accuracy_external(x_test, y_test)




# print(train_dataset)
# print(f"Shape of training tensor: {train_dataset.shape}")
# print(f"Datatype of training tensor: {train_dataset.dtype}")
# print(f"Device training tensor is stored on: {train_dataset.device}")




Number of images: 60000
Image size: 784
test


NotFoundError: Could not find device for node: {{node BatchMatMulV2}} = BatchMatMulV2[T=DT_UINT8, adj_x=false, adj_y=false]
All kernels registered for op BatchMatMulV2:
  device='CPU'; T in [DT_HALF]
  device='CPU'; T in [DT_BFLOAT16]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_DOUBLE]
  device='CPU'; T in [DT_INT16]
  device='CPU'; T in [DT_INT32]
  device='CPU'; T in [DT_INT64]
  device='CPU'; T in [DT_COMPLEX64]
  device='CPU'; T in [DT_COMPLEX128]
  device='XLA_CPU_JIT'; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, DT_COMPLEX64, DT_INT64, DT_BFLOAT16, DT_UINT16, DT_COMPLEX128, DT_HALF, DT_UINT32, DT_UINT64]
 [Op:BatchMatMulV2] name: 