In [21]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model

import numpy as np
import matplotlib.pyplot as plt

import datetime
%load_ext tensorboard

In [2]:
def get_data():
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    x_train, x_test = x_train / 255.0, x_test / 255.0
    y_train = tf.one_hot(y_train, 10)
    y_test = tf.one_hot(y_test, 10)
    
    print(
        f'Training size: {len(x_train)}',
        f'Test size: {len(x_test)}'
    )

    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000, reshuffle_each_iteration=True).batch(100)
    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(100)
    return train_ds, test_ds

train_ds, test_ds = get_data()

Training size: 60000 Test size: 10000


2022-12-04 19:07:03.395101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-04 19:07:03.395120: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-04 19:07:03.395133: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kang-arch): /proc/driver/nvidia/version does not exist
2022-12-04 19:07:03.395347: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        
    def summary(self, input_shape=(28,28)):
        x = layers.Input(shape=input_shape)
        model = tf.keras.Model(inputs=[x], outputs=self.call(x))
        print(model.summary())
    
    def default_kernel_initializer(self):
        return keras.initializers.TruncatedNormal(
            mean=0.0,
            stddev=0.01,
            seed=1000
        )
    
    def default_bias_initializer(self):
         return keras.initializers.Constant(-0.1)
        
    def create_dense_layer(self, neuron_amount, activation, name):
        return layers.Dense(
            neuron_amount,
            activation=activation,
#             kernel_initializer=self.default_kernel_initializer(),
#             bias_initializer=self.default_bias_initializer(),
            name=name)
    
class LinearModel(MyModel):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.flatten = layers.Flatten(input_shape=(28,28), name='flatten')
        self.o = layers.Dense(10, activation='softmax', name='output')
    
    def call(self, x):
        x = self.flatten(x)
        x = self.o(x)
        return x
    
class MlpModel(MyModel):
    def __init__(self):
        super(MlpModel, self).__init__()
        
        self.flatten = layers.Flatten(input_shape=(28,28), name='flatten')
        self.h1 = self.create_dense_layer(1500, 'relu', 'hidden_1')
        self.h2 = self.create_dense_layer(1500, 'relu', 'hidden_2')
        self.h3 = self.create_dense_layer(1500, 'relu', 'hidden_3')
        self.o = self.create_dense_layer(10, 'softmax', 'output')
     
    def call(self, x):
        x = self.flatten(x)
        x = self.h1(x)
        x = self.h2(x)
        x = self.h3(x)
        x = self.o(x)
        return x
    
class MlpDropoutModel(MyModel):
    def __init__(self):
        super(MlpDropoutModel, self).__init__()
        
        self.flatten = layers.Flatten(input_shape=(28,28), name='flatten')
        self.h1 = self.create_dense_layer(1500, 'relu', 'hidden_1')
        self.h2 = self.create_dense_layer(1500, 'relu', 'hidden_2')
        self.h3 = self.create_dense_layer(1500, 'relu', 'hidden_3')
        self.o = self.create_dense_layer(10, 'softmax', 'output')
        
        dropout_rate = 0.5
        self.dropout_layer1 = layers.Dropout(rate=dropout_rate, name='dropout_1')
        self.dropout_layer2 = layers.Dropout(rate=dropout_rate, name='dropout_2')
        self.dropout_layer3 = layers.Dropout(rate=dropout_rate, name='dropout_3')
    
    def call(self, x):
        x = self.flatten(x)
        x = self.h1(x)
        x = self.dropout_layer1(x)
        x = self.h2(x)
        x = self.dropout_layer2(x)
        x = self.h3(x)
        x = self.dropout_layer3(x)
        x = self.o(x)
        return x

class ConvModel(MyModel):
    def __init__(self):
        super(ConvModel, self).__init__()
        
        
        self.re = layers.Reshape((28,28,1), name='add_channel')
        
        self.conv1 = self.create_conv_2D_layer(32, 'conv1')
        self.maxp1 = self.create_max_pooling_layer('maxp1')
        self.conv2 = self.create_conv_2D_layer(64, 'conv2')
        self.maxp2 = self.create_max_pooling_layer('maxp2')
        
        self.f = layers.Flatten()
        self.o = self.create_dense_layer(10, 'softmax', 'output')
        
    def create_conv_2D_layer(self, filter_amount, name):
        return layers.Conv2D(
            filters=filter_amount,
            kernel_size=(5,5), 
            activation='relu',
            padding='same',
            strides=(1,1),
#             kernel_initializer=self.default_kernel_initializer(),
#             bias_initializer=self.default_bias_initializer(),
            name=name
        )
    
    def create_max_pooling_layer(self, name):
        return layers.MaxPooling2D(
            pool_size=(2,2),
            name=name
        )
    
    def call(self, x):
        
        x = self.re(x)
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.f(x)
        x = self.o(x)
    
        return x
   
    
model = MlpModel()
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 hidden_1 (Dense)            (None, 1500)              1177500   
                                                                 
 hidden_2 (Dense)            (None, 1500)              2251500   
                                                                 
 hidden_3 (Dense)            (None, 1500)              2251500   
                                                                 
 output (Dense)              (None, 10)                15010     
                                                                 
Total params: 5,695,510
Trainable params: 5,695,510
Non-tra

Note: It is possible to bake the tf.nn.softmax function into the activation function for the last layer of the network. While this can make the model output more directly interpretable, this approach is discouraged as it's impossible to provide an exact and numerically stable loss calculation for all models when using a softmax output.


https://www.tensorflow.org/tutorials/quickstart/beginner

In [48]:
class Trainer():

    def __init__(self, optimizer):
        self.optimizer = optimizer
        
        self.loss_object = keras.losses.CategoricalCrossentropy(from_logits=False)
        
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
        
        self.test_loss = tf.keras.metrics.Mean(name='test_loss')
        self.test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')
    
    @tf.function
    def train_step(self, model, optimizer, x, yt):
        with tf.GradientTape() as tape:
            predictions = model(x, training=True)
            loss = self.loss_object(yt, predictions)
            
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        
        self.train_loss(loss)
        self.train_accuracy(yt, predictions)
        
    @tf.function
    def test_step(self, model, x, yt):
        predictions = model(x, training=False)
        loss = self.loss_object(yt, predictions)
        
        self.test_loss(loss)
        self.test_accuracy(yt, predictions)
    
    def train(self, train_ds, model, name, epochs):
        
        train_summary_writer, test_summary_writer = self.create_summary_writer(name)
        
        i = 0
        for epoch in range(epochs):
            for x, yt in train_ds:
                if(i%100 == 0):
                    self.reset_states()
#                     print(f'Iteration {i//100}')
                    self.train_step(model, self.optimizer, x, yt)

                    for x, yt in test_ds:
                        self.test_step(model, x, yt)

                    with train_summary_writer.as_default():
                        tf.summary.scalar('accuracy:', self.train_accuracy.result(), step=i//100) 

                    with test_summary_writer.as_default():
                        tf.summary.scalar('accuracy:', self.test_accuracy.result(), step=i//100) 
            
                i += 1
                
            
            print(
                f'Epoch {epoch + 1}',
#                 f'Loss: {self.train_loss.result()}',
                f'Accuracy: {self.train_accuracy.result()}',
#                 f'Test Loss: {self.test_loss.result()}',
                f'Test Accuracy: {self.test_accuracy.result()}',
            )
        
    def create_summary_writer(self, name):
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = 'logs/' + name + '/' + current_time + '/train'
        test_log_dir = 'logs/' + name + '/' + current_time + '/test'
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        test_summary_writer = tf.summary.create_file_writer(test_log_dir) 
        
        return train_summary_writer, test_summary_writer
    
    def reset_states(self):
        self.train_loss.reset_states()
        self.train_accuracy.reset_state()
        self.test_loss.reset_states()
        self.test_accuracy.reset_states()

In [53]:
class Result():
    
    def linear_model(self):
        model = LinearModel()
        
        optimizer = tf.optimizers.experimental.SGD(0.5)
        trainer = Trainer(optimizer)
    
        # 16 epchos = 10000 iteration
        trainer.train(train_ds, model, 'linear_model', 16)
    
    def mlp_model(self):
        model = MlpModel() 
        
        optimizer = tf.optimizers.Adam(
            learning_rate=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08
        )
        trainer = Trainer(optimizer)
        
        trainer.train(train_ds, model, 'mlp_model', 32)
        
    def mlpdropout_model(self):
        model = MlpDropoutModel() 
        
        optimizer = tf.optimizers.Adam(
            learning_rate=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08
        )
        trainer = Trainer(optimizer)
        trainer.train(train_ds, model, 'mlp_dropout_model', 32)
        
    def conv_model(self):
        model = ConvModel() 
        
        optimizer = tf.optimizers.Adam(
            learning_rate=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08
        )
        trainer = Trainer(optimizer)
        trainer.train(train_ds, model, 'conv_model', 32)
    
result = Result()
result.linear_model()
result.mlp_model()
result.mlpdropout_model()
result.conv_model()

Epoch 1 Accuracy: 0.8399999737739563 Test Accuracy: 0.7199000120162964
Epoch 2 Accuracy: 0.7799999713897705 Test Accuracy: 0.8111000061035156
Epoch 3 Accuracy: 0.949999988079071 Test Accuracy: 0.8580999970436096
Epoch 4 Accuracy: 0.8199999928474426 Test Accuracy: 0.8661999702453613
Epoch 5 Accuracy: 0.8600000143051147 Test Accuracy: 0.8860999941825867
Epoch 6 Accuracy: 0.9399999976158142 Test Accuracy: 0.909600019454956
Epoch 7 Accuracy: 0.8899999856948853 Test Accuracy: 0.9010000228881836
Epoch 8 Accuracy: 0.9100000262260437 Test Accuracy: 0.9186999797821045
Epoch 9 Accuracy: 0.9399999976158142 Test Accuracy: 0.9128000140190125
Epoch 10 Accuracy: 0.949999988079071 Test Accuracy: 0.9301000237464905
Epoch 11 Accuracy: 0.9300000071525574 Test Accuracy: 0.9254999756813049
Epoch 12 Accuracy: 0.9200000166893005 Test Accuracy: 0.9287999868392944
Epoch 13 Accuracy: 0.8899999856948853 Test Accuracy: 0.9369999766349792
Epoch 14 Accuracy: 0.9399999976158142 Test Accuracy: 0.9340000152587891
Epoc

In [54]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6007 (pid 153067), started 0:14:05 ago. (Use '!kill 153067' to kill it.)