In [84]:
import sklearn
import tensorflow as tf
from tensorflow import keras
import numpy as np

mnist = tf.keras.datasets.mnist
layer_amt = 3

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [63]:
base_model = tf.keras.models.Sequential()
base_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

for x in range(layer_amt): 
    base_model.add(tf.keras.layers.Dense(128, activation='relu'))

base_model.add(tf.keras.layers.Dropout(0.2))
base_model.add(tf.keras.layers.Dense(10))


base_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
base_model.fit(x_train, y_train, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x2efa0f820>

In [85]:
base_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.1241 - accuracy: 0.9781 - 149ms/epoch - 476us/step


[0.12409665435552597, 0.9781000018119812]

In [69]:
import tensorflow.keras.backend as K
from keras import activations

@keras.saving.register_keras_serializable()
class PCAProj(tf.keras.layers.Layer):
    def __init__(self, weights, k, activation, **kwargs):
        super().__init__(**kwargs)
        self.trainable=False
        self.activation = activations.get(activation)
        self.k = k
        
        # if W is m x n, A is m x k and B is k x n
        w, b = weights
        self.b = K.constant(b)
        if w is not None: 
            u, s, vT = np.linalg.svd(w, full_matrices=False)
            self.A = u[:, :k] @ np.diag(s[:k])
            self.B = vT[:k]
        
    def call(self, inputs):  
        return self.activation(tf.matmul(inputs, tf.matmul(self.A, self.B)) + self.b)

    def get_config(self):
        base_config = super().get_config()
        config = {
            "A": keras.saving.serialize_keras_object(self.A),
            "B": keras.saving.serialize_keras_object(self.B),
            "bias": keras.saving.serialize_keras_object(self.b),
            "k": self.k,
            "activation": self.activation
        }
        return {**base_config, **config}
    
    @classmethod
    def from_config(cls, config):
        A = keras.saving.deserialize_keras_object(config.pop("A"))
        B = keras.saving.deserialize_keras_object(config.pop("B"))
        bias = keras.saving.deserialize_keras_object(config.pop("bias"))
        layer = cls([None, bias], **config)
        layer.A = A
        layer.B = B
        return layer

In [88]:
optimized_model = tf.keras.models.Sequential()
optimized_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

weights = base_model.get_weights()

for x in range(layer_amt): 
    optimized_model.add(PCAProj(weights[2*x:2*(x+1)], k = 40, activation='relu'))
optimized_model.add(tf.keras.layers.Dropout(0.2))
optimized_model.add(PCAProj(weights[2*layer_amt:2*(layer_amt+1)], k = 10, activation='relu'))

optimized_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [90]:
optimized_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.1370 - accuracy: 0.9710 - 143ms/epoch - 458us/step


[0.13704745471477509, 0.9710000157356262]

In [91]:
base_model.save("./base_model.h5")
optimized_model.save("./optimized_model.h5")

In [82]:
loaded_optimized_model = keras.models.load_model("./optimized_model.keras")

OSError: No file or directory found at ./optimized_model.keras

In [12]:
!pip3 install tensorflow==2.11.0

[38;5;240m 1:14  up 7 days,  3:35, 2 users, load averages: 1.73 1.97 2.27 
[35m                          narfee[0m@[35mNicks-MacBook
[35m        |\__/,|   (`\     [36mos[0m        macOS   13.3.1  
[35m      _.|[34mo o[35m  |_   ) )    [36mkernel[0m    Darwin 22.4.0
[35m    [34m-[35m((([34m---[35m((([34m--------    [36mshell[0m     fish

[35m                                         [7m[31m [31m [32m [32m [33m [33m [34m [34m [35m [35m [36m [36m [m
[31mERROR: Could not find a version that satisfies the requirement tensorflow==2.11.0 (from versions: 2.13.0rc0, 2.13.0rc1, 2.13.0rc2, 2.13.0, 2.13.1, 2.14.0rc0, 2.14.0rc1, 2.14.0, 2.14.1, 2.15.0rc0, 2.15.0rc1, 2.15.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow==2.11.0[0m[31m
[0m

In [58]:
low_param_model = tf.keras.models.Sequential()
low_param_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
low_param_model.add(tf.keras.layers.Dense(100, activation='relu'))
low_param_model.add(tf.keras.layers.Dropout(0.2))
low_param_model.add(tf.keras.layers.Dense(10))
low_param_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
low_param_model.fit(x_train, y_train, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x2def53790>

In [59]:
low_param_model.save("./low_param_model.h5")

In [99]:
(np.prod(optimized_model.layers[2].A.shape) + np.prod(optimized_model.layers[2].A.shape))/np.prod(base_model.layers[2].get_weights()[0].shape)

0.625

In [104]:
optimized_model.layers[1].A.shape, optimized_model.layers[1].B.shape, optimized_model.layers[2].A.shape, optimized_model.layers[2].B.shape

((784, 40), (40, 128), (128, 40), (40, 128))

In [101]:
optimized_model.layers

[<keras.src.layers.reshaping.flatten.Flatten at 0x2a0cf8ee0>,
 <__main__.PCAProj at 0x287bb9c00>,
 <__main__.PCAProj at 0x287747a30>,
 <__main__.PCAProj at 0x2875838b0>,
 <keras.src.layers.regularization.dropout.Dropout at 0x287745900>,
 <__main__.PCAProj at 0x287bba380>]