# TensorNetworks in Neural Networks.

Training the MNIST dataset using tensornetwork package from google.
#### Reference: https://github.com/google/TensorNetwork

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
tf.compat.v1.enable_v2_behavior()
import tensornetwork as tn
tn.set_default_backend("tensorflow")

# Load MNIST dataset from Keras

In [7]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data("./mnist.npz")

In [9]:
from tensorflow.keras.utils import to_categorical

x_train = x_train.reshape((60000, 28, 28, 1))
y_train = to_categorical(y_train, 10)
x_test = x_test.reshape((10000, 28, 28, 1))
y_test = to_categorical(y_test, 10)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
print(y_train)

(60000, 28, 28, 1) (60000, 10)
(10000, 28, 28, 1) (10000, 10)
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


In [10]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

get_available_gpus()

[]

# TensorNetwork layer definition

Here, we define the TensorNetwork layer we wish to use to replace the fully connected layer. Here, we simply use a 2 node Matrix Product Operator network to replace the normal dense weight matrix.

In [11]:
class TNLayer(tf.keras.layers.Layer):
    
    def __init__(self, dim):
        super(TNLayer, self).__init__()
        # Create the variables for the layer.
        self.dim = dim
        self.a_var = tf.Variable(tf.random.normal(shape=(dim, dim, 2), stddev=1.0/dim), name="a", trainable=True)
        self.b_var = tf.Variable(tf.random.normal(shape=(dim, dim, 2), stddev=1.0/dim), name="b", trainable=True)
        self.bias = tf.Variable(tf.zeros(shape=(dim, dim)), name="bias", trainable=True)

    def call(self, inputs):
        # Define the contraction.
        # We break it out so we can parallelize a batch using tf.vectorized_map.
        print(inputs.shape)
        def f(input_vec, a_var, b_var, bias_var):
            # Reshape to a matrix instead of a vector.
            input_vec = tf.reshape(input_vec, (self.dim, self.dim))

            # Now we create the network.
            a = tn.Node(a_var)
            b = tn.Node(b_var)
            x_node = tn.Node(input_vec)
            a[1] ^ x_node[0] # ^ means connection
            b[1] ^ x_node[1]
            a[2] ^ b[2]

            # The TN should now look like this
            #   |     |
            #   a --- b
            #    \   /
            #      x

            # Now we begin the contraction.
            c = a @ x_node
            result = (c @ b).tensor

            # Finally, add bias.
            return result + bias_var

        # To deal with a batch of items, we can use the tf.vectorized_map function.
        # https://www.tensorflow.org/api_docs/python/tf/vectorized_map
        result = tf.vectorized_map(lambda vec: f(vec, self.a_var, self.b_var, self.bias), inputs)
        return tf.nn.relu(tf.reshape(result, (-1, self.dim**2)))

In [12]:
class MultiNodeTNLayer(tf.keras.layers.Layer):
    
    def __init__(self, dim, n_nodes):
        super(TNLayer, self).__init__()
        # Create the variables for the layer.
        self.dim = dim
        self.a_var = tf.Variable(tf.random.normal(shape=(dim, dim, 2), stddev=1.0/dim), name="a", trainable=True)
        self.b_var = tf.Variable(tf.random.normal(shape=(dim, dim, 2), stddev=1.0/dim), name="b", trainable=True)
        self.bias = tf.Variable(tf.zeros(shape=(dim, dim)), name="bias", trainable=True)

    def call(self, inputs):
        # Define the contraction.
        # We break it out so we can parallelize a batch using tf.vectorized_map.
        print(inputs.shape)
        def f(input_vec, a_var, b_var, bias_var):
            # Reshape to a matrix instead of a vector.
            input_vec = tf.reshape(input_vec, (self.dim, self.dim))

            # Now we create the network.
            a = tn.Node(a_var)
            b = tn.Node(b_var)
            x_node = tn.Node(input_vec)
            a[1] ^ x_node[0] # ^ means connection
            b[1] ^ x_node[1]
            a[2] ^ b[2]

            # The TN should now look like this
            #   |     |
            #   a --- b
            #    \   /
            #      x

            # Now we begin the contraction.
            c = a @ x_node
            result = (c @ b).tensor

            # Finally, add bias.
            return result + bias_var

        # To deal with a batch of items, we can use the tf.vectorized_map function.
        # https://www.tensorflow.org/api_docs/python/tf/vectorized_map
        result = tf.vectorized_map(lambda vec: f(vec, self.a_var, self.b_var, self.bias), inputs)
        return tf.nn.relu(tf.reshape(result, (-1, self.dim**2)))

# Dense model vs. TN model
The TN layer has nearly 100x fewer parameters in this particular architecture.

In [17]:
Dense = tf.keras.layers.Dense
Conv2D = tf.keras.layers.Conv2D
MaxPooling2D = tf.keras.layers.MaxPooling2D

model = tf.keras.Sequential()
model.add(Conv2D(16, kernel_size=(3,3), activation='relu', name='conv1', input_shape=(28,28,1)))
model.add(Conv2D(32, kernel_size=(3,3), activation='relu', name='conv2'))
model.add(Conv2D(16, kernel_size=(3,3), activation='relu', name='conv3'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(tf.keras.layers.Flatten())
model.add(Dense(1936, activation='relu', name='fc1'))
model.add(Dense(10, activation='softmax', name='fc2'))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 26, 26, 16)        160       
_________________________________________________________________
conv2 (Conv2D)               (None, 24, 24, 32)        4640      
_________________________________________________________________
conv3 (Conv2D)               (None, 22, 22, 16)        4624      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 11, 11, 16)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1936)              0         
_________________________________________________________________
fc1 (Dense)                  (None, 1936)              3750032   
_________________________________________________________________
fc2 (Dense)                  (None, 10)               

In [18]:
tn_model = tf.keras.Sequential()
tn_model.add(Conv2D(16, kernel_size=(3,3), activation='relu', name='conv1', input_shape=(28,28,1)))
tn_model.add(Conv2D(32, kernel_size=(3,3), activation='relu', name='conv2'))
tn_model.add(Conv2D(16, kernel_size=(3,3), activation='relu', name='conv3'))
tn_model.add(MaxPooling2D(pool_size=(2,2)))
tn_model.add(tf.keras.layers.Flatten())
tn_model.add(TNLayer(44))
tn_model.add(Dense(10, activation='softmax', name='fc2'))
tn_model.summary()

(None, 1936)
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 26, 26, 16)        160       
_________________________________________________________________
conv2 (Conv2D)               (None, 24, 24, 32)        4640      
_________________________________________________________________
conv3 (Conv2D)               (None, 22, 22, 16)        4624      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 11, 11, 16)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 1936)              0         
_________________________________________________________________
tn_layer_1 (TNLayer)         (None, 1936)              9680      
_________________________________________________________________
fc2 (Dense)                  (None, 10)  

# Training the two models

In [23]:
%%time

# Traditional model
model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
hist = model.fit(x_test, y_test, epochs=1, verbose=1)

Train on 10000 samples
CPU times: user 28 s, sys: 210 ms, total: 28.2 s
Wall time: 28.7 s


In [24]:
loss = hist.history['loss']
acc = hist.history['accuracy']
np.savetxt('loss_.out', loss, delimiter=',')  
np.savetxt('acc.out', acc, delimiter=',')  

In [15]:
%%time

# TensorNetwork model
tn_model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
tn_hist = tn_model.fit(x_train, y_train, epochs=20, verbose=1)

Train on 60000 samples
Epoch 1/20
(32, 1936)
(32, 1936)

KeyboardInterrupt: 

In [None]:
np.savetxt('loss_MERA64.out', tn_loss, delimiter=',')  
np.savetxt('acc_MERA64.out', tn_acc, delimiter=',')  


# Tests and performance graphs

In [None]:
# Traditional model
model.evaluate(x=x_test, y=y_test)

In [None]:
# TN model
tn_model.evaluate(x=x_test, y=y_test)

In [None]:
loss = hist.history['loss']
acc = hist.history['accuracy']
tn_loss = tn_hist.history['loss']
tn_acc = tn_hist.history['accuracy']

plt.plot(loss, label='traditional')
plt.plot(tn_loss, label='tn')
plt.title('Loss comparsion in training')
plt.ylim(0, 0.15)
plt.legend()
plt.show()

plt.plot(acc, label='traditional')
plt.plot(tn_acc, label='tn')
plt.title('Accuracy comparsion in training')
plt.legend()
plt.show()