In [1]:
from copy import deepcopy
import pandas as pd
import pickle
import numpy as np
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_datasets as tfds
from keras.engine.input_layer import InputLayer
from keras.layers import Activation
from keras import backend as K
import keras

tf.keras.backend.set_floatx('float32')

%matplotlib inline
%load_ext autoreload
%autoreload 2

2023-02-22 03:11:06.394351: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-22 03:11:06.555816: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-02-22 03:11:07.336424: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-02-22 03:11:07.336499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] 

In [2]:
# Root directory of the ibmq_belem / Pauli_Stochastic data set, relative to this notebook.
data_path = "../data/mcb-7/ibmq_belem/Pauli_Stochastic/"

# Archive with the encoded circuits (model inputs) and archive with the
# per-circuit values that are later used as regression targets.
# NOTE(review): the target archive lives under "indices/" although its values are
# consumed as `pst` by the training generator - confirm this is the intended file.
x_path = f"{data_path}circuits/all_circuits_11.npz"
y_path = f"{data_path}indices/all_circuit_indices_11.npz"

In [3]:
# Load the train / validate / test splits of the encoded circuits.
# np.load on an .npz archive returns a lazy NpzFile that keeps the file handle
# open; the context manager closes it once the needed arrays are materialised.
with np.load(x_path) as x_data:
    x_train = x_data["train"]
    x_val = x_data["validate"]
    x_test = x_data["test"]

# Load the matching per-circuit targets for each split.
with np.load(y_path) as y_data:
    y_train = y_data["train"]
    y_val = y_data["validate"]
    y_test = y_data["test"]

# Inputs and targets are paired positionally later on (zip silently truncates
# to the shorter sequence), so a length mismatch should fail here instead.
assert len(x_train) == len(y_train), "train circuits and targets differ in length"
assert len(x_val) == len(y_val), "validation circuits and targets differ in length"
assert len(x_test) == len(y_test), "test circuits and targets differ in length"

In [4]:
# Axis layout of the encoded circuit array:
#   axis 0 - index of the training sample
#   axis 1 - index of the qubit (5 for IBM Belem)
#   axis 2 - index of the moment of the quantum circuit (zero padded to the maximum)
#   axis 3 - one-hot encoding of the different quantum gates (+ additional infos)
# The target array is printed alongside for comparison of the sample counts.
print(x_train.shape, y_train.shape, sep="\n")

(11620, 5, 1825, 10)
(11620,)


In [6]:
def transform_data(numpy_encoding, qubit_adjacency=((0, 1), (1, 2), (1, 3), (3, 4))):
    """Convert the array encoding of one quantum circuit into graph inputs.

    Every (moment, qubit) pair becomes one graph node with index
    ``moment * nqubits + qubit``. A node is connected to
      * the nodes of the coupled qubits within the same moment,
      * the node of the same qubit in the previous moment (if any),
      * the node of the same qubit in the next moment (if any),
    and its neighbour list is stored in exactly that order.

    Args:
        numpy_encoding: array of shape (nqubits, max_moments, dim_gate_vector).
            Trailing moments are expected to be zero padding: the circuit is
            cut at the first moment in which every entry of every qubit is 0.
            NOTE(review): an all-zero moment in the middle of a circuit would
            therefore truncate it - presumably the encoder never emits one.
        qubit_adjacency: iterable of undirected qubit couplings ``(a, b)``.
            Defaults to the couplings (0,1), (1,2), (1,3), (3,4). The argument
            is only read, never modified.

    Returns:
        A tuple ``(data, adjacency, global_features)``:
          * data: float32 ``tf.Tensor`` of shape (nqubits * nmoments,
            dim_gate_vector), ordered moment by moment.
          * adjacency: int32 ``tf.RaggedTensor`` whose row ``i`` lists the
            neighbour node indices of node ``i`` (ragged, not padded).
          * global_features: int32 ``tf.Tensor`` of shape (6,) holding
            ``[nmoments, nqubits, nZ, nX, nSX, nCNOT]``.

    Raises:
        ValueError: if ``numpy_encoding`` is not 3-dimensional.
    """
    numpy_encoding = np.asarray(numpy_encoding)
    if numpy_encoding.ndim != 3:
        raise ValueError(
            "numpy_encoding must have shape (nqubits, nmoments, dim_gate_vector), "
            "got array with %d dimension(s)" % numpy_encoding.ndim
        )

    # number of qubits and dimensionality of the gate vector
    nqubits, max_moments, dim_node_state = numpy_encoding.shape

    # number of moments: index of the first moment without any non-zero entry,
    # or the full length when the circuit fills the whole array
    occupied = np.any(numpy_encoding != 0, axis=(0, 2))
    empty_moments = np.flatnonzero(~occupied)
    nmoments = int(empty_moments[0]) if empty_moments.size else max_moments

    # per-qubit neighbour lists of the hardware coupling graph (undirected)
    architecture = [[] for _ in range(nqubits)]
    for edge in qubit_adjacency:
        qubit_a, qubit_b = edge[0], edge[1]
        architecture[qubit_a].append(qubit_b)
        architecture[qubit_b].append(qubit_a)

    # The first moment starts out as a copy of the coupling graph; every further
    # moment adds a shifted copy plus the links between consecutive moments.
    adjacency = [list(neighbours) for neighbours in architecture]
    for layer in range(1, nmoments):
        offset = layer * nqubits      # index of the first node of this moment
        previous = offset - nqubits   # index of the first node of the previous moment
        for qubit in range(nqubits):
            # forward link: previous moment -> this moment
            adjacency[previous + qubit].append(offset + qubit)
        adjacency.extend(
            # spatial neighbours first, then the backward link to the previous moment
            [neighbour + offset for neighbour in architecture[qubit]] + [previous + qubit]
            for qubit in range(nqubits)
        )

    adjacency = tf.ragged.constant(adjacency, dtype=tf.int32)

    # node states as a 2-D float32 array of shape (nqubits*nmoments, dim_node_state)
    node_states = (
        np.transpose(numpy_encoding[:, :nmoments, :], axes=(1, 0, 2))
        .reshape(nqubits * nmoments, dim_node_state)
        .astype(np.float32)
    )
    data = tf.constant(node_states, dtype=tf.float32)

    # Global features are counted on the NumPy array (no TF -> NumPy round trip).
    # Each count is the number of nodes with a non-zero entry in the given
    # channel(s). NOTE(review): channels 0, 1, 2 and 3..6 are presumably the
    # Z, X, SX and CNOT channels of the encoder - confirm against the encoding.
    nonzero = node_states != 0
    nZ = int(np.count_nonzero(nonzero[:, 0]))
    nX = int(np.count_nonzero(nonzero[:, 1]))
    nSX = int(np.count_nonzero(nonzero[:, 2]))
    nCNOT = int(np.count_nonzero(nonzero[:, 3:7].any(axis=-1)))
    global_features = tf.constant([nmoments, nqubits, nZ, nX, nSX, nCNOT], shape=[6], dtype=tf.int32)

    return data, adjacency, global_features

In [7]:
def train_data_generator():
    """Yield one training example per circuit of the training split.

    Pairs every circuit in ``x_train`` with the target at the same position in
    ``y_train`` and yields ``(data, adjacency, global_features, pst)``, where the
    first three entries are the outputs of ``transform_data`` for that circuit.
    """
    for circuit, pst in zip(x_train, y_train):
        yield (*transform_data(circuit), pst)

In [8]:
BUFFER_SIZE = 200
BATCH_SIZE = 1
DIM_GATE_VECTOR = 10
GLOBAL_FEATURES = 6

In [9]:
# Structure of one element produced by train_data_generator:
#   node states, ragged adjacency list, global circuit features, scalar target.
train_signature = (
    tf.TensorSpec(shape=(None, DIM_GATE_VECTOR), dtype=tf.float32),
    tf.RaggedTensorSpec(shape=(None, None), dtype=tf.int32),
    # explicit 1-tuple: "(GLOBAL_FEATURES)" without the comma is just an int
    tf.TensorSpec(shape=(GLOBAL_FEATURES,), dtype=tf.int32),
    tf.TensorSpec(shape=(), dtype=tf.float64),
)
train_samples = tf.data.Dataset.from_generator(
    train_data_generator, output_signature=train_signature
)

# Batches in generator order (no shuffling) - used for inspecting the data.
train_ds = train_samples.batch(BATCH_SIZE)

# Training stream: shuffle individual samples BEFORE batching, so that batches
# are re-composed on every pass instead of only being reordered as a whole.
# For BATCH_SIZE == 1 both orders are equivalent.
train_dataset = train_samples.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

2023-02-21 22:26:25.778161: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-02-21 22:26:25.778195: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-02-21 22:26:25.778222: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (icme-gpu1): /proc/driver/nvidia/version does not exist
2023-02-21 22:26:25.778682: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [90]:
# Peek at the target (last component of each element) of the first two batches.
for elem in train_ds.take(2):
    label = elem[-1]
    print(label)


tf.Tensor([1403.], shape=(1,), dtype=float64)
tf.Tensor([5132.], shape=(1,), dtype=float64)


In [91]:
class GraphTransformer(tf.keras.Model):
  """Attention-based regressor over circuit graphs.

  For every graph in the batch, ``num_layers`` rounds of neighbourhood attention
  are applied to the node states, the resulting node states are averaged into
  one graph vector, and that vector is concatenated with an embedding of the
  global circuit features before a small dense regression head.

  The forward pass uses Python loops over graphs and nodes and therefore only
  works when the model is called eagerly.

  NOTE(review): the regression head ends in a sigmoid, so predictions lie in
  (0, 1) - confirm that the training targets are scaled to that range.

  Args:
    num_layers: number of attention layers.
    dim_node_state: size of a node state vector; must equal the last dimension
      of the node-state input.
  """

  def __init__(self, num_layers=1, dim_node_state=10):
    super(GraphTransformer, self).__init__()
    self.num_layers = num_layers
    self.dim_node_state = dim_node_state

  def build(self, input):
    """Create the attention weights and the dense sub-layers.

    ``input`` (the input shape passed in by Keras) is not used: all weight
    shapes are derived from ``dim_node_state``.
    """
    dim = self.dim_node_state

    def square_weight(name):
      # (dim, dim) projection applied from the left to a (dim, n_nodes) matrix
      return self.add_weight(name,
                             shape=[dim, dim],
                             initializer='glorot_uniform',
                             trainable=True,
                             dtype=tf.float32)

    self.Q = []
    self.K = []
    self.V = []
    self.Skip = []
    self.Skip_bias = []
    for l in range(self.num_layers):
      self.Q.append(square_weight("Query{}".format(l)))
      self.K.append(square_weight("Key{}".format(l)))
      self.V.append(square_weight("Value{}".format(l)))
      self.Skip.append(square_weight("Skip{}".format(l)))
      # column vector, broadcast over all nodes of a graph
      self.Skip_bias.append(self.add_weight("Skip_bias{}".format(l),
                                            shape=[dim, 1],
                                            initializer='zeros',
                                            trainable=True,
                                            dtype=tf.float32))

    # embedding of the global circuit features
    self.global_features = [
        tf.keras.layers.Dense(dim, activation="relu", use_bias=True),
        tf.keras.layers.Dense(3, activation="relu", use_bias=True),
    ]

    # regression head on top of [graph vector, global feature embedding]
    self.regression = [
        tf.keras.layers.Dense(128, activation="relu", use_bias=True),
        tf.keras.layers.Dense(128, activation="relu", use_bias=True),
        tf.keras.layers.Dense(1, activation="sigmoid", use_bias=True),
    ]

  @staticmethod
  def _normalize_over_nodes(x, epsilon=1e-3):
    """Normalise every row of ``x`` (one feature across all nodes) to zero mean / unit variance.

    Stateless replacement for constructing ``LayerNormalization(axis=1)`` inside
    the forward pass: a freshly constructed layer has scale 1 and offset 0, so
    the result is the same, but no new layer and variables are allocated per
    call. ``epsilon`` matches the Keras LayerNormalization default.

    NOTE(review): for a (dim, n_nodes) matrix axis 1 is the node axis, i.e. the
    statistics are taken across nodes, not across features - confirm intended.
    """
    mean, variance = tf.nn.moments(x, axes=[1], keepdims=True)
    return (x - mean) * tf.math.rsqrt(variance + epsilon)

  # TODO implement call using tf.while_loop
  def call(self, inputs):
    """Run the forward pass.

    Args:
      inputs: sequence whose first three entries are
        ``inputs[0]`` - batched node states, one (n_nodes, dim_node_state)
                        matrix per graph,
        ``inputs[1]`` - batched ragged adjacency lists, row ``n`` of graph ``i``
                        holds the neighbour indices of node ``n``,
        ``inputs[2]`` - batched global circuit features.
        Further entries (e.g. the target) are ignored.

    Returns:
      Tensor with one prediction per graph (last dimension of size 1).
    """
    # attention scale of every node: 1 / sqrt(number of neighbours)
    D = [[1/np.sqrt(len(n)) for n in graph] for graph in inputs[1][:]]

    # node states per graph, transposed to (dim_node_state, n_nodes)
    U = [tf.transpose(graph) for graph in inputs[0][:]]

    graph_vector = []
    for i in range(len(U)):
      # adjacency of this graph; looked up once instead of once per node
      neighbour_lists = inputs[1][i]
      for l in range(self.num_layers):
        Q = self.Q[l] @ U[i]
        K = self.K[l] @ U[i]
        V = self.V[l] @ U[i]

        H = []
        for n in range(Q.shape[1]):
          neighbours = tf.cast(neighbour_lists[n], tf.int32)
          q = tf.transpose(tf.gather(Q, indices=[n,], axis=1))  # (1, dim)
          k = tf.gather(K, indices=neighbours, axis=1)          # (dim, n_neighbours)
          # attention weights of node n over its neighbours
          a = tf.nn.softmax(tf.math.multiply(q @ k, D[i][n]), axis=-1)
          v = tf.gather(V, indices=neighbours, axis=1)          # (dim, n_neighbours)
          H.append(v @ tf.transpose(a))                         # (dim, 1)
        H = tf.concat(H, axis=1)

        # skip connection plus attention output, then normalisation
        S = self.Skip[l] @ U[i] + self.Skip_bias[l]
        U[i] = self._normalize_over_nodes(tf.add(S, H))
      # mean over all nodes -> one vector of size dim_node_state per graph
      graph_vector.append(tf.math.reduce_mean(U[i], axis=-1))

    # the dense layers operate on floats; the features arrive as integers
    global_features = tf.cast(inputs[2], tf.float32)
    for layer in self.global_features:
      global_features = layer(global_features)

    # (batch, dim_node_state) graph vectors next to the global feature embedding
    x = tf.concat([tf.stack(graph_vector, axis=0), global_features], axis=-1)

    for layer in self.regression:
      x = layer(x)

    return x

In [92]:
model = GraphTransformer()

optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
mse_loss_fn = tf.keras.losses.MeanSquaredError()

# Running mean over all per-step losses seen so far (it is never reset, so it
# keeps accumulating across epochs).
loss_metric = tf.keras.metrics.Mean()

epochs = 2

# Custom training loop: one optimizer update per batch.
# NOTE(review): the recorded run reports mean losses on the order of 1e7-1e8.
# The targets printed earlier are in the thousands while the model ends in a
# sigmoid - presumably targets need rescaling or the label file is wrong; confirm.
for epoch in range(epochs):
    print("Start of epoch %d" % (epoch,))

    for step, x_batch_train in enumerate(train_dataset):
        # last component of the batch is the regression target
        target = x_batch_train[3]

        with tf.GradientTape() as tape:
            y_pred = model(x_batch_train)
            # mean squared error between target and prediction
            loss = mse_loss_fn(target, y_pred)

        # weights are read after the forward pass, which builds the model on first use
        weights = model.trainable_weights
        grads = tape.gradient(loss, weights)
        optimizer.apply_gradients(zip(grads, weights))

        loss_metric(loss)

        # progress is reported after every single step
        print("step %d: mean loss = %.4f" % (step, loss_metric.result()))

Start of epoch 0
step 0: mean loss = 40142828.0000
step 1: mean loss = 131160408.0000
step 2: mean loss = 142940352.0000
step 3: mean loss = 158683824.0000


KeyboardInterrupt: 