In [None]:
import os
from copy import deepcopy

import numpy as np
from scipy.sparse import csr_array, csr_matrix, save_npz

In [None]:
# Root folder of the data set; both archive paths below are built relative to it.
data_path = "../data/mcb-7/ibmq_belem/Pauli_Stochastic/"
# Archive loaded as the model inputs (x) and archive loaded as the labels (y).
x_path = f"{data_path}circuits/all_circuits_11.npz"
y_path = f"{data_path}sps/none_Shots/sps_all_circuits_none_shots_11.npz"

In [None]:
# Load the train / validate / test splits of the inputs (x) and the labels (y).
# np.load on an .npz archive returns an NpzFile that keeps the underlying file open
# and reads each array only when its key is accessed. Using it as a context manager
# guarantees the file handle is released as soon as the three arrays are in memory.
with np.load(x_path) as x_data:
    x_train = x_data["train"]
    x_val = x_data["validate"]
    x_test = x_data["test"]

with np.load(y_path) as y_data:
    y_train = y_data["train"]
    y_val = y_data["validate"]
    y_test = y_data["test"]

In [None]:
def transform_data(numpy_encoding, qubit_adjacency=[[0,1], [1,2], [1,3], [3,4]]):
    """Convert the 3-tensor encoding of one quantum circuit into graph inputs.

    Every (moment, qubit) pair becomes one graph node with index
    ``moment * nqubits + qubit``. Nodes of the same moment are connected according
    to ``qubit_adjacency``; in addition every node is connected to the node of the
    same qubit in the previous and in the next moment. All edges are stored in both
    directions.

    Parameters
    ----------
    numpy_encoding : np.ndarray
        3-D array indexed as ``[qubit, moment, gate_vector_component]``. Only the
        moments before the first all-zero moment are used; if no moment is all-zero
        every moment is used.
    qubit_adjacency : sequence of [int, int]
        Undirected couplings between qubits of the device. It is only read, never
        modified, so the shared default list is safe.

    Returns
    -------
    x : np.ndarray, shape (nqubits * nmoments, dim_gate_vector)
        Node features, ordered moment by moment.
    a : scipy.sparse.csr_array, shape (nqubits * nmoments, nqubits * nmoments)
        Adjacency matrix with value 1.0 per edge (a coupling listed twice in
        ``qubit_adjacency`` is summed by the sparse constructor).
    gf : np.ndarray, shape (1, 6)
        Global features ``[nmoments, nqubits, nZ, nX, nSX, nCNOT]``: the number of
        nodes with a non-zero entry in gate-vector column 0, 1, 2 and in any of the
        columns 3..6 respectively (names follow the variable names used here).

    Notes
    -----
    A circuit whose very first moment is all-zero has ``nmoments == 0``; it yields
    an empty ``x`` and a 0x0 adjacency matrix instead of raising.
    """
    nqubits, total_moments, dim_node_state = numpy_encoding.shape

    # number of moments: index of the first moment that contains no gate at all
    empty_moments = np.flatnonzero(~np.any(numpy_encoding != 0, axis=(0, 2)))
    nmoments = int(empty_moments[0]) if empty_moments.size else total_moments
    nnodes = nqubits * nmoments

    # per-qubit neighbour lists of the device architecture (undirected)
    architecture = [[] for _ in range(nqubits)]
    for edge in qubit_adjacency:
        first, second = edge[0], edge[1]
        architecture[first].append(second)
        architecture[second].append(first)

    # Collect the edge list moment by moment. Building rows/cols directly avoids
    # re-copying a growing nested list for every moment, and naturally produces no
    # edges at all when the circuit has zero moments.
    rows = []
    cols = []
    for moment in range(nmoments):
        offset = moment * nqubits
        for qubit, neighbours in enumerate(architecture):
            node = offset + qubit
            # couplings inside the current moment
            for neighbour in neighbours:
                rows.append(node)
                cols.append(offset + neighbour)
            # same qubit, next moment
            if moment + 1 < nmoments:
                rows.append(node)
                cols.append(node + nqubits)
            # same qubit, previous moment
            if moment > 0:
                rows.append(node)
                cols.append(node - nqubits)

    # explicit integer dtype so that an empty edge list is still a valid index array
    rows = np.array(rows, dtype=np.int64)
    cols = np.array(cols, dtype=np.int64)
    val = np.ones(len(rows))
    a = csr_array((val, (rows, cols)), shape=(nnodes, nnodes))

    # compute data 2-tensor with shape (nqubits*nmoments, dim_node_state)
    x = np.transpose(numpy_encoding[:, :nmoments, :], axes=(1, 0, 2)).reshape(nnodes, dim_node_state)

    # compute global features of quantum circuit
    nZ = np.sum(x[:, 0] != 0)
    nX = np.sum(x[:, 1] != 0)
    nSX = np.sum(x[:, 2] != 0)
    nCNOT = np.sum(np.any(x[:, 3:7] != 0, axis=-1))
    gf = np.array([nmoments, nqubits, nZ, nX, nSX, nCNOT]).reshape(1, 6)

    return x, a, gf

In [None]:
# Start values for the training-set statistics: one (1, 6) row of zeros each for
# the mean and the standard deviation of the six global features, and an empty
# list that will hold the transformed training samples.
train_transformed = list()
global_features_mean, global_features_std = (np.zeros(shape=(1, 6)) for _ in range(2))

In [None]:
# Transform every training circuit once and keep the results. The list is rebuilt
# from scratch here so that re-running this cell does not append the training set a
# second time or accumulate onto statistics that were already normalised.
train_transformed = []
for circuit, y in zip(x_train, y_train):
    x, a, gf = transform_data(circuit)
    train_transformed.append([x, a, gf, y])

if not train_transformed:
    raise ValueError("no training circuits available to compute global feature statistics")

# Stack the (1, 6) global-feature rows into one (n_circuits, 6) array. The statistics
# are taken over the number of transformed circuits, not over y_train.size, which
# counts every label element and differs from the sample count for multi-dimensional
# labels.
train_global_features = np.concatenate([elem[2] for elem in train_transformed], axis=0)

# calculate mean values of the global features in the training data
global_features_mean = train_global_features.mean(axis=0, keepdims=True)

# calculate std (population std, ddof=0) of the global features in the training data
global_features_std = train_global_features.std(axis=0, keepdims=True)

# fix division by zero for normalization
global_features_std[global_features_std == 0] = 1

In [None]:
datasets = [x_train, x_val, x_test]
labels = [y_train, y_val, y_test]
names = ["train", "validation", "test"]

# save transformed dataset to file
for name, circuits, targets in zip(names, datasets, labels):
    # np.savez / save_npz do not create missing folders, so make sure the target
    # directory of this split exists before writing into it
    out_dir = "dataset/" + name
    os.makedirs(out_dir, exist_ok=True)

    for j, (circuit, y) in enumerate(zip(circuits, targets)):
        x, a, gf = transform_data(circuit)

        # Pad the normalised global features to the width of a node-feature row and
        # prepend them as an extra first row of x. Per the original note this is done
        # because the graph library used downstream (written "spectral" there,
        # presumably Spektral -- confirm) does not accept graph.x, graph.a and
        # graph.global_features together as model input.
        # NOTE: x therefore has one row more than the adjacency matrix a has nodes.
        z = np.zeros((1, x.shape[1]))
        z[0, :gf.size] = (gf - global_features_mean) / global_features_std
        x = np.concatenate([z, x], axis=0)

        np.savez(out_dir + "/graph_{}.npz".format(j), x=x, y=y)
        save_npz(out_dir + "/adjacency_{}.npz".format(j), a)