In [3]:
import numpy as np
import json
import pickle
import os
import pandas as pd

from pygsti.circuits import Circuit
from pygsti.tools.internalgates import internal_gate_unitaries


import sys
sys.path.insert(0, '../../../')

import ml
from ml import encoding

# Result of pyGSTi's internal_gate_unitaries(); presumably a lookup of the
# built-in gate unitaries -- TODO confirm. Not referenced by the cells below.
unitary_dict = internal_gate_unitaries()

# Names of the dataset partitions; used below as keys when labelling rows.
dtypes = ['train', 'validate', 'test']

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Package Circuits

This notebook is designed to process the raw circuits and fidelities into tensors that can be processed by our models. Note that, to aid with training, we actually train the physics-aware networks to predict $10000*[1-F(C)]$, or 10000 times the entanglement infidelity of each circuit. This notebook also partitions the dataset into training, validation, and testing subsets.

Set the experiment number

In [72]:
# Experiment to process; selects the ./experiment_<exp_num>/ directory below.
exp_num = 4

# Meta information

The following cell creates any necessary directories as well as the meta information for the dataset. The meta information contains:
   - the number of qubits,
   - the number of encoding channels per qubit,
   - the maximum weight of the tracked errors,
   - the number of hops on the connectivity graph used to generate the list of tracked errors,
   - the minimum fidelity cutoff,
   - the measurement encoding scheme (this is 0 for entanglement fidelity and 3 for PST),
   - the processor's underlying geometry.
The meta information should be consistent across all 10 datasets.

In [73]:
# Directory layout for this experiment. exp_path already ends in '/', so the
# derived paths contain a doubled '//' exactly as before.
exp_path = f'./experiment_{exp_num}/'
sim_path = f'{exp_path}/simulation_results/'
proc_path = f'{exp_path}/processed_inputs_and_outputs/'
os.makedirs(proc_path, exist_ok=True)

# The processor spec is always read from experiment 0, regardless of exp_num.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open('./experiment_0/pspec.pkl', 'rb') as f:
    pspec = pickle.load(f)

# Dataset-wide settings recorded in meta.json.
num_qubits = 4
geometry = 'ring'
cutoff = .85              # circuits with D:SP at or below this are dropped later
num_hops = 2
max_error_weight = 2      # Can't set this above 2
measurement_encoding = 0  # 0: entanglement fidelity, 3: PST (see the markdown above)
num_channels = 4 + len(pspec.gate_names) - 1

# Qubit connectivity graph and the error generators derived from it.
adj_matrix = ml.newtools.ring_adj_matrix(num_qubits)
laplace = ml.newtools.laplace_from_qubit_graph(adj_matrix)
gate_encoding = encoding.ring_gate_to_index
error_gens = ml.newtools.up_to_weight_k_error_gens_from_qubit_graph(
    max_error_weight, num_qubits, laplace, num_hops=num_hops
)

# Key order is kept so the serialized JSON is unchanged.
meta = dict(
    cutoff=cutoff,
    num_hops=num_hops,
    max_error_weight=max_error_weight,
    geometry=geometry,
    num_qubits=num_qubits,
    num_channels=num_channels,
    measurement_encoding=measurement_encoding,
)

with open(f'{exp_path}/meta.json', 'w') as f:
    json.dump(meta, f)

# Load the circuits

The next three cells load the circuits and entanglement fidelities. The second cell removes any low-fidelity circuits, if they exist.

In [74]:
# Raw simulation results: one row per circuit.
df = pd.read_csv(f'{sim_path}/dataframe.csv')

In [75]:
# Boolean mask of the circuits whose D:SP value clears the cutoff.
f_mask = df['D:SP'] > cutoff
print(f'You are using {100*sum(f_mask) / len(df)} percent of the circuits.')

# Keep only the passing rows and renumber them from 0; reset_index() keeps the
# old row labels in a new 'index' column.
df_good = df[f_mask].reset_index()

You are using 100.0 percent of the circuits.


In [76]:
# Parse each circuit string into a pyGSTi Circuit and pull out the matching
# D:SP values as a plain list.
circs = [Circuit(circuit_str) for circuit_str in df_good['Circuit']]
sps = df_good['D:SP'].tolist()


# Process the circuits

In [77]:
# Encode the retained circuits and their fidelities into model inputs/targets.
# max_depth is left as None here -- presumably the encoder then infers the
# depth from the circuits themselves (TODO confirm in ml.encoding).
xt, y = encoding.create_input_data(
    circs=circs,
    fidelities=sps,
    tracked_error_gens=error_gens,
    measurement_encoding=measurement_encoding,
    pspec=pspec,
    geometry=geometry,
    num_qubits=num_qubits,
    num_channels=num_channels,
    indexmapper=gate_encoding,
    indexmapper_kwargs={'pspec': pspec},
    valuemapper=None,
    valuemapper_kwargs={},
    max_depth=None,
    return_separate=False,
)

0,200,400,600,800,1000,1200,1400,1600,1800,2000,2200,2400,2600,2800,3000,3200,3400,3600,3800,4000,4200,4400,4600,4800,

# Package the circuits

The next six cells: 
1. partition the dataset into training, validation, and testing sets;
2. save the processed circuits, entanglement fidelities, and indices;
3. and save a new dataframe that contains only the high-fidelity circuits.

In [78]:
# Per-partition containers, filled in below (keys: 'train', 'validate', 'test').
x_data, y_data = {}, {}

In [79]:
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart & Run All reproduces the same partition (and the
# same saved index file) instead of silently reshuffling on every run.
split_seed = 0

# Hold out 25% of the circuits for testing, then carve 25% of the remainder
# off for validation: roughly 56.25% / 18.75% / 25% train / validate / test.
train_indices, test_indices = train_test_split(
    np.array(df_good.index), train_size=.75, random_state=split_seed
)
train_indices, val_indices = train_test_split(
    train_indices, train_size=.75, random_state=split_seed
)

# Row labels of df_good belonging to each partition.
indices = {'train': train_indices, 'validate': val_indices, 'test': test_indices}

In [80]:
# Sanity check: no circuit may appear in more than one partition.
assert all(
    np.intersect1d(first, second).size == 0
    for first, second in (
        (train_indices, test_indices),
        (train_indices, val_indices),
        (val_indices, test_indices),
    )
)

In [81]:
# Slice the encoded circuits and targets into the three partitions.
# The targets are infidelities multiplied by 10,000, as this seems to help the
# optimizer (could instead play with both the learning rate and the weight
# initializations).
for dt in dtypes:
    rows = indices[dt]
    x_data[dt] = xt[rows, :, :]
    y_data[dt] = 10000*(1 - y[rows])

In [82]:
# Persist the partitioned inputs, targets and row indices; each archive holds
# one array per partition name.
np.savez_compressed(f'{proc_path}/processed_high_fidelity_circuits.npz', **x_data)
np.savez_compressed(f'{proc_path}/processed_infidelities.npz', **y_data)
np.savez_compressed(f'{proc_path}/indices.npz', **indices)

In [83]:
# Label every retained circuit with the partition it was assigned to.
# The column starts as empty strings so it has a string-compatible dtype,
# rather than uninitialised floats from np.empty.
df_good['test_or_train'] = ''
for dt in dtypes:
    # A single .loc[rows, column] assignment writes into df_good itself; the
    # chained form df_good.loc[rows][column] = ... only modifies a temporary copy.
    df_good.loc[indices[dt], 'test_or_train'] = dt

# Every row must have been claimed by one of the partitions.
assert df_good['test_or_train'].isin(dtypes).all()

# Save the filtered, labelled frame. Writing `df` here would store the
# unfiltered data without the partition labels.
df_good.to_csv(sim_path + '/high-fidelity-dataframe.csv')

# Repeat with the mirror circuits

In [84]:
# Load the mirrored circuits and their D:SP values.
df_other = pd.read_csv(f'{sim_path}/mirrored_dataframe.csv')
o_circs = [Circuit(circuit_str) for circuit_str in df_other['Circuit']]
o_sps = df_other['D:SP'].tolist()

# Deepest circuit in the main (unfiltered) dataframe; passed as max_depth when
# encoding the mirrored circuits.
# NOTE(review): this reads `df`, not `df_good` -- confirm that is intended if
# the cutoff ever removes circuits.
max_depth = max(df['F:Depth'])
max_depth

180

In [85]:
# Encode the mirrored circuits with the same settings as the main dataset,
# but with an explicit max_depth taken from the main dataframe.
other_xt, other_y = encoding.create_input_data(
    circs=o_circs,
    fidelities=o_sps,
    tracked_error_gens=error_gens,
    measurement_encoding=measurement_encoding,
    pspec=pspec,
    geometry=geometry,
    num_qubits=num_qubits,
    num_channels=num_channels,
    indexmapper=gate_encoding,
    indexmapper_kwargs={'pspec': pspec},
    valuemapper=None,
    valuemapper_kwargs={},
    max_depth=max_depth,
    return_separate=False,
)

# Same target scaling as the main dataset: 10,000 times (1 - value).
other_y = 10000 * (1 - other_y)

0,200,400,600,

In [86]:
# Persist the encoded mirrored circuits and their scaled targets.
np.savez_compressed(f'{proc_path}/processed_mirrored_circuits.npz', circuits=other_xt)
np.savez_compressed(f'{proc_path}/processed_mirrored_infidelities.npz', infidelities=other_y)