In [1]:
import random_VQE_data_prep as data
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold 
import os
import pickle
import re
from sklearn import linear_model

In [2]:
def count_gate_paris(df, gates:str):
    l = df[gates]
    res = dict()
    for i in range(len(l)):
        tmp = str(l[i])
        if tmp in res.keys():
            res[tmp] += 1
        else:
            res[tmp] = 1
    return res

`extract_features(path)` as function to store all relevant features from the circuit at the given path into a df to perform linear regression

In [3]:
def extract_features(path):
    file = open(path, "rb")
    dict_ = pickle.load(file)
    #print(dict)
    #print(len(dict["Quantum_circuit"]))
    data = dict_["Quantum_circuit"][0].__dict__["_data"]
    #print(dict["Quantum_circuit"][0].draw())
    

    qubits = str(data)
    #print(qubits)

    # Split input_string before each "CircuitInstruction"
    instructions = qubits.split("CircuitInstruction")

    # Remove the empty string at the beginning (resulting from the initial split)
    instructions = instructions[1:]

    #patterns to get different values from datastring
    pattern_nq = r"num_qubits=(\d+)"
    pattern_nc = r"num_clbits=(\d+)"
    pattern_n = r"name='(\w+)'"
    pattern_p = r"params=\[(.*?)\]"
    pattern_gates = r"Qubit\(QuantumRegister\(5, 'q'\), (\d+)\)"
    #pattern_clb = r'clbits=\(\)\)' -> possibly clbits relevant?

    # Extracted numbers
    # find all values and store them in array to build df
    numbers = [int(match) for match in re.findall(pattern_nq, qubits)]
    clbits = [int(match_cl) for match_cl in re.findall(pattern_nq, qubits)]
    name = [match_name for match_name in re.findall(pattern_n, qubits)]
    params = [match_p for match_p in re.findall(pattern_p, qubits)]

    # loop over all Instructions to get per instruction a list of used qubits
    gates_all = []
    for instr in enumerate(instructions):
        gates = [match_gates for match_gates in re.findall(pattern_gates, str(instr))]
        gates_all.append(gates)


    #create new df to show parameters
    df_new =pd.DataFrame({'name': name, 'num_qubits': numbers, 'num_clbits': clbits, 'params': params, 'gates': gates_all})
    df_new = df_new.sort_values(by='num_qubits', ascending=False)
    df_new = df_new[df_new['num_qubits'] == 2].reset_index()

    res = dict()
    res['noisy_expectation'] = dict_['noisy_expectation']
    res['num_layers'] = dict_['num_layers']
    #res['observable'] = dict_['obervable']
    tmp = count_gate_paris(df_new, 'gates')
    for i in tmp.keys():
        res['count_' + i] = tmp[i]
    res['target'] = dict_['ideal_expectation']

    return res 

Split the generated data into an validation and train/test set. `train_frac` can be chosen and decides how many percent of the data are used to train and test the model.

In [4]:
def split_data(path="./pickles", train_frac = 0.8):
    ### split data into evaluation and training set
    assert train_frac < 1
    assert train_frac >= 0

    # shuffle data randomly to make different datasets
    all_files = os.listdir(path)
    # data_size = data.shape[0] # data.shape[0] changed to fit dictionary
    # indices = np.arange(data_size)  
    np.random.shuffle(all_files)
    #data_shuffled = data[indices]

    # split shuffeled data into test, train and eval according to fractions
    idx_eval = int(train_frac * len(all_files))

    files_train = all_files[:idx_eval]
    files_eval = all_files[idx_eval:]

    return files_train, files_eval

files_train, files_eval = split_data()

In [5]:
# decide which values are features or samples!
# len(files_eval)
# files_eval[1]

`file_to_data(files)` takes a list of circuits and generates the data using `extract_features(path)`

In [6]:
def file_to_data(files):
    a = list()
    for i, idx in enumerate(files):
        a.append(extract_features(f"./pickles/{idx}"))
    df = pd.DataFrame(a)
    return df

df_eval = file_to_data(files_eval)
df_train = file_to_data(files_train)

`split_data_kfold(df_train)` returns all the training and test data, as features (X) and the value to predict (y)

In [7]:
# def split_data_kfold(df_train):
#     # performs KFold-CV on rest of train data from split into eval data with 5 splits
#     # returns all five splits as list
#     kf = KFold(shuffle=True, random_state=42)
#     X_train = []
#     X_test = []
#     y_train = []
#     y_test = []

#     X = df_train.drop(columns=['target'])
#     y = df_train['target']

    
#     for train_index, test_index in kf.split(df_train):
#         X_train.append(X.iloc[train_index])
#         X_test.append(X.iloc[test_index])
#         y_train.append(y.iloc[train_index])
#         y_test.append(y.iloc[test_index])
        
#     return X_train, X_test, y_train, y_test

# X_train, X_test, y_train, y_test = split_data_kfold(df_train)
# #type(X_train[0])


In [8]:
def split_data_kfold(df_train):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    X_train = []
    X_test = []
    y_train = []
    y_test = []

    X = df_train.drop(columns=['target'])
    y = df_train['target']
    
    for train_index, test_index in kf.split(df_train):
        X_train.append(X.iloc[train_index])
        X_test.append(X.iloc[test_index])
        y_train.append(y.iloc[train_index])
        y_test.append(y.iloc[test_index])
        
    return X_train, X_test, y_train, y_test

# Ensure the data is correct and check the type of first element
X_train, X_test, y_train, y_test = split_data_kfold(df_train)
#print(X_train[0], y_train[0])

`Linear_Reg(data)` returns the best model of the k = 5 splits according to the R2

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

def Linear_Reg(X_train, X_test, y_train, y_test):
    # returns best fitting linear model of all trained models based on MSE
    # X_train, y_train, X_test, y_test are numpy arrays
    # returns best model, MSE, R2
    best_model = None
    best_model_MSE = float('inf')
    best_model_R2 = float('-inf')
    
    for i in range(len(X_train)):
        if X_train[i].empty or y_train[i].empty:
            print(f"Skipping fold {i} due to empty training data.")
            continue       
        
        model = LinearRegression()
        model.fit(X_train[i], y_train[i])
        y_pred = model.predict(X_test[i])

        MSE = mean_squared_error(y_test[i], y_pred)
        R2 = r2_score(y_test[i], y_pred)

        print(f'Fold {i} - MSE: {MSE:.4f}, R2: {R2:.4f}')
        
        if R2 > best_model_R2:
            best_model = model
            best_model_MSE = MSE
            best_model_R2 = R2
    return best_model, best_model_MSE, best_model_R2

best_model, MSE, R2 = Linear_Reg(X_train, X_test, y_train, y_test)
print (f'MSE: {MSE}; R2: {R2}')

Fold 0 - MSE: 0.0654, R2: 0.6121
Fold 1 - MSE: 0.0548, R2: 0.6899
Fold 2 - MSE: 0.0903, R2: 0.4323
Fold 3 - MSE: 0.0805, R2: 0.5139
Fold 4 - MSE: 0.0650, R2: 0.5771
MSE: 0.05478249343416357; R2: 0.6899058567670276


In [16]:
X_eval = df_eval.drop(columns=['target']).reindex(columns=X_train[0].columns)
y_eval = df_eval['target']

y_pred_eval = best_model.predict(X_eval)

MSE = mean_squared_error(y_eval, y_pred_eval)
R2 = r2_score(y_eval, y_pred_eval)

print(f'Evaluation of best model - MSE: {MSE:.4f}, R2: {R2:.4f}')

Evaluation of best model - MSE: 0.0923, R2: 0.4445


In [11]:
vqe = data.data_loader("./pickles/circ_100.pickle")
circuit = vqe["Quantum_circuit"][0]
circuit.draw()

#### add more features

In [12]:
counts_qubit = circuit.num_qubits
counts_gates = circuit.count_ops()
size = circuit.size()
print([counts_gates, size])
circuit.num_unitary_factors()

[OrderedDict({'rz': 87, 'sx': 46, 'cx': 32, 'barrier': 6, 'measure': 5}), 170]


1

How to encode name of gates, gates operation performs on, ...?

predict ideal_expectation

In [13]:
from qiskit.providers.fake_provider import FakeQuitoV2
from qiskit.visualization import plot_gate_map, plot_error_map, plot_histogram
#plot_gate_map(vqe)

backend = FakeQuitoV2()
plot_error_map(backend)

from qiskit_experiments.library import LocalReadoutError
qubits = np.arange(vqe["Quantum_circuit"][0].num_qubits)
qubits

read_out = LocalReadoutError(qubits)
backend.qubit_properties(1)

QubitProperties(t1=6.020256768337207e-05, t2=8.907920429881061e-05, frequency=5080807210.560287)

In [14]:
from qiskit.providers.models import BackendProperties#, QubitProperties
from datetime import datetime

# Define the readout errors for each qubit
readout_errors = [0.02, 0.03, 0.05, 0.01]

# Create qubit properties with readout errors
qubits_properties = [
    [BackendProperties.readout_error(backend, 1)] #for error in readout_errors
]

# Create a mock BackendProperties object
properties = BackendProperties(
    backend_name='mock_backend',
    backend_version='1.0.0',
    last_update_date=datetime.now(),
    qubits=qubits_properties,
    gates=[],
    general=[]
)

# Extract readout errors from the properties
extracted_readout_errors = {}
for qubit_index, qubit_props in enumerate(properties.qubits):
    for prop in qubit_props:
        if prop.name == 'readout_error':
            extracted_readout_errors[qubit_index] = prop.value
            break

print("Readout Errors for each qubit:")
for qubit, error in extracted_readout_errors.items():
    print(f"Qubit {qubit}: {error}")


AttributeError: 'FakeQuitoV2' object has no attribute 'qubit_property'