# Packages needed to access data

In [1]:
import pandas as pd
import numpy as np

# Retrieve QUBO matrix of an instance
- How to retrieve the QUBO matrix of instance `id`?
- Which problem class does instance `id` belong to?

In [3]:
# Select instance with id = 5
# By changing id, we can retrieve the QUBO matrix of other instances
# NOTE: the name `id` shadows the Python builtin; it is kept because later cells read it
id = 5
# The first CSV column is used as the index; the remaining values become a NumPy array
Q = pd.read_csv(f'data/qubo_dataset/{id}.csv', index_col=[0]).to_numpy()

# The characteristics of the instance are stored in file map_index_to_instance.csv
map_df = pd.read_csv('data/map_index_to_instance.csv', index_col=[0])
# Show the row describing the selected instance
print(map_df.loc[id])

problem          NumPart
structure      geometric
n_variables           27
repetition           0.0
Name: 5, dtype: object


# Solve an instance with Quantum Annealing
We want to solve instance 5 with quantum annealing.
This requires the `ocean-sdk`.
We proceed with the following steps:
1. Transform matrix `Q` into a Binary Quadratic Model (BQM)
2. Embed the BQM onto the quantum annealer
3. Sample solutions

We need the following packages:

In [None]:
import dimod
from dwave.system import DWaveSampler, FixedEmbeddingComposite
from ast import literal_eval

If we want to use a precomputed embedding, such as one of those in the `embeddings` folder, we rely on the helper functions defined below.

In [None]:
from dwave.system import FixedEmbeddingComposite
from ast import literal_eval

# Return a dictionary containing the embedding of the instance.
def extract_embedding(id):
    """Load the precomputed embedding of instance `id` as a dictionary.

    Reads `data/embeddings/{id}.csv` (no header row, first column used as the
    index) and parses the text stored in column 1 of every row as a Python literal.

    :param id: identifier of the instance, used to build the file name
    :return: dictionary mapping each index value of the CSV to the parsed content of column 1
    """
    table = pd.read_csv(f'data/embeddings/{id}.csv', index_col=0, header=None, dtype=object)
    parsed_column = table[1].apply(literal_eval)
    return parsed_column.to_dict()

# Load the precomputed embedding of the instance selected through `id`
embedding = extract_embedding(id)
print(embedding)

In [8]:
def array_to_dict(a):
    """Collect the non-zero entries on and above the diagonal of a square array.

    :param a: 2D array indexable as a[i, j]; only positions with j >= i are read
    :return: dictionary {(i, j): a[i, j]} with one item per non-zero entry, in row-major order
    """
    size = len(a)
    return {
        (row, col): a[row, col]
        for row in range(size)
        for col in range(row, size)
        if a[row, col] != 0
    }

# Dictionary form of the QUBO matrix: non-zero entries (i, j) with i <= j
Q_dict = array_to_dict(Q)
print(Q_dict)

{(0, 0): -38496.0, (0, 1): 6848.0, (0, 2): 4928.0, (0, 3): 8320.0, (0, 4): 896.0, (0, 5): 768.0, (0, 6): 1728.0, (0, 7): 5760.0, (0, 8): 1472.0, (0, 9): 9600.0, (0, 10): 640.0, (0, 11): 2240.0, (0, 12): 2048.0, (0, 13): 576.0, (0, 14): 4928.0, (0, 15): 4032.0, (0, 16): 640.0, (0, 17): 1152.0, (0, 18): 2880.0, (0, 19): 640.0, (0, 20): 1216.0, (0, 21): 3968.0, (0, 22): 3968.0, (0, 23): 2432.0, (0, 24): 4224.0, (0, 25): 320.0, (0, 26): 768.0, (1, 1): -472512.0, (1, 2): 65912.0, (1, 3): 111280.0, (1, 4): 11984.0, (1, 5): 10272.0, (1, 6): 23112.0, (1, 7): 77040.0, (1, 8): 19688.0, (1, 9): 128400.0, (1, 10): 8560.0, (1, 11): 29960.0, (1, 12): 27392.0, (1, 13): 7704.0, (1, 14): 65912.0, (1, 15): 53928.0, (1, 16): 8560.0, (1, 17): 15408.0, (1, 18): 38520.0, (1, 19): 8560.0, (1, 20): 16264.0, (1, 21): 53072.0, (1, 22): 53072.0, (1, 23): 32528.0, (1, 24): 56496.0, (1, 25): 4280.0, (1, 26): 10272.0, (2, 2): -349272.0, (2, 3): 80080.0, (2, 4): 8624.0, (2, 5): 7392.0, (2, 6): 16632.0, (2, 7): 55440

1. Transform the QUBO matrix into BQM format

In [None]:
# Build the binary quadratic model from the QUBO dictionary computed above
bqm = dimod.binary.BinaryQuadraticModel.from_qubo(Q_dict)

2. Embed the problem on the D-Wave `Advantage_system6.4` solver. You need an access token to use the quantum annealer.

In [None]:
import os

# The access token is read from the DWAVE_API_TOKEN environment variable so that
# it does not have to be written in the notebook. The placeholder string is kept
# as a fallback and must be replaced by a real token if the variable is not set.
dwave_token = os.environ.get('DWAVE_API_TOKEN', 'd-wave_access_token')

qpu = DWaveSampler(token=dwave_token,
                   solver={'name': 'Advantage_system6.4'})

# Wrap the QPU so that problems are mapped with the precomputed embedding
sampler = FixedEmbeddingComposite(qpu, embedding)

3. Solve the instances with quantum annealing. We define the number of samples `num_reads` and the `annealing_time` (microseconds)

In [None]:
# Number of samples requested from the annealer
num_reads = 50
# Annealing time, in microseconds
annealing_time = 20
# Sample the BQM through the sampler built in the previous cell
sampleset = sampler.sample(bqm,
                           num_reads=num_reads,
                           annealing_time=annealing_time)

We can also solve the QUBO problem with another solver, such as Simulated Annealing.

In [None]:
from dwave.samplers import SimulatedAnnealingSampler

# Number of samples to draw
num_reads = 50
# NOTE: `sampler` and `sampleset` are reassigned here, so the quantum annealing
# sampler and its results defined in the previous cells are overwritten
sampler = SimulatedAnnealingSampler()
sampleset = sampler.sample(bqm, num_reads=num_reads)

# Sample with the best value of the cost function
first = sampleset.first
print(first)

# Meta-Learning Dataset 

In [None]:
# The first CSV column is the index; the first two rows form a two-level column header
metalearning_df = pd.read_csv('data/metalearning_dataset.csv', 
                              index_col=[0], header=[0,1])

# Results of the Meta-models
We access the results of the meta-models trained on the small instances.
Change `size_instances` to `'large'` to see the results for the large instances.


In [None]:
# Set of results to load: 'small' or 'large'
size_instances = 'small'
# The first two CSV columns form the index; the first three rows form the column header
metamodels_results_df = pd.read_csv(f'data/metamodels_results/{size_instances}/metamodels_results.csv',
                                    index_col=[0,1],
                                    header=[0,1,2])
# Last expression of the cell: displays the table
metamodels_results_df

# Training and testing a meta-model
We give an example on how to train and test a meta-model with the Meta-Learning dataset.

We train an AdaBoost meta-model on the `LogIsing` domain of the small instances to predict the label `Optimal` for Quantum Annealing (`QA`). We use balanced accuracy to evaluate the performance of the meta-model.
Some features of this domain are not defined for some instances.
In our work, we substitute the `nan` values of a feature `f` with the mean value of `f`. We do the same here (infinite values are treated as undefined as well).

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Features in the domain LogIsing
domain = 'LogIsing'
# .copy() makes X independent of metalearning_df before it is modified below
X = metalearning_df.loc[range(246), domain].copy()

# Target data
target = ('Optimal', 'QA')
y = metalearning_df.loc[range(246), target]

# Treat +/-inf as undefined, then replace every undefined value of a feature
# with the mean value of that feature
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(X.mean())

# Features are scaled in the range [0,1]. MinMaxScaler scales every column
# independently, so the whole table is transformed with a single call
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), index=X.index, columns=X.columns)

We generate two data splits: one contains the data used for the training (67% of the instances), the other contains the data used to test the meta-model (33% of the instances). In this example, we generate the stratified splits over the problem class.

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of the instances reserved for testing
test_size = 0.33
# Column holding the problem class, used to stratify the split
problem = ('instance_id', 'problem')
# Fixed seed, so that the same split is produced at every run
split_seed = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y.values,
    test_size=test_size,
    stratify=metalearning_df.loc[range(246), problem],
    random_state=split_seed)

Now we train the model and compute its balanced_accuracy on the test split.

In [None]:
# Training of the meta-model (seeded, so that repeated runs fit the same model)
model = AdaBoostClassifier(random_state=42)
model.fit(X_train, y_train)

# Testing of the meta-model
y_predicted = model.predict(X_test)
balanced_accuracy = balanced_accuracy_score(y_test, y_predicted)
print(f'Balanced Accuracy of the meta-model: {balanced_accuracy}')

We can compute the importance of the features of the metamodel with permutation feature importance.
Each feature is shuffled `n_repeats = 100` times and the resulting loss in balanced accuracy is computed.

In [None]:
from sklearn.inspection import permutation_importance

# Seeded, so that the random shuffles (and therefore the importances) are
# the same at every run
permutation_importances = permutation_importance(model, X_test, y_test,
                                                scoring='balanced_accuracy', n_repeats=100,
                                                random_state=42)

In [None]:
# Map every feature name to its mean permutation importance
feature_importances = dict(zip(X.columns, permutation_importances.importances_mean))

# We sort the feature from the most important one to the least important one
print('FEATURE IMPORTANCES')
ranked_features = sorted(feature_importances.items(), key=lambda item: item[1], reverse=True)
for name, score in ranked_features:
    print(f'{name}: {score}')

# Feature Importance
We access the feature importance results of the meta-models trained on the small instances.
Change `size_instances` to `'large'` to see the results for the large instances.

In [None]:
# Set of results to load: 'small' or 'large'
size_instances = 'small'
# The first two CSV columns form the index; the first three rows form the column header
feature_importance_df = pd.read_csv(f'data/feature_importance/{size_instances}/feature_importance.csv',
                                    index_col=[0,1],
                                    header=[0,1,2])
# Last expression of the cell: displays the table
feature_importance_df

# Hamiltonian of the problem

We compute the Hamiltonian of the problem $H_p$ starting from the QUBO matrix Q.
We do so by using the following helper methods:

In [5]:
from tqdm import tqdm
import dimod as dmd

def get_coupling_bias_offset(q):
    """
    Converts a QUBO dictionary with dimod's qubo_to_ising and reorders the result

    :param q: matrix q expressed as a dictionary
    :return: coupling c, bias b and offset o, in this order (the first two
             values returned by qubo_to_ising are swapped)
    """
    first, second, constant = dmd.utilities.qubo_to_ising(q)
    return second, first, constant

def get_sigma_z_i_diag(i, n):
    """
    Calculates the diagonal of the Pauli matrix Z acting on qubit i of n qubits,
    i.e. the diagonal of the Kronecker product I x ... x Z x ... x I with Z as
    factor number i

    :param i: qubit index 0...n-1 (qubit 0 is the leftmost factor of the product)
    :param n: total number of qubits
    :return: diagonal of the resulting matrix, a vector of length 2^n with entries +1/-1
    :raises AssertionError: if n <= 0 or i is outside 0...n-1
    """
    assert n > 0, "Function is defined for n>0"
    assert 0 <= i < n, "Function is defined for 0<=i<n"

    # Diagonal of the single-qubit Pauli Z matrix [[1, 0], [0, -1]]
    sigma_z_diag = np.array([1, -1])

    # The i identity factors on the left (qubits 0...i-1) repeat the whole
    # diagonal of Z 2^i times
    result = np.tile(sigma_z_diag, 2 ** i)

    # The n-i-1 identity factors on the right (qubits i+1...n-1) repeat each
    # element of the previous diagonal 2^(n-i-1) times
    result = np.repeat(result, 2 ** (n - i - 1))
    assert len(result) == 2 ** n
    return result

def dict_to_array(D, dim, n):
    """
    Transforms a dictionary into a dense vector or a dense square matrix

    :param D: dictionary to transform into an array. If dim=2 the keys are
              pairs (row, column). If dim=1 and all the keys are integers, each
              key is the position of its value; otherwise the values are stored
              in the iteration order of the dictionary
    :param dim: if dim=1, D represents a 1D vector, if dim=2 D represents a matrix
    :param n: length of the vector (dim=1) or number of rows and columns of the matrix (dim=2)
    :return: vector/matrix representation of D; positions not set by D are 0
    """
    assert dim == 1 or dim == 2
    if dim == 2:
        M = np.zeros([n, n])
        for k, value in D.items():
            M[k[0], k[1]] = value
    else:
        M = np.zeros(n)
        if all(isinstance(k, (int, np.integer)) for k in D):
            # Place every value at the position given by its key: relying on the
            # insertion order would misplace the values whenever the keys were
            # not inserted as 0, 1, ..., n-1
            for k, value in D.items():
                M[k] = value
        else:
            # Keys that are not indices: keep the positional behaviour
            for position, value in enumerate(D.values()):
                M[position] = value
    return M

Since $H_p$ is diagonal, we only need to compute its diagonal:

In [6]:
def get_Hp(q):
    """
    Computes the diagonal of the problem Hamiltonian H_p associated with a QUBO matrix

    The QUBO is converted into biases, couplings and an offset; the diagonal is
    the sum of bias[i] * Z_i over the qubits plus the sum of
    coupling[i, j] * Z_i * Z_j over the pairs i < j, where Z_i is the diagonal
    returned by get_sigma_z_i_diag(i, n).
    The offset is returned separately and is not added to the diagonal.
    Every intermediate vector has length 2^n, so time and memory grow
    exponentially with the size of q.

    :param q: QUBO matrix q (n x n array; only the entries with i <= j are used)
    :return: tuple (diagonal of the Hamiltonian H_p as a vector of length 2^n, offset)
    """
    n = len(q)
    new_q = array_to_dict(q)
    coupling, bias, offset = get_coupling_bias_offset(new_q)
    coupling = dict_to_array(coupling, 2, n)
    bias = dict_to_array(bias, 1, n)

    H_bias = np.zeros(2 ** n)
    H_coupling = np.zeros(2 ** n)
    for i in tqdm(range(0, n)):
        sigma_z_i = get_sigma_z_i_diag(i, n)
        if bias[i] != 0:
            H_bias += bias[i] * sigma_z_i
        # array_to_dict keeps only the entries with i <= j, so the couplings lie
        # above the diagonal: pairs with j <= i and zero couplings contribute
        # nothing and are skipped to avoid useless products of 2^n-long vectors
        for j in range(i + 1, n):
            if coupling[i, j] != 0:
                H_coupling += coupling[i, j] * sigma_z_i * get_sigma_z_i_diag(j, n)

    return H_bias + H_coupling, offset


We show a small example of the computation of $H_p$ on the QUBO problem loaded at the beginning of this notebook. Note that, since $Q$ is an $n \times n$ matrix, the diagonal of $H_p$ is a vector of length $2^n$.

In [None]:
# get_Hp returns a tuple (diagonal of H_p, offset), so `hamiltonian` holds both values
hamiltonian = get_Hp(Q)