# Packages needed to access data

In [None]:
import pandas as pd
import numpy as np

# Retrieve QUBO matrix of an instance
- How to retrieve the QUBO matrix of instance `id`?
- Which problem class does instance `id` belong to?

In [None]:
# Select instance with id = 5
# By changing id, we can retrieve the QUBO matrix of other instances
# NOTE: this rebinds the name `id`, shadowing Python's built-in id() for the
# rest of the notebook; later cells read this variable.
id = 5
# The first CSV column is used as the row index; the remaining values are
# converted to a NumPy array.
Q = pd.read_csv(f'data/qubo_dataset/{id}.csv', index_col=[0]).to_numpy()

# The characteristics of the instance are stored in file map_index_to_instance.csv
map_df = pd.read_csv('data/map_index_to_instance.csv', index_col=[0])
# Show the row of the mapping table whose index label equals id
print(map_df.loc[id])

# Solve an instance with Quantum Annealing
We want to solve instance 5 with quantum annealing.
This requires the `ocean-sdk`.
We proceed with the following steps:
1. Transform matrix `Q` into a Binary Quadratic Model (BQM)
2. Embed the BQM onto the quantum annealer
3. Sample solutions

We need the following packages:

In [None]:
import dimod
from dwave.system import DWaveSampler, FixedEmbeddingComposite
from ast import literal_eval

If we want to use a precomputed embedding, such as one of those in the `embeddings` folder, we rely on the helper functions defined below.

In [None]:
from dwave.system import FixedEmbeddingComposite
from ast import literal_eval

def extract_embedding(id):
    """Load the precomputed embedding of instance ``id`` as a dictionary.

    Reads ``data/embeddings/{id}.csv`` (no header row, first column used as
    the index) and parses every entry of the second column with
    ``literal_eval``.

    Returns a dict that maps each index label of the file to its parsed value.
    """
    table = pd.read_csv(f'data/embeddings/{id}.csv', index_col=0, header=None, dtype=object)
    # Column 1 holds the textual entries; parse them while building the mapping
    return {label: literal_eval(cell) for label, cell in table[1].items()}

# Load the precomputed embedding of the selected instance and show it
embedding = extract_embedding(id)
print(embedding)

In [None]:
def array_to_dict(a):
    """Collect the non-zero upper-triangular entries of a square array.

    Only positions ``(i, j)`` with ``i <= j`` are visited, row by row; entries
    below the diagonal are ignored.

    Returns a dict that maps ``(i, j)`` to ``a[i, j]`` for every visited
    entry that is different from zero.
    """
    size = len(a)
    return {
        (row, col): a[row, col]
        for row in range(size)
        for col in range(row, size)
        if a[row, col] != 0
    }

# Dictionary form of Q: keys are (i, j) index pairs, values the matrix entries
Q_dict = array_to_dict(Q)
print(Q_dict)

1. Transform the QUBO matrix into BQM format

In [None]:
# Build the binary quadratic model from the QUBO coefficients in Q_dict
bqm = dimod.binary.BinaryQuadraticModel.from_qubo(Q_dict)

2. Embed the problem on the D-Wave `Advantage_system6.4` solver. You need a token to access the quantum annealer.

In [None]:
import os

# The API token is read from the DWAVE_API_TOKEN environment variable rather
# than written in the notebook, so it cannot leak when the notebook is shared
# or committed. If the variable is not set, None is passed and the Ocean
# client is expected to fall back to its own configuration
# (see the Ocean SDK configuration documentation).
qpu = DWaveSampler(token=os.environ.get('DWAVE_API_TOKEN'),
                   solver={'name': 'Advantage_system6.4'})

# Wrap the QPU sampler so that problems are mapped with the precomputed embedding
sampler = FixedEmbeddingComposite(qpu, embedding)

3. Solve the instance with quantum annealing. We define the number of samples `num_reads` and the `annealing_time` (in microseconds).

In [None]:
# Number of samples requested from the sampler
num_reads = 50
# Annealing time, in microseconds
annealing_time = 20
# Both values are forwarded as keyword arguments to sampler.sample
sampleset = sampler.sample(bqm,
                           num_reads=num_reads,
                           annealing_time=annealing_time)

We can also solve the QUBO problem with another solver, such as Simulated Annealing.

In [None]:
from dwave.samplers import SimulatedAnnealingSampler

# Number of samples requested from the simulated-annealing sampler
num_reads = 50
# NOTE: `sampler` and `sampleset` are rebound here, replacing the objects
# created in the quantum-annealing cells.
sampler = SimulatedAnnealingSampler()
sampleset = sampler.sample(bqm, num_reads=num_reads)

# Sample with the best value of the cost function
first = sampleset.first
print(first)

# Meta-Learning Dataset 

In [None]:
# The first CSV column is used as the row index and the first two rows form a
# two-level column header.
metalearning_df = pd.read_csv('data/metalearning_dataset.csv', 
                              index_col=[0], header=[0,1])

# Results of the Meta-models
We access the results of the meta-models trained on the small instances.
Change `size_instances` to `'large'` to see the results for the large instances.


In [None]:
# Instance group whose results are loaded: 'small' or 'large'
size_instances = 'small'
# The first two CSV columns form the row index and the first three rows form a
# three-level column header.
metamodels_results_df = pd.read_csv(f'data/metamodels_results/{size_instances}/metamodels_results.csv',
                                    index_col=[0,1],
                                    header=[0,1,2])
# Last expression of the cell: the table is displayed as the cell output
metamodels_results_df

# Training and testing a meta-model
We give an example of how to train and test a meta-model with the Meta-Learning dataset.

We train an AdaBoost meta-model with the `LogIsing` domain of the small instances, to predict the label `Optimal` for the Quantum Annealing (`QA`). We use balanced accuracy to evaluate the performance of the meta-model.
Some features of this domain are not defined for some instances.
In our work, we replace the `nan` values of a feature `f` with the mean value of `f`. We do the same here.

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Features in the domain LogIsing
domain = 'LogIsing'
# Rows 0..245 are selected (presumably the small instances - confirm against
# the dataset). .copy() yields an independent frame, so the clean-up below
# neither touches metalearning_df nor triggers SettingWithCopyWarning.
X = metalearning_df.loc[range(246), domain].copy()

# Target data
target = ('Optimal', 'QA')
y = metalearning_df.loc[range(246), target]

# Replace not defined values in X (infinite or missing) with the mean value of
# the corresponding feature. New frames are assigned instead of mutating in
# place, so re-running the cell always starts from the raw selection.
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(X.mean())

# Features are scaled in the range [0,1]
# MinMaxScaler rescales every column independently, so a single fit on the
# whole frame gives the same values as scaling the columns one by one.
# NOTE(review): the imputation means and the scaling ranges are computed on all
# selected rows, including those later held out for testing.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), index=X.index, columns=X.columns)

We generate two data splits: one contains the data used for training (67% of the instances), the other contains the data used to test the meta-model (33% of the instances). In this example, the splits are stratified by problem class.

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of the instances held out for testing
test_size = 0.33
# Column of the meta-learning dataset that stores the problem class
problem = ('instance_id', 'problem')

# Stratify by problem class so that both splits keep the same class proportions.
# A fixed random_state makes the split identical on every run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y.values,
    test_size=test_size,
    stratify=metalearning_df.loc[range(246), problem],
    random_state=42,
)

Now we train the model and compute its balanced_accuracy on the test split.

In [None]:
# Training of the meta-model
# A fixed random_state makes the fitted ensemble repeatable across runs
model = AdaBoostClassifier(random_state=42)
model.fit(X_train, y_train)

# Testing of the meta-model
y_predicted = model.predict(X_test)
balanced_accuracy = balanced_accuracy_score(y_test, y_predicted)
print(f'Balanced Accuracy of the meta-model: {balanced_accuracy}')

We can compute the importance of the features of the meta-model with permutation feature importance.
Each feature is shuffled `n_repeats = 100` times and the resulting drop in balanced accuracy is computed.

In [None]:
from sklearn.inspection import permutation_importance

# Shuffle each feature n_repeats times on the test split and measure the change
# in balanced accuracy. A fixed random_state makes the shuffles, and therefore
# the reported importances, repeatable across runs.
permutation_importances = permutation_importance(model, X_test, y_test,
                                                 scoring='balanced_accuracy',
                                                 n_repeats=100,
                                                 random_state=42)

In [None]:
# Pair every feature name with its mean permutation importance
feature_importances = dict(zip(X.columns, permutation_importances.importances_mean))

# Print the features from the most important one to the least important one
print(f'FEATURE IMPORTANCES')
ranking = sorted(feature_importances.items(), key=lambda pair: pair[1], reverse=True)
for name, score in ranking:
    print(f'{name}: {score}')

# Feature Importance
We access the feature importance results of the meta-models trained on the small instances.
Change `size_instances` to `'large'` to see the results for the large instances.

In [None]:
# Instance group whose results are loaded: 'small' or 'large'
size_instances = 'small'
# The first two CSV columns form the row index and the first three rows form a
# three-level column header.
feature_importance_df = pd.read_csv(f'data/feature_importance/{size_instances}/feature_importance.csv',
                                    index_col=[0,1],
                                    header=[0,1,2])
# Last expression of the cell: the table is displayed as the cell output
feature_importance_df