In [28]:
import numpy as np
import SVM_scripts as svm
import sklearn as skl
import skopt.space as sks
from tqdm import tqdm

# QPU access time per submitted problem. Each literal is written as
# <microseconds>e-6, i.e. the stored value is in seconds (later cells divide
# by 60 to obtain minutes).
# NOTE(review): the numeric suffix is presumably the number of reads/shots
# requested per problem -- confirm against the runs these were measured on.
qpu_access_time_50 = 29_279.56e-6
qpu_access_time_100 = 42_793.56e-6
qpu_access_time_200 = 69_827.96e-6
qpu_access_time_250 = 83_335.56e-6
qpu_access_time_500 = 150_905.56e-6
qpu_access_time_1000 = 286_051.96e-6
qpu_access_time_2000 = 556_331.96e-6
qpu_access_time_2500 = 691_465.56e-6
qpu_access_time_3000 = 826_605.56e-6

# Error message recorded from the solver when the access time gets too large:
#   "The problem cannot be submitted because its estimated QPU access time
#    exceeds the maximum of 1000000 microseconds for Advantage_system4.1."
# Guidance on staying within the limit:
#   https://docs.dwavesys.com/docs/latest/c_qpu_timing.html#keeping-within-the-runtime-limit

In [2]:
# Data set used for this fit-count estimate; the alternatives are kept for quick switching.
file_name = 'Polaris/pgp-broccatelli.parquet'
# file_name = 'Polaris/bbb-martins.parquet'
# file_name = 'Polaris/ncats-solubility.parquet'

matrix, matrix_labels, names = svm.read_parquet(file_name)

# 177 split into quarters, thirds, halves and the whole (177 is the
# max_qubo_dim used in the disabled search-space estimate below).
slice_sizes = [int(177 / divisor) for divisor in (4, 3, 2, 1)]
N = 1  # number of train/test splits; the loop index doubles as the split seed
limits = [34, 26, 18, 12]  # `limit` passed to the fit counter, one entry per slice size
fit_count = 0

for i in tqdm(range(N)):
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=False,
    )
    # Reference kernel width: 1 / (n_features * variance of the training data),
    # the same formula scikit-learn uses for gamma='scale'.
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    # Earlier grid-style variant, kept for reference:
    # param_grid = {'base': [2,10], 'num_encoding': [1,2,3,4],'kernel': [n*default_gamma for n in [0.2,0.4]], 'penalty': [0,1]}
    search_space = {
        'base': sks.Categorical([2, 10]),
        'num_encoding': sks.Integer(1, 4),
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    # Disabled alternative that derives `limit` from the estimated search-space size:
    # search_sizes = svm.estimate_search_space_size(svm.split_dwave_search_space(search_space, slice_size=None, max_qubo_dim=177), samples_per_real_interval=5)
    # limit = round(sum([svm.samples_per_size(search_size) for search_size in search_sizes]))
    inner_estimator = svm.qSVM_estimator(solver='SA', adjust_bias=False)
    for s, (slice_size, limit) in enumerate(zip(slice_sizes, limits)):
        print('Slice size: ', slice_size)
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=False,
            adjust_outer_bias=True,
            seed=0,
        )
        fits = svm.count_hyperparameter_optimization_fits(
            estimator,
            search_space=search_space,
            vectors=vectors,
            labels=labels,
            folds=4,
            filter=True,
            limit=limit,
            mode='bayes',
            seed=0,
            print_info=True,
        )
        fit_count += fits

# Keep this data set's total under its own name; `fit_count` is reused by later cells.
fit_count1 = fit_count
print(fit_count)

  0%|          | 0/1 [00:00<?, ?it/s]

Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


100%|██████████| 1/1 [00:24<00:00, 24.44s/it]

2940





In [3]:
# Data set used for this fit-count estimate; the alternatives are kept for quick switching.
# file_name = 'Polaris/pgp-broccatelli.parquet'
file_name = 'Polaris/bbb-martins.parquet'
# file_name = 'Polaris/ncats-solubility.parquet'

matrix, matrix_labels, names = svm.read_parquet(file_name)

# 177 split into quarters, thirds, halves and the whole (177 is the
# max_qubo_dim used in the disabled search-space estimate below).
slice_sizes = [int(177 / divisor) for divisor in (4, 3, 2, 1)]
N = 1  # number of train/test splits; the loop index doubles as the split seed
limits = [34, 26, 18, 12]  # `limit` passed to the fit counter, one entry per slice size
fit_count = 0

for i in tqdm(range(N)):
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=False,
    )
    # Reference kernel width: 1 / (n_features * variance of the training data).
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    # Earlier grid-style variant, kept for reference:
    # param_grid = {'base': [2,10], 'num_encoding': [1,2,3,4],'kernel': [n*default_gamma for n in [0.2,0.4]], 'penalty': [0,1]}
    search_space = {
        'base': sks.Categorical([2, 10]),
        'num_encoding': sks.Integer(1, 4),
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    # Disabled alternative that derives `limit` from the estimated search-space size:
    # search_sizes = svm.estimate_search_space_size(svm.split_dwave_search_space(search_space, slice_size=None, max_qubo_dim=177), samples_per_real_interval=5)
    # limit = round(sum([svm.samples_per_size(search_size) for search_size in search_sizes]))
    inner_estimator = svm.qSVM_estimator(solver='SA', adjust_bias=False)
    for s, (slice_size, limit) in enumerate(zip(slice_sizes, limits)):
        print('Slice size: ', slice_size)
        # Unlike the other two data sets, this one is sliced with force_unbiased=True.
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=True,
            adjust_outer_bias=True,
            seed=0,
        )
        fits = svm.count_hyperparameter_optimization_fits(
            estimator,
            search_space=search_space,
            vectors=vectors,
            labels=labels,
            folds=4,
            filter=True,
            limit=limit,
            mode='bayes',
            seed=0,
            print_info=True,
        )
        fit_count += fits
# Keep this data set's total under its own name; `fit_count` is reused by later cells.
fit_count2 = fit_count
print(fit_count2)

  0%|          | 0/1 [00:00<?, ?it/s]

Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


100%|██████████| 1/1 [01:38<00:00, 98.10s/it]

6937





In [4]:
# Data set used for this fit-count estimate; the alternatives are kept for quick switching.
# file_name = 'Polaris/pgp-broccatelli.parquet'
# file_name = 'Polaris/bbb-martins.parquet'
file_name = 'Polaris/ncats-solubility.parquet'

matrix, matrix_labels, names = svm.read_parquet(file_name)

# 177 split into quarters, thirds, halves and the whole (177 is the
# max_qubo_dim used in the disabled search-space estimate below).
slice_sizes = [int(177 / divisor) for divisor in (4, 3, 2, 1)]
N = 1  # number of train/test splits; the loop index doubles as the split seed
limits = [34, 26, 18, 12]  # `limit` passed to the fit counter, one entry per slice size
fit_count = 0

for i in tqdm(range(N)):
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=False,
    )
    # Reference kernel width: 1 / (n_features * variance of the training data).
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    # Earlier grid-style variant, kept for reference:
    # param_grid = {'base': [2,10], 'num_encoding': [1,2,3,4],'kernel': [n*default_gamma for n in [0.2,0.4]], 'penalty': [0,1]}
    search_space = {
        'base': sks.Categorical([2, 10]),
        'num_encoding': sks.Integer(1, 4),
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    # Disabled alternative that derives `limit` from the estimated search-space size:
    # search_sizes = svm.estimate_search_space_size(svm.split_dwave_search_space(search_space, slice_size=None, max_qubo_dim=177), samples_per_real_interval=5)
    # limit = round(sum([svm.samples_per_size(search_size) for search_size in search_sizes]))
    inner_estimator = svm.qSVM_estimator(solver='SA', adjust_bias=False)
    for s, (slice_size, limit) in enumerate(zip(slice_sizes, limits)):
        print('Slice size: ', slice_size)
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=False,
            adjust_outer_bias=True,
            seed=0,
        )
        fits = svm.count_hyperparameter_optimization_fits(
            estimator,
            search_space=search_space,
            vectors=vectors,
            labels=labels,
            folds=4,
            filter=True,
            limit=limit,
            mode='bayes',
            seed=0,
            print_info=True,
        )
        fit_count += fits

# Keep this data set's total under its own name; `fit_count` is reused by later cells.
fit_count3 = fit_count
print(fit_count)

  0%|          | 0/1 [00:00<?, ?it/s]

Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


100%|██████████| 1/1 [00:29<00:00, 29.12s/it]

5670





In [36]:
# Remaining QPU time (minutes) if every data set is brought up to N splits.
N = 20
# Available budget: 3 h 2 min 31.68 s, expressed in minutes.
budget_minutes = 3*60 + 2 + 31.68 / 60
# Fits still to be run per data set, weighted by the per-split fit counts.
# NOTE(review): 15, 15 and 6 are presumably the splits already completed for
# the three data sets -- confirm.
outstanding_fits = (N - 15) * fit_count1 + (N - 15) * fit_count2 + (N - 6) * fit_count3
# Seconds of QPU access converted to minutes.
outstanding_minutes = outstanding_fits * qpu_access_time_250 / 60
print('Total Time left in minutes: ', budget_minutes - outstanding_minutes)

Total Time left in minutes:  3.682943609999967


In [30]:
def _count_slice_fits(file_name, force_unbiased, slice_sizes, seeds):
    """Count the training slices produced for one data set.

    For every seed in ``seeds`` the data set is split with
    ``svm.prepare_data_sets`` and, for every entry of ``slice_sizes``, the
    training part is cut with ``svm.slice_training_data``; the number of
    slices returned by each call is added up.

    Parameters
    ----------
    file_name : str
        Path of the parquet file passed to ``svm.read_parquet``.
    force_unbiased : bool
        Forwarded unchanged to ``svm.slice_training_data``.
    slice_sizes : iterable of int
        Slice sizes to evaluate for each split.
    seeds : iterable of int
        Seeds passed to ``svm.prepare_data_sets``, one split per seed.

    Returns
    -------
    int
        Total number of slices over all seeds and slice sizes.
    """
    matrix, matrix_labels, names = svm.read_parquet(file_name)
    total = 0
    for seed in tqdm(seeds):
        vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
            matrix,
            matrix_labels,
            train_percentage=50,
            positive_negative_ratio=None,
            max_train_size=None,
            min_train_size=None,
            seed=seed,
            normalize_data=True,
            print_info=False,
        )
        for slice_size in slice_sizes:
            slices, slice_labels, counts = svm.slice_training_data(
                vectors,
                labels,
                slice_size=slice_size,
                force_unbiased=force_unbiased,
                print_info=False,
                seed=0,
            )
            total += len(slices)
    return total


start = 0
N = 1
# Defined here as well (same value as in the fit-count cells) so this cell
# does not depend on an earlier cell having been executed first.
slice_sizes = [int(177/n) for n in [4,3,2,1]]

# higher shots tests:
# One slice count per data set; only bbb-martins is sliced with force_unbiased=True.
fit_count4 = _count_slice_fits('Polaris/pgp-broccatelli.parquet', force_unbiased=False, slice_sizes=slice_sizes, seeds=range(start, N))
fit_count5 = _count_slice_fits('Polaris/bbb-martins.parquet', force_unbiased=True, slice_sizes=slice_sizes, seeds=range(start, N))
fit_count6 = _count_slice_fits('Polaris/ncats-solubility.parquet', force_unbiased=False, slice_sizes=slice_sizes, seeds=range(start, N))

100%|██████████| 1/1 [00:00<00:00,  7.12it/s]
100%|██████████| 1/1 [00:01<00:00,  1.70s/it]
100%|██████████| 1/1 [00:00<00:00,  4.26it/s]


In [26]:
# Scratch note: a split count next to a time value.
# NOTE(review): 32 + 56/60 is presumably 32 min 56 s expressed in minutes -- confirm.
N = 18
time_minutes = 32 + 56/60
print(N, time_minutes)

18 32.93333333333333


In [46]:
# Estimated QPU minutes for the slice fits of all three data sets at the
# 50-suffix access time.
# NOTE(review): the factor 100 is not explained in the notebook (presumably a
# repetition count) -- confirm.
total_slice_fits = fit_count4 + fit_count5 + fit_count6
total_slice_fits * 100 * qpu_access_time_50 / 60

9.418258466666666

In [32]:
# Data set used for this estimate; the alternatives are kept for quick switching.
file_name = 'Polaris/pgp-broccatelli.parquet'
# file_name = 'Polaris/bbb-martins.parquet'
# file_name = 'Polaris/ncats-solubility.parquet'

matrix, matrix_labels, names = svm.read_parquet(file_name)

# 177 split into quarters, thirds, halves and the whole (177 is the
# max_qubo_dim used in the disabled search-space estimate below).
slice_sizes = [int(177 / divisor) for divisor in (4, 3, 2, 1)]
N = 1  # number of train/test splits; the loop index doubles as the split seed
limits = [34, 26, 18, 12]  # `limit` passed to the fit counter, one entry per slice size
fit_count7 = 0

for i in tqdm(range(N)):
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=False,
    )
    # Reference kernel width: 1 / (n_features * variance of the training data).
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    # Earlier grid-style variant, kept for reference:
    # param_grid = {'base': [2,10], 'num_encoding': [1,2,3,4],'kernel': [n*default_gamma for n in [0.2,0.4]], 'penalty': [0,1]}
    # 'num_encoding' is not searched in this cell; it is fixed to 1 on the estimator below.
    search_space = {
        'base': sks.Categorical([2, 10]),
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    # Disabled alternative that derives `limit` from the estimated search-space size:
    # search_sizes = svm.estimate_search_space_size(svm.split_dwave_search_space(search_space, slice_size=None, max_qubo_dim=177), samples_per_real_interval=5)
    # limit = round(sum([svm.samples_per_size(search_size) for search_size in search_sizes]))
    inner_estimator = svm.qSVM_estimator(solver='SA', num_encoding=1, adjust_bias=False)
    for s, (slice_size, limit) in enumerate(zip(slice_sizes, limits)):
        print('Slice size: ', slice_size)
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=True,
            adjust_outer_bias=True,
            seed=0,
        )
        fits = svm.count_hyperparameter_optimization_fits(
            estimator,
            search_space=search_space,
            vectors=vectors,
            labels=labels,
            folds=4,
            filter=True,
            limit=limit,
            mode='bayes',
            seed=0,
            print_info=True,
        )
        fit_count7 += fits

# Fit count times the per-problem access time: estimated QPU seconds for one split.
print(fit_count7 * qpu_access_time_250)

  0%|          | 0/1 [00:00<?, ?it/s]

Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


100%|██████████| 1/1 [01:42<00:00, 102.09s/it]

253.67344464





In [39]:
# Estimated QPU minutes for one split, scaled by 5.
# NOTE(review): the factor 5 is presumably the number of splits planned for the run -- confirm.
minutes_per_split = fit_count7 * qpu_access_time_250 / 60
print(minutes_per_split * 5)


21.139453720000002


In [41]:
import numpy as np
import SVM_scripts as svm
import sklearn as skl
import skopt
import skopt.space as sks
from tqdm import tqdm
import pickle

# Benchmark data set for this run (one line active at a time).
data_name = 'pgp-broccatelli'
# data_name = 'bbb-martins'
# data_name = 'ncats-solubility'

run_name = 'QA'

file_name = 'Polaris/' + data_name + '.parquet'
matrix, matrix_labels, names = svm.read_parquet(file_name)

# 177 split into quarters, thirds, halves and the whole.
slice_sizes = [int(177 / divisor) for divisor in (4, 3, 2, 1)]
limits = [34, 26, 18, 12]  # `limit` passed to the fit counter, one entry per slice size
N = 5

start = 0
# Four independent result containers, each mapping slice size -> empty list.
# They are only initialised here; the call that would fill them is disabled below.
test_values_list = {size: [] for size in slice_sizes}
best_params = {size: [] for size in slice_sizes}
param_tables = {size: [] for size in slice_sizes}
cv_results = {size: [] for size in slice_sizes}
data_sets = []
search_spaces = []
fit_count = 0
for i in tqdm(range(start, N)):
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=True,
    )
    # Reference kernel width: 1 / (n_features * variance of the training data).
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    search_space = {
        'base': sks.Categorical([2, 10]),
        'num_encoding': sks.Integer(1, 4),
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    # Remember the search space and the split used for each seed.
    search_spaces.append(search_space)
    data_sets.append((vectors, labels, test_vectors, test_labels))
    inner_estimator = svm.qSVM_estimator(solver=('SA', 250), adjust_bias=False)
    for s, (slice_size, limit) in enumerate(zip(slice_sizes, limits)):
        print('Slice size: ', slice_size)
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=True,
            adjust_outer_bias=True,
            seed=0,
        )
        # Disabled: the actual optimisation run; only the number of fits is counted here.
        # f, param_table, opt, optimal_params = svm.hyperparameter_optimization(estimator=estimator, search_space=search_space, vectors=vectors, labels=labels, folds=4, mode='bayes', limit=limit, filter=True, print_info='q', seed=0)
        fits = svm.count_hyperparameter_optimization_fits(
            estimator,
            search_space=search_space,
            vectors=vectors,
            labels=labels,
            folds=4,
            filter=True,
            limit=limit,
            mode='bayes',
            seed=0,
            print_info=True,
        )
        fit_count += fits
# Total fits over all splits times the per-problem access time, in minutes.
print(fit_count * qpu_access_time_250 / 60)

  0%|          | 0/5 [00:00<?, ?it/s]

Data size: 1218 (650 positive, 568 negative)
0 vectors removed due to inconsistently labelled degeneracies
Training data size: 609 (330 positive, 279 negative)
Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


 20%|██        | 1/5 [00:48<03:13, 48.30s/it]

Data size: 1218 (650 positive, 568 negative)
0 vectors removed due to inconsistently labelled degeneracies
Training data size: 609 (332 positive, 277 negative)
Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


 40%|████      | 2/5 [01:38<02:28, 49.43s/it]

Data size: 1218 (650 positive, 568 negative)
0 vectors removed due to inconsistently labelled degeneracies
Training data size: 609 (327 positive, 282 negative)
Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


 60%|██████    | 3/5 [02:32<01:42, 51.33s/it]

Data size: 1218 (650 positive, 568 negative)
0 vectors removed due to inconsistently labelled degeneracies
Training data size: 609 (315 positive, 294 negative)
Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


 80%|████████  | 4/5 [03:23<00:51, 51.35s/it]

Data size: 1218 (650 positive, 568 negative)
0 vectors removed due to inconsistently labelled degeneracies
Training data size: 609 (328 positive, 281 negative)
Slice size:  44
Slice size:  59
Slice size:  88
Slice size:  177


100%|██████████| 5/5 [04:19<00:00, 51.89s/it]

20.528326279999998





In [42]:
# Time left after the estimated run, in minutes.
# NOTE(review): 32 + 10/60 is presumably a remaining budget of 32 min 10 s -- confirm.
# The usage term is recomputed from `fit_count` (set by the estimate cell
# directly above) instead of pasting that cell's printed value
# (20.528326279999998 in the recorded run), so it cannot go stale on a re-run.
32 + 10/60 - fit_count * qpu_access_time_250 / 60

11.638340386666666