In [1]:
import numpy as np
import pandas as pd
import pyomo.environ as pyo

In [2]:
def csv_to_matrix(file_name, mapping_file_name='mapping.csv'):
    """Load a square ';'-separated matrix together with its row labels.

    Parameters
    ----------
    file_name : str
        Path of the matrix file. Its first line and its first column are
        skipped (presumably the header and the row labels - confirm against
        the data); the remaining n x n block is parsed as floats.
    mapping_file_name : str, optional
        Path of the ';'-separated mapping file whose column with index 3
        holds the row labels. Defaults to 'mapping.csv', the file that used to
        be hard-coded here.

    Returns
    -------
    (rows, matrix) : tuple
        `rows` is a 1-D array with the labels in file order, `matrix` is the
        n x n float array, n being the number of data lines in `file_name`.

    Raises
    ------
    ValueError
        If the matrix file does not contain n value columns for its n data lines.
    """
    # Row labels: the single requested column (index 3) of the mapping file.
    label_frame = pd.read_csv(mapping_file_name, delimiter=';', usecols=[3])
    rows = np.array(label_frame.iloc[:, 0].tolist())

    # Parse the matrix file once. Non-numeric fields (the label column) become
    # nan and are sliced away below; atleast_2d keeps a one-line file 2-D.
    data = np.atleast_2d(np.genfromtxt(file_name, delimiter=';', skip_header=1))
    dimension = data.shape[0]
    matrix = data[:, 1:dimension + 1]
    if matrix.shape[1] != dimension:
        raise ValueError(
            "Expected a square matrix: %d data lines but %d value columns in %r"
            % (dimension, matrix.shape[1], file_name)
        )
    return(rows, matrix)

def distance_to_similarity(Q, neutral_distance=1):
    """Turn distances into similarities via ``1 - Q / neutral_distance``.

    A distance equal to `neutral_distance` maps to similarity 0; larger
    distances map to negative similarities and distance 0 maps to 1.
    For a distance of 1 the result is (neutral_distance - 1) / neutral_distance.

    Q may be any array-like; the result is a new numpy array of the same shape.
    """
    scaled_distance = np.divide(np.array(Q), neutral_distance)
    return 1 - scaled_distance

def load_mapping(file_name):
    """Read the ';'-separated mapping file and encode each row's class as a number.

    Returns a tuple ``(frame, dictionary)``: `frame` is the full table as read
    by pandas, `dictionary` maps every value of the 'ROWID' column to
    1 ("Approved"), 0 ("Preclinical") or -1 ("Withdrawn") according to the
    'Class' column. A class outside these three raises KeyError.
    """
    class_to_number = {"Approved": 1, "Preclinical": 0, "Withdrawn": -1}
    frame = pd.read_csv(file_name, delimiter=';')
    class_by_row = frame.set_index('ROWID')['Class']
    dictionary = {}
    for row_id, class_name in class_by_row.items():
        dictionary[row_id] = class_to_number[class_name]
    return frame, dictionary

def Ising_to_QUBO(J,h):
    """Convert an Ising problem over spins s in {-1, +1} to QUBO form over x in {0, 1}.

    Substituting s = 2x - 1 gives, for every binary vector x,

        sum_{i<j} J[i,j] s_i s_j + sum_i h[i] s_i
            == sum_{i<=j} Q[i,j] x_i x_j + epsilon

    Only the strict upper triangle of J is used; its diagonal and lower
    triangle are ignored.

    Parameters
    ----------
    J : (n, n) array-like
        Coupling matrix.
    h : length-n array-like
        Linear biases.

    Returns
    -------
    (Q, epsilon) : tuple
        Q is the upper-triangular (n, n) QUBO matrix with
        Q[i,j] = 4*J[i,j] for i < j and
        Q[i,i] = 2*(h[i] - sum of all couplings involving i).
        epsilon is the constant offset sum_{i<j} J[i,j] - sum_i h[i].

    Raises
    ------
    ValueError
        If J is not square or its size does not match len(h).
    """
    J = np.asarray(J, dtype=float)
    h = np.asarray(h, dtype=float)
    (m,n) = J.shape
    if len(h) != m or len(h) != n:
        raise ValueError("J must be quadratic with dimension of h!")
    couplings = np.triu(J, k=1)
    Q = 4 * couplings
    # Each spin i takes part in the couplings of row i (j > i) and column i (j < i).
    coupling_per_spin = couplings.sum(axis=1) + couplings.sum(axis=0)
    Q[np.diag_indices(n)] = 2 * (h - coupling_per_spin)
    # The offset needs the sum over ALL couplings, not only those of the last row.
    epsilon = float(couplings.sum() - h.sum())
    return(Q,epsilon)

In [3]:
# Input files: the matrix file (presumably pairwise distances, given how it is used below - confirm)
# and the mapping table that provides row labels and classes.
file_name = 'Matrix_ECFP4.csv'
mapping_file_name = 'mapping.csv'
rows, distance_J = csv_to_matrix(file_name)
mapping, bias_dict = load_mapping(mapping_file_name)
# Map distances to similarities (distance 0.3 -> similarity 0) and keep only the upper triangle, diagonal included.
J = np.triu(distance_to_similarity(distance_J,0.3))
# NOTE(review): bias_scaling (0.05 * n) is not used by the bias computed below, which hard-codes 0.1 * n -
# confirm which factor is intended.
bias_scaling = 0.05*len(J)
# One bias entry per row label: 0.1 * n times the class value (1 / 0 / -1) looked up in bias_dict.
bias = np.array([0.1*len(J) * bias_dict[row] for row in rows])

In [4]:
# # artificially create withdrawn compounds:
# for i in range(len(J)-10,len(J)):
#     bias[i] = - bias_scaling

In [5]:
# convert the Ising problem (couplings J, biases bias) to QUBO:
Q, epsilon = Ising_to_QUBO(J,bias)
# set up Pyomo QUBO model:
model = pyo.ConcreteModel()
model.rows = pyo.Set(initialize=rows)
model.I2 = pyo.Set(initialize=model.rows*model.rows)
model.Q = pyo.Param(model.rows*model.rows, initialize={(rows[i],rows[j]):Q[i,j] for i in range(len(Q)) for j in range(len(Q))})
model.epsilon = pyo.Param(initialize = epsilon)
model.x = pyo.Var(model.rows, within = pyo.Binary)
def ObjRule(model):
    """QUBO objective: sum of Q[i,j]*x[i]*x[j] over all index pairs plus the constant epsilon."""
    quadratic = sum(model.Q[i,j] * model.x[i] * model.x[j] for (i,j) in model.I2)
    constant = model.epsilon
    return(quadratic + constant)
# Random coefficients in (-1, 1] for the alternative objective below. The shape follows Q
# (instead of a hard-coded 102 x 102) so the indexing with range(len(Q)) is always valid,
# and the generator is seeded so that re-running the notebook gives the same model.
rng = np.random.default_rng(0)
P = 1 - 2 * rng.random(Q.shape)
model.P = pyo.Param(model.rows*model.rows, initialize={(rows[i],rows[j]):P[i,j] for i in range(len(Q)) for j in range(len(Q))})
def randon_obj(model):
    """Alternative objective built from the random coefficients P instead of Q."""
    return(sum(model.P[i,j] * model.x[i] * model.x[j] for (i,j) in model.I2))
# NOTE(review): the QUBO objective (ObjRule) is disabled and the random objective is active -
# confirm this is intended before interpreting the results.
# model.obj = pyo.Objective(rule=ObjRule, sense=pyo.maximize)
model.obj = pyo.Objective(rule=randon_obj, sense=pyo.maximize)

ERROR: Rule failed when generating expression for Objective obj with index
    None: IndexError: only integers, slices (`:`), ellipsis (`...`),
    numpy.newaxis (`None`) and integer or boolean arrays are valid indices
ERROR: Constructing component 'obj' from data=None failed: IndexError: only
    integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and
    integer or boolean arrays are valid indices


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [6]:
# Print the constructed objective expression; it has one term per (i, j) pair in model.I2, so the output is long.
model.obj.pprint()

obj : Size=1, Index=None, Active=True
    Key  : Active : Sense    : Expression
    None :   True : maximize : 417.6874789803258*x[Row0]*x[Row0] - 8.968036529680365*x[Row0]*x[Row1] - 8.134831460674159*x[Row0]*x[Row2] - 7.6894977168949765*x[Row0]*x[Row3] - 5.7435897435897445*x[Row0]*x[Row4] - 7.80952380952381*x[Row0]*x[Row5] - 8.549019607843137*x[Row0]*x[Row6] - 7.397849462365592*x[Row0]*x[Row7] - 8.418300653594772*x[Row0]*x[Row8] - 8.072072072072071*x[Row0]*x[Row9] - 8.397660818713451*x[Row0]*x[Row10] - 7.6455696202531644*x[Row0]*x[Row11] - 8.352941176470589*x[Row0]*x[Row12] - 7.711711711711713*x[Row0]*x[Row13] - 7.746031746031747*x[Row0]*x[Row14] - 8.745098039215687*x[Row0]*x[Row15] - 8.148148148148149*x[Row0]*x[Row16] - 7.428571428571431*x[Row0]*x[Row17] - 8.592592592592593*x[Row0]*x[Row18] - 7.176470588235295*x[Row0]*x[Row19] - 8.277227722772277*x[Row0]*x[Row20] - 7.986531986531988*x[Row0]*x[Row21] - 7.25*x[Row0]*x[Row22] - 7.564625850340136*x[Row0]*x[Row23] - 6.144927536231885*x[Ro

In [7]:
# Create the instance to solve and configure the CPLEX solver.
instance = model.create_instance()
solver = pyo.SolverFactory('cplex', tee=False)
# A time limit of 0 leaves the solver's 'timelimit' option unset.
time_limit = 0
if time_limit != 0:
    solver.options['timelimit'] = time_limit
results = solver.solve(instance)
computation_time = results.solver.time

# Translate failure termination conditions into an exception with a matching message.
termination = results.solver.termination_condition
failure_message = None
if termination == pyo.TerminationCondition.infeasible:
    failure_message = 'infeasible'
elif termination == pyo.TerminationCondition.unbounded:
    failure_message = 'No solution found'
elif termination == pyo.TerminationCondition.unknown:
    failure_message = 'No solution found. Increase time limit'
if failure_message is not None:
    raise Exception(failure_message)

# Collect the objective value and the variable values, both per row label and as an array in row order.
objective = pyo.value(instance.obj)
solution_dict = {row: instance.x[row].value for row in model.rows}
solution = np.array(list(solution_dict.values()))



In [10]:
# Report timing, objective value and how the two solution groups line up with the known classes.
# A variable value above 0.5 counts as "good", below 0.5 as "bad"; the class is read off the sign of the bias.
is_good = solution > 0.5
is_bad = solution < 0.5
bias_sign = np.sign(bias)
print('Computation_time: ' + str(computation_time), 'Objective ' + str(objective))
print('Group Good: ' + str(np.count_nonzero(is_good)), 'Group Bad: ' + str(np.count_nonzero(is_bad)))
# sep='' because every entry already ends in '\n'; the default ' ' separator would
# otherwise put a stray leading space in front of every line after the first.
print(
    'Approved which are bad: ' + str(np.count_nonzero(np.logical_and(bias_sign == 1, is_bad))) + '\n',
    'Withdrawn which are good: ' + str(np.count_nonzero(np.logical_and(bias_sign == -1, is_good))) + '\n',
    'Preclinical which are bad ' + str(np.count_nonzero(np.logical_and(bias_sign == 0, is_bad))) + '\n',
    'Preclinical which are good ' + str(np.count_nonzero(np.logical_and(bias_sign == 0, is_good))) + '\n',
    sep=''
    )
print(solution)

Computation_time: 0.7051537036895752 Objective 11658.17047631195
Group Good: 51 Group Bad: 51
Approved which are bad: 0
 Withdrawn which are good: 0
 Preclinical which are bad 51
 Preclinical which are good 0

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]


In [18]:
# Scratch check: an entry of sign(bias) cannot equal both 1 and 0, so this mask is all False.
np.logical_and(np.sign(bias) == 1, np.sign(bias) == 0)

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False])

In [9]:
# Scratch check: a 102 x 102 matrix of unseeded random values in (-1, 1].
1 - 2 * np.random.rand(102,102)

array([[-0.91517545, -0.3710789 ,  0.31730227, ...,  0.05741622,
         0.82891644,  0.28097201],
       [ 0.1194123 , -0.64227658, -0.4116585 , ...,  0.21080753,
         0.77304073, -0.13840229],
       [-0.99284384,  0.85387094,  0.17942438, ..., -0.95350746,
        -0.58693518, -0.42993666],
       ...,
       [-0.58956473, -0.50887073, -0.68517222, ..., -0.55146006,
        -0.09190872, -0.43175628],
       [ 0.0195105 , -0.79997441,  0.27309958, ...,  0.50054095,
         0.06333809,  0.32360881],
       [-0.27027406,  0.23881729, -0.11429155, ...,  0.22804601,
        -0.75105016, -0.58709018]])