In [1]:
import pandas as pd
import gurobipy as gp
from gurobipy import GRB

In [2]:
aminoacids   = list(range(1,51))
hydrophobics = [2, 4, 5, 6, 11, 12, 17, 20, 21, 25, 
               27, 28, 30, 31, 33, 37, 44, 46]

In [43]:
# We could search among ALL pairs of positions in hydrophobics, 
# but in reality, matches are feasible only in a subset of all
# these pairs. So, let's make this subset.

pairs = [] # possible matching pairs (subset of ALL possible pairs)

# Since the folding is in a 2D grid, the protein has to make a
# 180 degrees curve to allow a match to occur. This eliminates
# the possibility that neighbors acids match.

for i in hydrophobics:
    for j in hydrophobics:
        if j > i + 1:
            pairs.append((i, j))
            
# Now, giving a pair, in which position would a fold appear?
# Anywhere between i and j, as long as there's an even number
# of acids between them.

match_and_fold = []
match_and_cant_fold = []

for i, j in pairs:
    b = j - i - 1                                # number of acids between i and j   
    for f in range(i, j):
        if f == i + b/2 and b % 2 == 0:          # is b an even number?
            match_and_fold.append((i, j, f))
        else:
            match_and_cant_fold.append((i, j, f))
           

In [72]:
model = gp.Model('Protein Folding')

# add vars
match = model.addVars(pairs, 
                      vtype=GRB.BINARY, 
                      name='match')
fold = model.addVars(aminoacids,
                     vtype=GRB.BINARY,
                     name='fold')

# objective function
model.setObjective(gp.quicksum(match[i,j] for i, j, f in match_and_fold),
                   GRB.MAXIMIZE)

# add constraints
model.addConstrs((match[i, j] == fold[f] for i, j, f in match_and_fold),
                  name='if_match_then_fold')

model.addConstrs((fold[f] + match[i, j] <= 1 for i, j, f in match_and_cant_fold),
                  name='block_other_folds')
model.update()

In [73]:
model.write('Protein Folding.lp')
model.optimize()

Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 2441 rows, 197 columns and 4882 nonzeros
Model fingerprint: 0xcec2b67c
Variable types: 0 continuous, 197 integer (197 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective -0.0000000
Presolve removed 2441 rows and 197 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.02 seconds
Thread count was 1 (of 8 available processors)

Solution count 2: 8 

Optimal solution found (tolerance 1.00e-04)
Best objective 8.000000000000e+00, best bound 8.000000000000e+00, gap 0.0000%


In [79]:
print("Total of {} matches".format(sum(match[m].x for m in match)))
print("Matches between these pairs:")
for m in match:
    if match[m].x == 1: print(m)
print("Folds in these acids:")
for f in fold:
    if fold[f].x == 1: print(f)

Total of 8.0 matches
Matches between these pairs:
(2, 5)
(5, 12)
(6, 11)
(12, 33)
(17, 28)
(20, 25)
(31, 46)
(33, 44)
Folds in these acids:
3
8
22
38
