# Implementation of the common substring model for the Minimum Common String Partition (MCSP) problem

In [1]:
from typing import *

import numpy as np

In [2]:
from ortools.init import pywrapinit
from ortools.linear_solver import pywraplp

# GLOP is a pure LP backend: it ignores the integrality of IntVar, so the 0/1
# model would be solved as its continuous relaxation. Use a MIP backend instead.
solver = pywraplp.Solver.CreateSolver('SCIP')
if solver is None:
  # CreateSolver returns None when the requested backend is unavailable.
  raise RuntimeError('SCIP backend is not available in this OR-Tools build.')

In [3]:
def gen_substring_pos(s: str, T: Set[str]) -> Dict[str, List[Tuple[int,int]]]:
  """Map each substring of ``s`` that belongs to ``T`` to its occurrences.

  Every occurrence is stored as a half-open ``(start, end)`` pair such that
  ``s[start:end]`` equals the substring; the pairs of one substring are listed
  by increasing ``start``. Members of ``T`` that never occur in ``s`` get no key.
  """
  occurrences: Dict[str, List[Tuple[int,int]]] = {}
  length = len(s)

  for start in range(length):
    for end in range(start + 1, length + 1):
      piece = s[start:end]
      if piece in T:
        occurrences.setdefault(piece, []).append((start, end))

  return occurrences

In [4]:
def gen_substring_set(s: str) -> Set[str]:
  """Return the set of all distinct non-empty contiguous substrings of ``s``."""
  length = len(s)
  return {
    s[start:end]
    for start in range(length)
    for end in range(start + 1, length + 1)
  }

In [5]:
# Input strings of the MCSP instance.
S1 = 'AGACTG'
S2 = 'ACTAGG'
# A common partition can only exist when both strings are built from the same
# multiset of characters; fail early with a clear message otherwise.
if sorted(S1) != sorted(S2):
  raise ValueError('S1 and S2 must contain exactly the same characters.')
# Length of S1 (equal to the length of S2 once the check above has passed).
N = len(S1)

In [6]:
# All distinct non-empty substrings of each input string.
SUBS_OF_S1, SUBS_OF_S2 = (gen_substring_set(text) for text in (S1, S2))

In [7]:
# Candidate blocks: the substrings that occur in both input strings.
T = SUBS_OF_S1 & SUBS_OF_S2

In [8]:
# Q[0][t] and Q[1][t] hold the (start, end) occurrences of block t in S1 and S2.
Q = [gen_substring_pos(text, T) for text in (S1, S2)]

# Variables

In [9]:
# One 0/1 variable per (block, string, occurrence), keyed by
# (block index, string index, start, end).
y = {}
for t_idx, t in enumerate(T):
  for q_idx, positions in enumerate(Q):
    for start, end in positions[t]:
      y[t_idx, q_idx, start, end] = solver.IntVar(
        0, 1, f'y^{q_idx}_{t_idx}_{start}_{end}')

In [10]:
# Report how many decision variables the model holds.
variable_count = solver.NumVariables()
print('Number of variables =', variable_count)

Number of variables = 20


# Constraints

Balance constraint (for every $t \in T$): $\sum\limits_{k \in Q^{1}_{t}} y^{1}_{t,k} = \sum\limits_{k \in Q^{2}_{t}} y^{2}_{t,k}$

In [11]:
# Each block t must be selected the same number of times in S1 and in S2.
for t_idx, t in enumerate(T):
  picks_in_s1 = [y[t_idx, 0, start, end] for start, end in Q[0][t]]
  picks_in_s2 = [y[t_idx, 1, start, end] for start, end in Q[1][t]]

  solver.Add(solver.Sum(picks_in_s1) == solver.Sum(picks_in_s2))

Coverage of $S_1$ (for every position $j$ of $S_1$): $\sum\limits_{t \in T} \sum\limits_{k \in Q^{1}_{t} \,\mid\, k \leq j < k + |t| } y^{1}_{t,k} = 1$

In [12]:
# Every position j of S1 must be covered by exactly one selected occurrence.
for j in range(N):
  covering = [
    y[t_idx, 0, start, end]
    for t_idx, t in enumerate(T)
    for start, end in Q[0][t]
    if start <= j < end  # the occurrence spans the half-open range [start, end)
  ]

  solver.Add(solver.Sum(covering) == 1)

Coverage of $S_2$ (for every position $j$ of $S_2$): $\sum\limits_{t \in T} \sum\limits_{k \in Q^{2}_{t} \,\mid\, k \leq j < k + |t| } y^{2}_{t,k} = 1$

In [14]:
# Every position j of S2 must be covered by exactly one selected occurrence.
# Iterate over the positions of S2 itself rather than reusing the length of S1.
for j in range(len(S2)):
  lista = []
  for t_idx, t in enumerate(T):
    for start, end in Q[1][t]:  # end == start + len(t)
      if start <= j < end:
        lista.append(y[t_idx, 1, start, end])

  solver.Add(solver.Sum(lista) == 1)

# Objective Function

Minimize: $\sum\limits_{t \in T} \sum\limits_{k \in Q^{1}_{t}} y^{1}_{t,k}$

In [17]:
# Minimize the number of selected occurrences, counted on the S1 side.
objective_terms = [
  y[t_idx, 0, start, end]
  for t_idx, t in enumerate(T)
  for start, end in Q[0][t]
]

solver.Minimize(solver.Sum(objective_terms))

# Solving

In [18]:
# Run the solver and keep the returned status code for the check that follows.
status = solver.Solve()

In [19]:
# Print the objective value when the solver reports a usable solution.
solved_statuses = (pywraplp.Solver.OPTIMAL, pywraplp.Solver.FEASIBLE)
if status not in solved_statuses:
  print("Não há solução ótima.")
else:
  print(solver.Objective().Value())

3.0


In [24]:
# Collect the blocks selected on the S1 side of the partition.
res = []
for t_idx, t in enumerate(T):
  for start, end in Q[0][t]:
    value = y[t_idx, 0, start, end].solution_value()
    print(f'{t_idx},{0},{start},{end}: {value}')

    # solution_value() is a float, so a selected variable may come back as
    # e.g. 0.9999999; threshold instead of comparing with 1 exactly.
    if value > 0.5:
      res.append(S1[start:end])

0,0,1,2: 0.0
0,0,5,6: 1.0
1,0,3,5: 0.0
2,0,0,1: 0.0
2,0,2,3: 0.0
3,0,2,5: 1.0
4,0,2,4: 0.0
5,0,3,4: 0.0
6,0,0,2: 1.0
7,0,4,5: 0.0


In [27]:
# Sort the selected blocks in place (lexicographic order).
res.sort()

In [28]:
# Bare expression: the notebook displays the list of selected blocks.
res

['ACT', 'AG', 'G']