In [1]:
from typing import *

from ortools.init import pywrapinit
from ortools.linear_solver import pywraplp

In [2]:
def gen_substring_set(s: str) -> Set[str]:
  """This function returns all substring of the given string s

  Args:
      s (str): a string we want extract all substring

  Returns:
      Set[str]: A set containing every substring without duplicates.
  """
  res: Set[str] = set()

  for i in range(len(s)):
    for j in range(i+1,len(s)+1):
      res.add(s[i:j])

  return res

In [3]:
def get_ocurrence_indices(S: str, pattern: str):
  res = []
  for i in range(len(S)):
    if S[i:i+len(pattern)] == pattern:
      res.append(i)
  return res

In [4]:
def gen_common_blocks(S1: str, S2: str, T: Set[str]) -> Dict[str, List[Tuple[int,int]]]:
  blocks: Dict[str, List[Tuple[int,int]]] = {}
  
  for t in T:
    pos_of_t_in_S1 = get_ocurrence_indices(S1, t) #[m.start() for m in re.finditer(t, S1)]
    pos_of_t_in_S2 = get_ocurrence_indices(S2, t) #[m.start() for m in re.finditer(t, S2)]
  
    for i in pos_of_t_in_S1:
      for j in pos_of_t_in_S2:
        if t in blocks:
          blocks[t].append((i,j))
        else:
          blocks[t] = [(i,j)]
        
  return blocks

In [5]:
# S1 = 'AGACTGAGG'
# S2 = 'ACTAGGAGG'
# res: 3

# S1 = 'A'*4 # AAAA
# S2 = 'A'*4

S1 = '34431433244412241432221422333133411412412412412434222241332234331311124144111432313224141131223313212444442323313'
S2 = '11431111312241224222342323243133341424113423432413431242234321214232414233413221234334331413141234241211444432114'
# res: 41

# S1 = '11431131222222242211241142233142424314334333334114212232341142441422241314343143441412214343314324312341333414223221242432233112313443421344143312213144413121'
# S2 = '21114222233233431242212421223443444222434144334233243143431241134141124112331441131423434214333342211141234323241224133411332243342213211142413134241311424312'
# res: 52

N = len(S1)

In [6]:
N

113

In [7]:
SUBS_OF_S1 = gen_substring_set(S1)
SUBS_OF_S2 = gen_substring_set(S2)
T: Set[str] = set.intersection(SUBS_OF_S1, SUBS_OF_S2)

In [8]:
#T

{'1',
 '11',
 '111',
 '113',
 '1131',
 '11312',
 '113122',
 '114',
 '1143',
 '12',
 '122',
 '1224',
 '12241',
 '124',
 '13',
 '131',
 '1312',
 '13122',
 '132',
 '1322',
 '133',
 '14',
 '141',
 '1412',
 '142',
 '143',
 '144',
 '2',
 '21',
 '212',
 '214',
 '2142',
 '22',
 '221',
 '222',
 '223',
 '2234',
 '22343',
 '224',
 '2241',
 '23',
 '232',
 '2323',
 '233',
 '234',
 '2343',
 '23433',
 '24',
 '241',
 '2412',
 '2413',
 '2414',
 '243',
 '3',
 '31',
 '311',
 '3111',
 '312',
 '3122',
 '313',
 '3133',
 '314',
 '32',
 '321',
 '3212',
 '322',
 '323',
 '324',
 '33',
 '331',
 '333',
 '334',
 '3341',
 '34',
 '341',
 '342',
 '343',
 '3433',
 '34331',
 '4',
 '41',
 '411',
 '4113',
 '412',
 '4122',
 '41224',
 '413',
 '414',
 '42',
 '422',
 '4222',
 '4223',
 '423',
 '4232',
 '42323',
 '43',
 '431',
 '432',
 '433',
 '4331',
 '44',
 '443',
 '444',
 '4444'}

In [9]:
B = gen_common_blocks(S1, S2, T)

In [10]:
#B

In [11]:
solver = pywraplp.Solver.CreateSolver('CBC')

## Variáveis

In [12]:
x = {}
for t_idx, t in enumerate(T):
  for b in B[t]:
    x[t_idx,b[0],b[1]] = solver.BoolVar(f'x[{t_idx},{b[0]},{b[1]}]')

print('Number of variables =', solver.NumVariables())

Number of variables = 4229


## Constraint

caso: $\sum\limits_{i \in \{1,\dots,m\ | k^{1}_{i} \leq j < k^{1}_{i} + |t_i|\}} x_i = 1, \forall j = 1, \dots, n$

In [13]:
for j in range(N):
  blocks_at_pos_j = []
  for t_idx, t in enumerate(T):
    for b in B[t]:
      if b[0] <= j < (b[0] + len(t)):
        blocks_at_pos_j.append(x[t_idx, b[0], b[1]])
  
  solver.Add(solver.Sum(blocks_at_pos_j) == 1)

caso: $\sum\limits_{i \in \{1,\dots,m\ | k^{2}_{i} \leq j < k^{2}_{i} + |t_i|\}} x_i = 1, \forall j = 1, \dots, n$

In [14]:
for j in range(N):
  blocks_at_pos_j = []
  for t_idx, t in enumerate(T):
    for b in B[t]:
      if b[1] <= j < (b[1] + len(t)):
        blocks_at_pos_j.append(x[t_idx, b[0], b[1]])
        
  solver.Add(solver.Sum(blocks_at_pos_j) == 1)

## Objective function

$ \text{Minimize} \sum\limits^{m}_{i=1} x_i $

In [15]:
objetive_terms = []
for t_idx, t in enumerate(T):
  for b in B[t]:
    objetive_terms.append(x[t_idx,b[0],b[1]])

solver.Minimize(solver.Sum(objetive_terms))

## Solving

In [16]:
status = solver.Solve()

In [17]:
if status == pywraplp.Solver.OPTIMAL or status == pywraplp.Solver.FEASIBLE:
  print(solver.Objective().Value())
else:
  print("Não há solução ótima.")

40.19178082191781


In [18]:
res = []

In [19]:
for t_idx, t in enumerate(T):
  for b in B[t]:
    if abs(x[t_idx,b[0],b[1]].solution_value() - 1) <= 0.1:
      res.append(t)

In [23]:
res

['34']

In [43]:
teste = []
for t_idx, t in enumerate(T):
  for b in B[t]:
    if x[t_idx,b[0],b[1]].solution_value() != 0:
      print(t, x[t_idx,b[0],b[1]].solution_value())
    

124 0.102739726027397
124 0.102739726027397
124 0.102739726027397
124 0.102739726027397
22 0.15068493150684859
22 0.12328767123287565
22 0.12328767123287832
141 0.01369863013698569
34 1.0
34 0.06164383561643916
34 0.12328767123287832
34 0.332191780821917
113122 0.698630136986303
3212 0.7294520547945208
14 0.147260273972602
14 0.054794520547946035
14 0.27054794520547915
14 0.5205479452054795
14 0.1849315068493147
324 0.366438356164386
324 0.01369863013698569
33 0.3664383561643808
33 0.34589041095890405
33 0.15068493150685225
33 2.220446049250313e-16
2241 0.13698630136986267
444 1.1102230246251565e-16
444 0.2705479452054792
42323 0.12328767123287698
41 1.1102230246251565e-16
41 0.06164383561643905
41 0.04109589041095796
41 0.22945205479452113
41 0.7328767123287638
131 0.28767123287671126
131 0.25342465753425125
43 0.3356164383561646
43 0.13698630136986256
43 0.27397260273972546
43 0.12328767123287698
223 0.1849315068493147
223 0.08904109589040932
3111 0.26369863013698325
142 0.1369863013

36

In [21]:
# with open('modelo.txt', 'w') as f:
#   f.write(solver.ExportModelAsLpFormat(False))
#   f.close()

In [22]:
# N