## sort cyclic shifts of string s + '$' to construct the suffix array

In [0]:
from collections import OrderedDict

def getCharOrder(s):
  n = len(s)
  numChars = 256
  count = [0]*numChars # totally 256 chars, if you want, can print it out to see these chars
  
  order = [0]*(n)
  
  #count the occurrence of each char
  for c in s:
    count[ord(c)] += 1
    
  # prefix sum of each char
  for i in range(1, numChars):
    count[i] += count[i-1]
    
  # assign from count down to be stable
  for i in range(n-1,-1,-1):
    count[ord(s[i])] -=1
    order[count[ord(s[i])]] = i # put the index into the order instead the suffix string
    
  return order
    

In [0]:
def getCharClass(order, cls):
  n = len(order)
  cls = [0]*n
  # if it all differs, then cls[i] = order[i]
  cls[order[0]] = 0 #the 6th will be 0
  for i in range(1, n):
    # use order[i] as index, so the last index
    if s[order[i]] != s[order[i-1]]:
      print('diff',s[order[i]],s[order[i-1]])
      cls[order[i]] = cls[order[i-1]] + 1
    else:
      cls[order[i]] = cls[order[i-1]]
  return cls
      

In [0]:
'''It is a counting sort using the first part as class'''
def sortDoubled(s, L, order, cls):
  n = len(s)
  count = [0] * n
  new_order = [0] * n
  # their key is the class
  for i in range(n):
    count[cls[i]] += 1
    
  # prefix sum
  for i in range(1, n):
    count[i] += count[i-1]
    
  # assign from count down to be stable
  # sort the first half
  for i in range(n-1, -1, -1):
    start = (order[i] - L + n) % n #get the start index of the first half, 
    count[cls[start]] -= 1
    new_order[count[cls[start]]] = start
    
  return new_order

In [0]:
def updateClass(order, cls, L):
  n = len(order)
  new_cls = [0]*n
  # if it all differs, then cls[i] = order[i]
  new_cls[order[0]] = 0 #the 6th will be 0
  for i in range(1, n):
    cur_order, prev_order = order[i], order[i-1]
    # use order[i] as index, so the last index
    if cls[cur_order] != cls[prev_order] or cls[(cur_order+L) % n] != cls[(prev_order+L) % n]:
      new_cls[cur_order] = new_cls[prev_order] + 1
    else:
      new_cls[cur_order] = new_cls[prev_order]
  return new_cls

In [0]:

def cyclic_shifts_sort(s):
  s = s + '$'
  n = len(s)
  order = getCharOrder(s)
  cls = getCharClass(s, order)
  print(order, cls)
  L = 1
  while L < n:
    order = sortDoubled(s, 1, order, cls)
    cls = updateClass(order, cls, L)
    print(order, cls)
    L *= 2
  
  return order

In [32]:
s = 'ababaa'
cyclic_shifts_sort(s)

diff a $
diff b a
[6, 0, 2, 4, 5, 1, 3] [1, 2, 1, 2, 1, 1, 0]
[6, 5, 4, 0, 2, 1, 3] [3, 4, 3, 4, 2, 1, 0]
[6, 5, 4, 0, 2, 3, 1] [3, 6, 4, 5, 2, 1, 0]
[6, 5, 4, 0, 2, 3, 1] [3, 6, 4, 5, 2, 1, 0]


[6, 5, 4, 0, 2, 3, 1]