## String

### String Reversal

#### String reversal: iterative
e.g., 'abcd' -> 'dcba'

In [None]:
# use i, j two index pointer
# swap ith and jth item in the string until i meet j

def reverse(s):
  if not s:
    return s
  input = list(s)
  i, j = 0, len(input) - 1
  while i < j:
    input[i], input[j] = input[j], input[i]
    i += 1
    j -= 1
  return ''.join(input)

# time complexity: O(n)
# space complexity: O(1)

reverse('abcd')

'dcba'

#### String reversal: recursive

In [None]:
# 
def reverse(sList, i, j):
  # base case:
  if not sList or i >= j:
    return
  # recursive rule
  sList[i], sList[j] = sList[j], sList[i]
  reverse(sList, i + 1, j - 1)

# time complexity: O(n)
# space complexity: O(1)

sList = list('abcd')
reverse(sList, 0, 3)
sList

['d', 'c', 'b', 'a']

#### I love yahoo → yahoo love I

In [None]:
# 'I love yahoo' -> 'yahoo love I'
# first reverse the whole sentence
# then reverse each word

def reverse2(sList):
  if not sList:
    return
  reverse(sList, 0, len(sList) - 1)
  i = j = 0
  for j in range(len(sList)):
    if sList[j] == ' ':
      reverse(sList, i, j - 1)
      i = j + 1

# time complexity: O(n)
# space complexity: O(1)

input = list('I love yahoo')
reverse2(input)
input

['y', 'a', 'h', 'o', 'o', ' ', 'l', 'o', 'v', 'e', ' ', 'I']

#### k position shifting: shift the whole string to the right-hand by two positions. 
e.g., abcdef → efabcd

In [None]:
# basically this is a reverse word problem
# the string can be divided into two words, the last k letters, and the remain letters
# the problem is to reverse the two words without reverse the letters in each word
# so we can reverse the whole string first
# then reverse each word

def shift(sList, k):
  if not sList:
    return
  k = k % len(sList)
  reverse(sList, 0, len(sList) - 1)
  reverse(sList, 0, len(sList) -k - 1)
  reverse(sList, len(sList) - k, len(sList) - 1)

# time complexity: O(n)
# space complexity: O(1)
input = list('abcdef')
shift(input, 2)
input

['c', 'd', 'e', 'f', 'a', 'b']

### Char Replacement
Example 1: "student" → "stuxxt' (s1: den → s2: xx)

Example 2: www.yahoo.com/?q=flower_market#flower_store
(s1 = '_' change to s2 = '20%')

In [None]:
# if len(s1) >= len(s2), then we can start from begin to end with two pointer index i and j
# left to i (not include i) is the output result
# j is the index to iterate the string
# when s1 is found at index j, replace it with s2

def replace1(sList, s1, s2):
  if not sList:
    return sList
  i = j = 0
  while j < len(sList):
    if check(sList, j, s1):
      for char in s2:
        sList[i] = char
        i += 1
      j += len(s1)
    else:
      sList[i] = sList[j]
      i += 1
      j += 1
  return sList[:i]

def check(sList, j, s1):
  if not sList or j + len(s1) - 1 >= len(sList):
    return False
  for k in range(len(s1)):
    if sList[j + k] != s1[k]:
      return False
  return True


sList = list('studenden')
replace1(sList, 'den', 'x')

['s', 't', 'u', 'x', 'x']

In [None]:
# if len(s1) < len(s2), then the resulting string will be longer than input string
# we can not start from begin to end, but can start from end to begin.
# first scan the input string, find the locations where s1 happen and the count of how many times it happen
# calculate the resulting string length, extend the input string to the resulting length
# use two pointers i and j, start from the end of the string
# righ to i (not include i) is the output result
# j is the iterator to scan the input string
# whenever a s1 appears, replace it with s2
# time complexity: O(n * m + k * o), n = len(input), m = len(s1), k = times of s1, o = len(s2)
# space complexity: O(k)

def replace2(sList, s1, s2):
  i = 0
  count = 0 # store how many times s1 appears in sList
  indices = [] # store the indices of the end of s1 appears in sList
  while i <= len(sList) - len(s1):
    if check(sList, i, s1):
      count += 1
      indices.append(i + len(s1) - 1)
      i += len(s1)
    else:
      i += 1

  j = len(sList) - 1
  sList += [''] * count * (len(s2) - len(s1))
  i = len(sList) - 1
  while j >= 0:
    if indices and j == indices[-1]:
      for k in range(len(s2) - 1, -1, -1):
        sList[i] = s2[k]
        i -= 1
      indices.pop()
      j -= len(s1)
    else:
      sList[i] = sList[j]
      i -= 1
      j -= 1
  return sList

sList = list('student')
replace2(sList, 't', 'xyz')


['s', 'x', 'y', 'z', 'u', 'd', 'e', 'n', 'x', 'y', 'z']

## Advanced Topics

### String Shuffling

#### First direction: A1B2C3D4 → ABCD1234

In [None]:
# define letters are smaller than numbers, and sort the string with merge sort
# time complexity O(nlgn)
# space complexity O(1)
def shuffle1(sList):
  '''
  input: a list of letters and numbers
  output: a shuffled list of letters and numbers
  '''
  # base case
  if not sList or len(sList) <= 1:
    return sList
  # recursive rule
  mid = (len(sList) - 1) // 2
  return merge(shuffle1(sList[:mid + 1]), shuffle1(sList[mid + 1:]))

def merge(s1, s2):
  '''
  input: two sorted lists of letters and numbers
  output: merged sorted list of letters and numbers
  '''
  if not s1:
    return s2
  if not s2:
    return s1
  result = []
  i = j = 0
  while i < len(s1) and j < len(s2):
    if not (s1[i].isdigit() ^ s2[j].isdigit()):
      # s1[i] and s2[j] are either letters or numbers
      if s1[i] <= s2[j]:
        result.append(s1[i])
        i += 1
      else:
        result.append(s2[j])
        j += 1
    elif s1[i].isdigit() and not s2[j].isdigit():
      result.append(s2[j])
      j += 1
    else:
      result.append(s1[i])
      i += 1
  if i < len(s1):
    return result + s1[i:]
  else:
    return result + s2[j:]


sList = list('A1B2C3D4E5')
shuffle1(sList)

['A', 'B', 'C', 'D', 'E', '1', '2', '3', '4', '5']

#### Second direction: ABCD1234 → A1B2C3D4




In [None]:
# assume the number of letters are same as number of digits
# ABCDE 12345 -> AB12 CDE345
# l lm  m rm     l lm    rm   

def shuffle2(sList, left, right):
  '''
  input: a list of characters including letters and numbers sorted with letters in front
  output: a shuffled character list with alternating letters and numbers
  '''
  # base case
  if right - left <= 1:
    return

  # recursive rule
  length = right - left + 1
  m = left + length // 2
  lm = left + length // 4
  rm = m + length // 4

  reverse(sList, lm, m - 1)
  reverse(sList, m, rm - 1)
  reverse(sList, lm, rm - 1)
  #reverse(sList, lm, lm + length // 4 - 1)
  #reverse(sList, lm + length // 4, rm - 1)

  shuffle2(sList, left, left + 2 * (lm - left) - 1)
  shuffle2(sList, left + 2 * (lm - left), right)
  #shuffle2(sList, left, lm + length // 4 - 1)
  #shuffle2(sList, lm + length // 4, right)

sList = list('ABCDE12345')
shuffle2(sList, 0, len(sList) - 1)
print(sList)
sList = list('')
shuffle2(sList, 0, len(sList) - 1)
print(sList)

['A', '1', 'B', '2', 'C', '3', 'D', '4', 'E', '5']
[]


### String Permutation

#### Q2.1 no duplicate letters in input string
e.g., input string abc, output: abc, acb, bac, bca, cab, cba

In [6]:
def helper(sList, index, result):
  '''
  input: 
    sList: a list of letters
    index: the index of the recursive level
    result: a list to save permutation result
  output: None, result is updated with a list of all permutations
  '''
  # base case
  if index == len(sList) - 1:
    result.append(''.join(sList))
    return

  # recursive rule
  for i in range(index, len(sList)):
    sList[index], sList[i] = sList[i], sList[index]
    helper(sList, index + 1, result)
    sList[index], sList[i] = sList[i], sList[index]

def permu(sList):
  '''
  input: a list of letters
  output: a list of all permutation
  '''
  if not sList:
    return []
  result = []
  helper(sList, 0, result)
  return result

permu(list('abc'))

['abc', 'acb', 'bac', 'bca', 'cba', 'cab']

#### Q2.2 have duplicate letters in input string
e.g., input string: aab, output: aab, aba, baa

In [None]:
def helper(sList, index, result):
  '''
  input:
    sList: a list of letters
    index: the index of recursive level
    result: a list to save output
  output: None, result is updated with all permutation
  '''
  # base case
  if index == len(sList) - 1:
    result.append(''.join(sList))
    return
  # recursive rule
  letters = set()
  for i in range(index, len(sList)):
    if sList[i] not in letters:
      letters.add(sList[i])
      sList[i], sList[index] = sList[index], sList[i]
      helper(sList, index + 1, result)
      sList[i], sList[index] = sList[index], sList[i]
      


def permu(sList):
  '''
  input: a list of letters, could have duplicate letters
  output: a list of all permutations
  '''
  if not sList:
    return []
  result = []
  helper(sList, 0, result)
  return result

permu(list('aab'))

['aab', 'aba', 'baa']

### String En/Decoding
E.g., 'aaaabccaaaaa' → 'a4b1c2a5', Restriction: in-place

In [None]:
# 1st scan the string to encode the letters continuously appears more than 1 times,
# and count how many times a copy of letter happens one time in the string
# 2nd scan, extend the string using the count in the 1st scan, from the righ to left, insert 1 accordingly
# time complexity: O(n)
# space complexity: O(1)
def encoding(s):
  '''
  input: a list of letters
  output: a encoded list of letters
  '''
  if not s:
    return
  # first scan
  # left to i is the output string (not include i)
  # j is the scan iterator
  i = j = 0
  count = 0
  count_1_lett = 0
  for j in range(len(s)):
    if j == 0 or s[j] == s[j - 1]:
      count += 1
    else:
      s[i] = s[j - 1]
      i += 1
      if count == 1:
        count_1_lett += 1
      else:
        s[i] = str(count)
        i += 1
      count = 1
  s[i] = s[j]
  i += 1
  if count == 1:
    count_1_lett += 1
  else:
    s[i] = str(count)
    i += 1

  # 2nd scan
  del s[i:]
  s.extend([' '] * count_1_lett)
  # right to i is the output string (not include i)
  # j is the scan iterator
  right = i - 1
  i = len(s) - 1
  pre_is_letter = True
  for j in range(right, -1, -1):
    if pre_is_letter and not s[j].isdigit():
      s[i] = '1' 
      s[i - 1] = s[j]
      i -= 2
    elif s[j].isdigit():
      pre_is_letter = False
      s[i] = s[j]
      i -= 1
    else:
      pre_is_letter = True
      s[i] = s[j]
      i -= 1

s = list('a')
encoding(s)
print(''.join(s))

a1


### Sliding Window in a string (fast + slow indices)

#### Q4.1 Longest substring that contains only unique char
Given a string, returns the length of the longest substring without duplicate characters. 

For example, the longest substrings without repeating characters for "BDEFGADE" are "BDEFGA", whose size is 6. (Follow up: without duplicate k-times characters. e.g k = 5)

In [None]:
# using two indices i and j as the left and right boundary of the substring
# use a hash table to store the characters and frequencies of each character in the substring
# when there is no duplicate characters, move j forward to add one more charcter
# when there is duplicate, move i forward until there is no duplicate in substring
# record max number of the substring
# time complexity: O(n)
# space complexity: O(n)

def longest_sub(s):
  '''
  input: string s
  output: the length of the longest substring without repeating chr
  '''
  if not s:
    return 0
  i = j = 0
  table = {}
  length_max = 0
  length = 0
  noDup = True
  while j < len(s):
    if noDup:
      if s[j] not in table:
        table[s[j]] = 1
        length += 1
        length_max = max(length_max, length)
      else:
        table[s[j]] += 1
        length += 1
        noDup = False
      j += 1
    else:
      if table[s[i]] == 2:
        table[s[i]] = 1
        noDup = True
      else:
        del table[s[i]]
      length -= 1
      i += 1
  return length_max
# test
longest_sub('aa')


1

In [None]:
# Follow up: without duplicate k-times characters. e.g k = 5)
# use a hash table to store the chars in the substring
# when there is no k-times chars, increment j
# when there is k-times chars, increment i until there is no k-times chars
# return the longest sub string length

def longest_sub_k(s, k=2):
  if not s:
    return 0
  i = j = 0
  count = 0
  length_max = 0
  table = {}
  nokDup = True
  while j < len(s):
    if nokDup:
      if s[j] not in table:
        table[s[j]] = 1
        length_max = max(length_max, j - i + 1)
      elif table[s[j]] == k - 1:
        table[s[j]] = k
        nokDup = False
      else:
        table[s[j]] += 1
        length_max = max(length_max, j - i + 1)
      j += 1
    else:
      if table[s[i]] == k:
        nokDup = True
      table[s[i]] -= 1
      i += 1
  return length_max

# test
longest_sub_k('aabbbbbbbb',3)
  

4

#### Q4.2 Find all anagrams of a substring S2 in a long string S1
String S1: "zzzz cdebcaabcyywwwx", S2: "aabc"

In [None]:
# using a sliding window to iterate the string s1
# the length of sliding window is same as s2
# employ a hash table to store the matched letters of current sliding window
# for each iteration, remove the left letter and add a right letter to the sliding window
# and update the hash table accordingly.
# when there is no more letters to match, then it is matched, return it as a result
# time complexity O(len(s1))
# space complexity: O(len(s2))

def sub_anagram(s1, s2):
  '''
  input: string s1 and string s2
  output: start indices of all the substring matches s2's anagram
  '''
  if not s1 or not s2:
    return []
  if len(s1) < len(s2):
    return []

  remaining_match_types = 0
  to_match = {}
  for letter in s2:
    if letter not in to_match:
      to_match[letter] = 1
      remaining_match_types += 1
    else:
      to_match[letter] += 1
  
  result = []
  # initialize the sliding window
  i = 0
  for j in range(len(s2)):
    if s1[j] in to_match:
      to_match[s1[j]] -= 1
      if to_match[s1[j]] == 0:
        remaining_match_types -= 1
  if remaining_match_types == 0:
    result.append(i)
  # iterate the string s1
  while j < len(s1) - 1:
    if s1[i] in to_match:
      to_match[s1[i]] += 1
      if to_match[s1[i]] == 1:
        remaining_match_types += 1
    if s1[j + 1] in to_match:
      to_match[s1[j + 1]] -= 1
      if to_match[s1[j + 1]] == 0:
        remaining_match_types -= 1
    i += 1
    j += 1
    if remaining_match_types == 0:
      result.append(i)
  return result

    
# test
s1 = "ssabssab"
s2 = "sab"
sub_anagram(s1, s2)

[1, 2, 5]

#### Q4.3 Given a 0-1 array, you can flip at most k '0's to '1's. Please find the longest subarray that consists of all '1's.



In [None]:
# employ a sliding window to include the substring with at most k '0's
# initialize the sliding window with k '0's
# each time move the right boundary until there is k + 1 '0's
# then move the left boundary until there is k '0's
# record the longest sliding window length and the starting index
# time complexity: O(n)
# space complexity: O(1)

def longest_sub(s, k):
  '''
  input:
    s: input string consist of '0' and '1'
    k: int, at most k '0' for the substring
  output:
    (length, starting index) of the longest substring with '1's
  '''
  if not s:
    return 0
  i = j = 0
  count = 0 # number of '0's in the sliding window
  length_max = 0
  while j < len(s):
    if count < k:
      if s[j] == '0':
        count += 1
      length_max = max(length_max, j - i + 1)
      j += 1
    else:
      # count == k, increment i until a '0' is removed from the sliding window
      if s[j] == '0':
        while s[i] != '0':
          i += 1
        i += 1
        count -= 1
      else:
        length_max = max(length_max, j - i + 1)
        j += 1
  return length_max

# test
longest_sub('111', 0)
      


3