## Hash Table

### Q1: Given a composition made up of many different words, find the k most frequent words in it.

In [18]:
# method 1: k-size min-heap
import heapq
def kFrequent(k, words):
  """Return the k most frequent words as (frequency, word) tuples.

  A min-heap holding at most k entries is maintained while scanning the word
  counts, so the result comes back in heap order rather than sorted order.
  When k is at least the number of distinct words, every (frequency, word)
  pair is returned in the order the words were first seen. A non-positive k
  yields an empty list.
  """
  if k <= 0:
    return []

  # Hash table: word -> number of occurrences.
  counts = {}
  for word in words:
    counts[word] = counts.get(word, 0) + 1

  # Put the frequency first so tuples compare by frequency before word.
  pairs = [(count, word) for word, count in counts.items()]
  if len(pairs) <= k:
    return pairs

  # Seed the min-heap with the first k pairs; the root is the weakest kept entry.
  heap = pairs[:k]
  heapq.heapify(heap)
  for candidate in pairs[k:]:
    if candidate[0] > heap[0][0]:
      # The candidate is strictly more frequent than the root: swap them.
      heapq.heappop(heap)
      heapq.heappush(heap, candidate)
  return heap

words = ['a', 'a', 'a', 'b', 'b', 'c']
kFrequent(0, words)

[]

In [24]:
# method 2: n-size max-heap
import heapq
def kFrequent(k, words):
  """Return the k most frequent words as (frequency, word) tuples.

  All distinct words are placed in a max-heap (a min-heap keyed on the negated
  frequency) and the top entries are popped, so the result is ordered from
  most to least frequent, with ties broken by ascending word.

  Args:
    k: number of entries wanted. A non-positive k yields an empty list; a k
      larger than the number of distinct words returns every word.
    words: iterable of hashable, mutually comparable words.

  Returns:
    A list of (frequency, word) tuples, most frequent first.
  """
  if k <= 0:
    return []

  # Hash table: word -> number of occurrences.
  counts = {}
  for word in words:
    counts[word] = counts.get(word, 0) + 1

  # heapq only provides a min-heap, so store the negated count to pop the
  # largest frequency first.
  heap = [(-count, word) for word, count in counts.items()]
  heapq.heapify(heap)

  res = []
  # Capping k keeps "k >= number of distinct words" on the same code path; the
  # previous special case negated the word instead of the count and raised
  # TypeError.
  for _ in range(min(k, len(heap))):
    neg_count, word = heapq.heappop(heap)
    res.append((-neg_count, word))
  return res

words = ['a', 'a', 'a', 'b', 'b', 'c']
kFrequent(2, words)

[(3, 'a'), (2, 'b')]

### Q2: An unsorted array of size n - 1 contains every number from 1 to n except one. How can the missing number be found in O(n) time?

In [27]:
# method 1: math
def findMissing(a):
  """Return the number missing from a, which holds 1..n with one value absent.

  The sum of 1..n is n * (n + 1) / 2; subtracting the actual sum of the array
  leaves the missing value.
  """
  upper = len(a) + 1  # n: the array has n - 1 entries
  expected_total = upper * (upper + 1) // 2
  return expected_total - sum(a)

findMissing([1,2,3,4,6,7])

5

In [29]:
# method 2: Hash table
def findMissing(a):
  """Return the smallest value in 1..len(a) + 1 that does not occur in a."""
  present = set(a)
  # Scan the candidates in ascending order and stop at the first absent one.
  candidates = range(1, len(a) + 2)
  return next((value for value in candidates if value not in present), None)
      
findMissing([1,2,3,4,6,7])

5

In [31]:
# method 3: bit operation
def findMissing(a):
  """Return the number missing from a, which holds 1..n with one value absent.

  XOR-ing every value of 1..n together with every array entry cancels each
  number that is present, leaving only the missing one.
  """
  acc = 0
  # Pair the i-th expected value (1-based) with the i-th array entry.
  for expected, actual in enumerate(a, start=1):
    acc ^= expected ^ actual
  # The range 1..n has one more value than the array: fold in n itself.
  acc ^= len(a) + 1
  return acc

findMissing([1,2,3,4,6,7])

5

### Q3: Find the common numbers between two sorted arrays a[N] and b[M], where N and M are the array lengths.

In [34]:
# method 1: hash table
def commonNum(a, b):
  """Return the values of the longer array that also occur in the shorter one.

  The shorter array is hashed into a set and the longer array is scanned
  against it, so the result follows the order (and the repetitions) of the
  longer array. When both arrays have the same length, b is the one scanned.
  """
  # Hash the shorter array to keep the set small.
  shorter, longer = (b, a) if len(a) > len(b) else (a, b)
  lookup = set(shorter)
  return [value for value in longer if value in lookup]

a = [1, 2, 3, 4, 5]
b = [5, 6, 7]
commonNum(a, b)

[5]

In [35]:
# method 2: binary search
def commonNum(a, b):
  """Return the values of the shorter array that are found in the longer one.

  Each value of the shorter array is looked up in the longer array with
  bSearch, so the longer array must be sorted. The result follows the order of
  the shorter array. When both arrays have the same length, a is the one
  iterated.
  """
  # Iterate the shorter array and binary-search the longer one.
  shorter, longer = (b, a) if len(a) > len(b) else (a, b)
  return [value for value in shorter if bSearch(value, longer) >= 0]

def bSearch(i, b):
  """Binary-search the sorted sequence b for i.

  Returns the index of a matching element, or -1 when b is empty or does not
  contain i.
  """
  if not b:
    return -1
  low = 0
  high = len(b) - 1
  while low <= high:
    middle = (low + high) // 2
    probe = b[middle]
    if probe == i:
      return middle
    if probe < i:
      # The target can only be in the upper half.
      low = middle + 1
    else:
      # probe > i: the target can only be in the lower half.
      high = middle - 1
  return -1

a = [1, 2, 3, 4, 5]
b = [5, 6, 7]
commonNum(a, b)

[5]

In [37]:
# method 3: two pointers
def commonNum(a, b):
  """Return the values shared by the two sorted arrays a and b.

  One pointer walks each array; on a match the value is recorded and both
  pointers advance, otherwise the pointer at the smaller value advances.
  """
  shared = []
  pos_a = pos_b = 0
  len_a, len_b = len(a), len(b)
  while pos_a < len_a and pos_b < len_b:
    left, right = a[pos_a], b[pos_b]
    if left == right:
      shared.append(left)
      pos_a += 1
      pos_b += 1
    elif left < right:
      pos_a += 1
    else:
      # left > right
      pos_b += 1
  return shared

a = [1, 2, 3, 4, 5]
b = [5, 6, 7]
commonNum(a, b)

[5]

## String I

### 1. Char Removal

#### Q1.1 (char removal) Remove a/some particular chars from a string.
Example: string input = 'student', remove 'u' and 'n' → output: 'stdet'

In [6]:
# method 1: from front to end
# use two pointers i and j
# i: the result is to the left of i (not including i)
# j: the iterator for the string

def removeChar(inputS, removeS):
  """Return inputS with every character that occurs in removeS dropped.

  The characters are compacted towards the front of a working list: `keep`
  marks the end of the retained prefix while the input is scanned left to
  right. An empty input yields an empty string.
  """
  if not inputS:
    return ""
  chars = list(inputS)
  banned = set(removeS)
  keep = 0  # chars[:keep] is the result built so far
  for ch in inputS:
    if ch not in banned:
      chars[keep] = ch
      keep += 1
  return ''.join(chars[:keep])

removeChar("", 'ab')

# time complexity: O(n)
# space complexity: O(m)

In [51]:
# method 2: from end to front
def removeChar(inputS, removeS):
  """Return inputS with every character that occurs in removeS dropped.

  The input is scanned from the end to the front and the retained characters
  are compacted towards the back of a working list; everything to the right of
  `write` is the result built so far. An empty input yields an empty string.
  """
  if not inputS:
    return ""
  chars = list(inputS)
  write = len(inputS) - 1  # next free slot, filled from the back
  for ch in reversed(inputS):
    if ch in removeS:
      continue
    chars[write] = ch
    write -= 1
  return ''.join(chars[write + 1:])

removeChar("abcdeabc", 'ab')

'cdec'

#### Q1.2 Remove all leading/trailing and duplicate empty spaces (only leave one empty space if duplicated spaces happen) from the input string.
Example: input = “   abc   ed  ef  ”       output = 'abc ed ef'

In [4]:
# use two pointers, write and the iterator over the input
# write: the result is to the left of write (not including write)
# a non-space char is always kept
# a space is kept only if something was already kept and the previously kept
# char is not a space, which drops leading and duplicated spaces
# remove the trailing space, if there is one, at the last step
def removeSpace(inputS):
  """Strip leading/trailing spaces and collapse runs of spaces to one.

  Only the ' ' character is treated as a space.

  Args:
    inputS: the string to clean up.

  Returns:
    The cleaned string; "" when the input is empty or contains only spaces.
  """
  if not inputS:
    return ""

  chars = list(inputS)
  write = 0
  for ch in inputS:
    if ch != ' ':
      chars[write] = ch
      write += 1
    elif write > 0 and chars[write - 1] != ' ':
      # First space after a word: keep exactly one separator.
      chars[write] = ' '
      write += 1

  # At most one trailing separator can remain. The write > 0 guard matters for
  # all-space input: without it chars[-1] would be inspected and part of the
  # original spaces would be returned.
  if write > 0 and chars[write - 1] == ' ':
    write -= 1
  return ''.join(chars[:write])

# time complexity: O(n), n is the length of input string
# space complexity: O(n) for the list copy of the string (Python strings are immutable); the compaction itself is done in place with O(1) extra space


removeSpace("   abc   ed  ef ")


'abc ed ef'

### 2. Char De-duplication

#### Q2.1 Remove duplicated and adjacent letters (leave only one letter in each duplicated section) in a string.
E.g., "aabb__cc" → "ab_c"

In [8]:
# use two pointer index i and j
# i: left to i is the output result (not include i)
# j: iterator of the string
# when s[j] == s[i - 1], skip it; otherwise, copy it to s[i] and increment i

def removeDup(inputList):
  """Collapse each run of equal adjacent items in inputList to a single item.

  The list is compacted in place (its front part is overwritten) and the
  de-duplicated prefix is returned joined into a string. Returns None when
  inputList is empty.
  """
  if not inputList:
    return None
  total = len(inputList)
  write = 1  # inputList[:write] is the result built so far
  read = 1
  while read < total:
    current = inputList[read]
    # Keep the item only when it differs from the last kept one.
    if current != inputList[write - 1]:
      inputList[write] = current
      write += 1
    read += 1
  return ''.join(inputList[:write])

# time complexity: O(n)
# space complexity: O(1)

removeDup(list('aabb__cc'))

'ab_c'

#### Q2.2 Char de-duplication adjacent letters repeatedly

Example: abbbaz → aaz → z

In [13]:
# use two pointer indices i and j
# i: left to i is the output result (not include i)
# j: j is the iterator of the input string
# if input[j] == input[i - 1], keep skipping the same letter until input[j] != input[i - 1], then i -= 1
# if input[j] != input[i - 1] (or nothing is kept yet), input[i] = input[j], then i += 1 and j += 1
def deDup(inputList):
  """Repeatedly remove runs of equal adjacent items, e.g. abbbaz -> aaz -> z.

  The kept prefix inputList[:write] acts as a stack: an item that differs from
  the top is pushed, and when the following items equal the top, the whole run
  is skipped and the top itself is popped. The list is overwritten in place and
  the surviving prefix is returned joined into a string. Returns None when
  inputList is empty.
  """
  if not inputList:
    return None
  size = len(inputList)
  write = read = 0
  while read < size:
    # Push the current item when the stack is empty or its top differs.
    if write == 0 or inputList[read] != inputList[write - 1]:
      inputList[write] = inputList[read]
      write += 1
      read += 1
    # Skip every following item equal to the top of the stack.
    run_start = read
    while read < size and inputList[read] == inputList[write - 1]:
      read += 1
    # At least one duplicate was skipped, so the top goes away as well.
    if read > run_start:
      write -= 1
  return ''.join(inputList[:write])

# time complexity: O(n)
# space complexity: O(1)

deDup(list('abbbaz_'))

'z_'

### 3. Sub-string Finding

#### Q3 (Strstr) Substring problem: how to determine whether a string is a substring of another string.
Example: s1 = 'abcde', s2 = 'cd'; return 2. Return -1 if s2 is not in s1.

In [11]:
# method 1: iterate s1, check if it has s2 for each iterated index
def strStr(s1, s2):
  """Return the index of the first occurrence of s2 in s1, or -1 if absent.

  Every start position of s1 is tried and s2 is compared against it character
  by character. An empty s1 or an empty s2 yields -1.
  """
  if not (s1 and s2):
    return -1
  needle_len = len(s2)
  for start in range(len(s1) - needle_len + 1):
    # all() stops at the first mismatching character.
    if all(s1[start + offset] == s2[offset] for offset in range(needle_len)):
      return start
  return -1

# time complexity: O(n * m)
# space complexity: O(1)

strStr('abcde', 'bb')

-1

In [17]:
# method 2: Rabin-Karp
def strStr(s1, s2):
  """Return the index of the first occurrence of s2 in s1, or -1 if absent.

  Rabin-Karp: the hash of s2 is compared with a rolling hash of each
  len(s2)-wide window of s1, and the characters are compared only when the
  hashes agree, to rule out a collision.

  Args:
    s1: the string to search in.
    s2: the string to search for.

  Returns:
    The start index of the first match, or -1 when there is none or when
    either string is empty.
  """
  if not s1 or not s2:
    return -1
  n, m = len(s1), len(s2)
  if m > n:
    return -1

  base = 256
  mod = 1000000007  # large prime keeps hash collisions rare
  # Weight of the leading character of a window: base ** (m - 1) mod mod.
  high = pow(base, m - 1, mod)

  # Hash s2 and the first window of s1.
  target = window = 0
  for idx in range(m):
    target = (target * base + ord(s2[idx])) % mod
    window = (window * base + ord(s1[idx])) % mod

  for start in range(n - m + 1):
    # Equal hashes are only a hint; confirm with a real comparison.
    if window == target and s1[start:start + m] == s2:
      return start
    if start + m < n:
      # Roll the window: drop s1[start], append s1[start + m].
      window = ((window - ord(s1[start]) * high) * base + ord(s1[start + m])) % mod
  return -1

# time complexity: O(n + m) on average, O(n * m) in the worst case (many hash collisions)
# space complexity: O(1)

[0, 2, 4]