<a href="https://colab.research.google.com/github/liyin2015/Algorithms-and-Coding-Interviews/blob/master/chapter_sorting_and_selection_algorithms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Naive Sort in O(n^2)


### Insertion Sort in O(n^2)

In [0]:
# Not in-place
def insertionSort(a):
  if not a or len(a) == 1:
    return a
  n = len(a)
  sl = [a[0]] # sorted list
  for i in range(1, n):
    for j in range(i):
      if sl[j] > a[i]:
        sl.insert(j, a[i])
        break
    if len(sl) != i + 1: # not inserted yet
      sl.insert(i, a[i])
  return sl

In [0]:
a = [9, 10, 2, 8, 9, 3, 7]
sa = insertionSort(a)
sa

[2, 3, 7, 8, 9, 9, 10]

In [0]:
# Backward and in-place
def insertionSort(a):
  if not a or len(a) == 1:
    return a
  n = len(a)
  for i in range(1, n):
    t = a[i]
    j = i - 1
    while j >= 0 and t < a[j]: 
      a[j+1] = a[j] # Move item backward
      j -= 1
    a[j+1] = t      
  return

In [0]:
a = [9, 10, 2, 8, 9, 3, 7]
insertionSort(a)
print(a)

[2, 3, 7, 8, 9, 9, 10]


### Bubble Sort in O(n^2)

In [0]:
def bubbleSort(a):
  if not a or len(a) == 1:
    return 
  n = len(a)
  for i in range(n - 1): #n-1 passes 
    for j in range(n - i -1): 
      # Swap
      if a[j] > a[j + 1]:
        a[j], a[j + 1] = a[j + 1], a[j] 
  return

In [0]:
a = [9, 10, 2, 8, 9, 3]
bubbleSort(a)
a

[2, 3, 8, 9, 9, 10]

In [0]:
def bubbleSortOptimized(a):
    if not a or len(a) == 1:
        return
    n = len(a)
    for i in range(n - 1): #n-1 passes, 
      bSwap = False
      for j in range(n - i -1): #each pass will have valid window [0, n-i], and j is the starting index of each pair
        if a[j] > a[j + 1]:
          a[j], a[j + 1] = a[j + 1], a[j] #swap
          bSwap = True
      if not bSwap:
        break
    return

In [0]:
a = [9, 10, 2, 8, 9, 3]
bubbleSortOptimized(a)
a

[2, 3, 8, 9, 9, 10]

### Selection Sort in O(n^2)
In selection sort, each time it selects the current largest item and swap it with the last item in the unrestricted region.
Given the input size to be `n`, we have index `[0, n-1]`. 

* At the first pass, we choose the largest item from `A[0,n-1]` and swap it with `A[n-1]`. 

* At the second pass, we choose the largest item from `A[0,n-2]` and swap it with `A[n-2]`. 

In [0]:
def selectSort(a):
  n = len(a)
  for i in range(n - 1): #n-1 passes 
    ti = n - 1 - i
    li = 0 # The index of the largest item
    for j in range(n - i):
      if a[j] >= a[li]:
        li = j
    # swap li and ti
    a[ti], a[li] = a[li], a[ti]
  return 

In [0]:
def selectSort(a):
  n = len(a)
  for i in range(n - 1): #n-1 passes 
    ti = n - 1 - i
    li = 0 # The index of the largest item
    for j in range(n - i):
      if a[j] >= a[li]:
        li = j
    # swap li and ti
    print('swap', a[li], a[ti], li)
    a[ti], a[li] = a[li], a[ti]
    print(a)
  return a

In [0]:
a = [9, 10, 2, 8, 9, 3]
selectSort(a)
a

swap 10 3 1
[9, 3, 2, 8, 9, 10]
swap 9 9 4
[9, 3, 2, 8, 9, 10]
swap 9 8 0
[8, 3, 2, 9, 9, 10]
swap 8 2 0
[2, 3, 8, 9, 9, 10]
swap 3 3 1
[2, 3, 8, 9, 9, 10]


[2, 3, 8, 9, 9, 10]

In [0]:
def insertionSort(a):
  '''implement insertion sort'''
  if not a or len(a) == 1:
    return a
  n = len(a)
  sl = [a[0]] + [None] *(n-1) # sorted list
  for i in range(1, n): # items to be inserted into the sorted
    key = a[i]
    j = i-1 

    while j >= 0 and sl[j] > key: # compare key from the last sorted element
      sl[j+1] = sl[j] # shift a[j] backward
      j -= 1
    sl[j+1] = key
    print(sl)
  return sl
      

In [0]:
def shift(a, start, end):
  for i in range(end, start, -1): # [i, j)
    a[i] = a[i-1]
    
def insertionSortForward(a):
  if not a or len(a) == 1:
    return a
  n = len(a)
  sl = [a[0]] # sorted list
  for i in range(1, n): # items to be inserted into the sorted
    for j in range(i):
      if a[i] < a[j]:
        # shift all other elements [j, i-1]
        tmp = a[i]
        shift(a, j, i)
        a[j] = tmp   
  return a

def insertionSortInPlace(a):
  if not a or len(a) == 1:
    return a
  n = len(a)
  for i in range(1, n): # items to be inserted into the sorted
    t = a[i]
    j = i - 1
    while j >= 0 and t < a[j]: # keep comparing if target is still smaller
      a[j+1] = a[j] # shift current item backward
      j -= 1
    a[j+1] = t # a[j] <= t , insert t at the location j+1     
  return a

In [0]:
a = [9, 10, 2, 8, 9, 3, 7]
print(insertionSortInPlace(a))

[2, 3, 7, 8, 9, 9, 10]


## Merge Sort O(nlgn)

In [0]:
def merge(l, r): 
  ans = []
  # Two pointers each points at l and r
  i = j = 0 
  n, m = len(l), len(r)

  while i < n and j < m: 
    if l[i] <= r[j]:
      ans.append(l[i])
      i += 1
    else:
      ans.append(r[j])
      j += 1
      
  ans += l[i:]
  ans += r[j:]
  return ans
  

In [0]:
def mergeSort(a, s, e):
  if s == e:
    return [a[s]]

  m = (s + e) // 2 

  l = mergeSort(a, s , m)
  r = mergeSort(a, m+1, e)
  return merge(l, r)

In [0]:
a = [9, 10, 2, 8, 9, 3, 7, 9]
mergeSort(a, 0, len(a)-1)

[2, 3, 7, 8, 9, 9, 9, 10]

### prove merge sort is stable by sorting tuple and printing id

In [0]:
def mergeTuple(l, r): 
  '''combine the left and right sorted list'''
  ans = []
  i = j = 0 # two pointers each points at l and r
  n, m = len(l), len(r)
  
  # first while loop to merge
  while i < n and j < m: 
    if l[i][0] <= r[j][0]: # chaning it to l[i][0] < r[j][0] will not be stable anymore. 
      ans.append(l[i])
      i += 1
    else:
      ans.append(r[j])
      j += 1
      
  # now one list of l and r might have items left
  ans += l[i:]
  ans += r[j:]
  return ans

def mergeSortTuple(a, s, e):
  # base case , can not be divided further
  if s == e:
    return [a[s]]
  # divide into two halves from the middle point
  m = (s + e) // 2
  
  # conquer
  l = mergeSort(a, s , m)
  r = mergeSort(a, m+1, e)
  
  # combine
  return mergeTuple(l, r)

In [0]:
a = [(9, 1), (10, 1), (2, 1), (8, 1), (9, 2), (3, 1), (7, 1), (9, 3)] # the second item represents the index of duplcates
ids = [id(x) if x[0] == 9 else None for x in a]
sorted_a = mergeSortTuple(a, 0, len(a)-1)
ids2 = [id(x) if x[0] == 9 else None for x in sorted_a]
print(sorted_a)
ids, ids2

[(2, 1), (3, 1), (7, 1), (8, 1), (9, 2), (9, 3), (9, 1), (10, 1)]


([140381548618120,
  None,
  None,
  None,
  140381548653128,
  None,
  None,
  140381548653320],
 [None,
  None,
  None,
  None,
  140381548653128,
  140381548653320,
  140381548618120,
  None])

## QuickSort in O(nlogn)

In [0]:
def partition(a, s, e):
  p = a[e]
  i = s - 1
  # Scan unresticted area
  for j in range(s, e): 
    # Swap 
    if a[j] <= p:
      i += 1
      a[i], a[j] = a[j], a[i] 
  a[i+1], a[e] = a[e], a[i+1]
  return i+1

In [0]:
# Experiment the correctness of lumutos partition
lst = [9, 10, 2, 8, 9, 3, 7]
print(partition(lst, 0, len(lst)-1))
print(lst)

2
[2, 3, 7, 8, 9, 10, 9]


In [0]:
# main algorithm of quick sort
def quickSort(a, s, e, partition=partition):
  # base case , can not be divided further
  if s >= e:
    return 
  p = partition(a, s, e)
  
  # conquer smaller problem
  quickSort(a, s , p-1, partition)
  quickSort(a, p+1, e, partition)
  return

In [0]:
quickSort(lst, 0, len(lst) - 1)
lst

[2, 3, 7, 8, 9, 9, 10]

### Quick Select

In [0]:
def quickSelect(a, s, e, k, partition=partition):
  if s >= e:
    return a[s]

  p = partition(a, s, e) 
  if p == k:
    return a[p]
  if k > p:
    return quickSelect(a, p+1, e, k, partition)
  else:
    return quickSelect(a, s, p-1,  k, partition)

In [0]:
lst = [9, 10, 2, 8, 9, 3, 7]
quickSelect(lst, 0, len(lst) - 1, 2)

7

### experiment to see the stability of quick sort

In [0]:
#a = [(5, 1), (7, 1),(3, 1), (2, 1), (5, 2), (6,1), (7, 2), (8, 1), (9, 1), (5, 3), (5, 4)] # the second item represents the index of duplcates
a = [(2, 1), (2, 2), (1, 1)]
def partition_tuple(a, s, e):
  '''Lumutos partition'''
  p = a[e][0]
  i = s - 1
  for j in range(s, e): #a[s, e-1]
    
    if a[j][0] <= p:
      i += 1
      a[i], a[j] = a[j], a[i] # swap a[i] and a[j]
  a[i+1], a[e] = a[e], a[i+1]
  return i+1
quickSort(a, 0, len(a) - 1, partition_tuple)
print(a)

[(1, 1), (2, 2), (2, 1)]


### experiment to see the performance of worst time

In [0]:
import random, time
lst1 = [random.randint(1, 25) for i in range(400)]
lst2 = [i for i in range(400)[::-1]]
t1 = time.time()
quickSort(lst1, 0, len(lst1)-1, partition)
print('time for random values:', time.time()-t1)

t1 = time.time()
quickSort(lst2, 0, len(lst2)-1, partition)
print('time for sorted values:', time.time()-t1)

time for random values: 0.0017516613006591797
time for sorted values: 0.0171658992767334


### Hoare Partition

In [0]:
# def partition_hoare(a, s, e):
#   '''Hoare Parition'''
#   p = a[e]
#   i = s
#   j = e-1
#   while True:
#     while a[i] <= p and i < j:
#       i += 1
#     while a[j] > p and i < j:
#       j -= 1
#     if i < j:
#       a[i], a[j] = a[j], a[i]
#     else:
#       return j
#   return j

In [0]:
# lst = [9, 10, 2, 8, 9, 3, 7]
# print(partition_hoare(lst, 0, len(lst)-1))
# print(lst)

2
[3, 2, 10, 8, 9, 9, 7]


## HeapSort in O(nlogn)

In [0]:
from heapq import heapify, heappop
def heapsort(a):
  heapify(a)
  return [heappop(a) for i in range(len(a))]

In [0]:
lst = [21, 1, 45, 78, 3, 5]
heapsort(lst)

[1, 3, 5, 21, 45, 78]

## Linear Sort

### Bucket Sort

In [0]:
# Prepare input data
import numpy as np
np.random.seed(1)
a = np.random.uniform(0,  1, 10)
a = np.round(a, decimals=2)
a

array([0.42, 0.72, 0.  , 0.3 , 0.15, 0.09, 0.19, 0.35, 0.4 , 0.54])

In [0]:
from functools import reduce
def bucketSort(a):
  n = len(a)
  buckets = [[] for _ in range(n)]
  # Divide numbers into buckets
  for v in a:
    buckets[int(v*n)].append(v)
  print(buckets)
  # Apply insertion sort within each bucket
  for i in range(n):
    insertionSort(buckets[i])
  # Combine sorted buckets
  return reduce(lambda a, b: a + b, buckets)

In [0]:
bucketSort(a)

[[0.0, 0.09], [0.15, 0.19], [], [0.3, 0.35], [0.42, 0.4], [0.54], [], [0.72], [], []]


[0.0, 0.09, 0.15, 0.19, 0.3, 0.35, 0.4, 0.42, 0.54, 0.72]

### Counting Sort

In [0]:
a1 = [1, 4, 0, 2, 7, 5, 9]
a2 = [1, 4, 1, 2, 7, 5, 2]

In [0]:
def countSort(a):
  minK, maxK = min(a), max(a)
  k = maxK - minK + 1
  count = [0] * (maxK - minK + 1)
  n = len(a)
  order = [0] * n
  # Get occurrence
  for key in a:
    count[key - minK] += 1
  
  # Get prefix sum
  for i in range(1, k):
    count[i] += count[i-1]
    
  # Put key in position
  for i in range(n-1, -1, -1):
    key = a[i] - minK
    count[key] -= 1 # to get the index as position
    order[count[key]] = a[i] 
  return order

In [0]:
countSort(a1)

[0, 1, 2, 4, 5, 7, 9]

In [0]:
countSort(a2)

[2, 2, 0, 1, 1, 0, 1]
[2, 4, 4, 5, 6, 6, 7]
[1, 1, 2, 2, 4, 5, 7]


[1, 1, 2, 2, 4, 5, 7]

### Radix Sort

In [0]:
# Get digits
a = 178
digits = []
while a > 0:
    digits.append(a%10)
    a = a // 10
digits

[8, 7, 1]

In [0]:
def count_sort(a, exp):
  count = [0] * 10 # [0, 9]
  n = len(a)
  order = [0] * n
  # Get occurrence
  for key in a:
    key = (key // exp) % 10
    count[key] += 1
  
  # Get prefix sum
  for i in range(1, 10):
    count[i] += count[i-1]
    
  # Put key in position
  for i in range(n-1, -1, -1):
    key = (a[i] // exp) % 10
    count[key] -= 1 # to get the index as position
    order[count[key]] = a[i] 
  return order

In [0]:
a = [170, 45, 75, 90, 802, 24]

# LSD radix sortin
def radixSort(a):
  maxInt = max(a)
  exp = 1
  while maxInt // exp > 0:
    a = count_sort(a, exp)
    exp *= 10
  return a

In [0]:
a = radixSort(a)
a

[24, 45, 75, 90, 170, 802]

In [0]:
# MSD radix soring of strings with bucket sort
def MSD_radix_string_sort(a, i):
  '''
  s : strings
  i: starting radix for sorting
  '''
  # End condition: bucket has only one item
  if len(a) <= 1:
    return a

  # Divide
  buckets = [[] for _ in range(26)]
  done_bucket = []
  for s in a:
    if i >= len(s):
      done_bucket.append(s)
    else:
      buckets[ord(s[i]) - ord('a')].append(s)
  # Conquer and chain all buckets
  ans = []
  for b in buckets:
    ans += MSD_radix_string_sort(b, i + 1)
  return done_bucket + ans

In [0]:
s = ['apple', 'pear', 'berry', 'peach', 'apricot', 'ap', 'pear']
MSD_radix_string_sort(s, 0)

['ap', 'apple', 'apricot', 'berry', 'peach', 'pear', 'pear']

In [0]:
# Experiment with Python sort