# Counting Inversions
---
### Algorithm
- Count inversions while merging
- Assume L and R are sorted
- If we add $i_{m}$ from R to the output, $i_{m}$ is smaller than elements currently in L
- $i_{m}$ is hence inverted with respect to elements currently in L
- Add current size of L to inversion count

In [24]:
def mergeAndCount(A, B): # Assume A and B are sorted
    """Merge two sorted lists and count the inversions between them.

    An inversion here is a pair (a, b) with a taken from A, b taken from B
    and a > b (A is the left half, B is the right half). Equal elements are
    NOT inversions, so on a tie the element from A is emitted first.

    Args:
        A: left list, sorted in ascending order.
        B: right list, sorted in ascending order.

    Returns:
        A tuple (C, count) where C is the merged sorted list and count is
        the number of cross inversions between A and B.
    """
    (m, n) = (len(A), len(B))
    # C: merged list; i, j: read positions in A and B; count: inversions
    (C, i, j, count) = ([], 0, 0, 0)

    while i < m and j < n:
        if A[i] <= B[j]:
            # Left element is not larger: no inversion is created
            C.append(A[i])
            i += 1
        else:
            # B[j] is strictly smaller than every remaining element A[i:],
            # so it forms an inversion with each of those (m - i) values
            C.append(B[j])
            j += 1
            count += m - i

    # At most one of these is non-empty. Leftovers are already sorted and
    # cannot create any further inversion.
    C.extend(A[i:])
    C.extend(B[j:])
    return (C, count)

In [25]:
def sortAndCount(A): # Count number of inversions in the list A
    """Sort A by merge sort while counting its inversions.

    Returns a tuple (sorted_list, inversion_count). A list with fewer than
    two elements is returned unchanged with a count of 0.
    """
    size = len(A)
    if size < 2:
        return (A, 0)

    # Solve each half independently; each call yields a sorted half and
    # the number of inversions found inside that half
    mid = size // 2
    left_sorted, left_inversions = sortAndCount(A[:mid])
    right_sorted, right_inversions = sortAndCount(A[mid:])

    # Merging the two sorted halves yields the inversions that straddle them
    merged, cross_inversions = mergeAndCount(left_sorted, right_sorted)

    total = left_inversions + right_inversions + cross_inversions
    return (merged, total)

# Demo: each call prints a tuple (sorted list, number of inversions)
print(sortAndCount([1, 2, 3, 4]))  # already sorted input
print(sortAndCount([4, 3, 2, 1]))  # fully reversed input
print(sortAndCount([4, 1, 3, 2]))  # partially shuffled input

([1, 2, 3, 4], 0)
([1, 2, 3, 4], 6)
([1, 2, 3, 4], 4)


# Closest pair of points
- [Related youtube video - Inside Code](https://www.youtube.com/watch?v=ldHA8UcQI9Q)

In [1]:
import math

def dist(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Each point is indexed as point[0] (x) and point[1] (y).
    """
    dx = p2[0] - p1[0]
    dy = p2[1] - p1[1]
    return math.sqrt((dy ** 2) + (dx ** 2))

def closest_brute_force(points):
    """Find the closest pair by comparing every pair of points.

    Returns a tuple (p1, p2, distance). When fewer than two points are
    given no pair exists and the result is (None, None, inf).
    """
    best = (None, None, float("inf"))
    for index, first in enumerate(points):
        # Only look at points after `first` so each pair is visited once
        for second in points[index + 1:]:
            gap = dist(first, second)
            if gap < best[2]:
                best = (first, second, gap)
    return best


def rec(xsorted, ysorted):
    """Recursive step of the divide-and-conquer closest-pair search.

    Args:
        xsorted: the points, sorted by x-coordinate (point[0]).
        ysorted: the same points, sorted by y-coordinate (point[1]).

    Returns:
        A tuple (p1, p2, distance) describing the closest pair found.
    """
    n = len(xsorted)
    if n <= 3:
        # Small inputs are solved directly
        return closest_brute_force(xsorted)

    half = n // 2
    midpoint = xsorted[half]
    xsorted_left = xsorted[:half]
    xsorted_right = xsorted[half:]

    # Split ysorted into exactly the same two groups of points as the x
    # split above. Comparing x against the midpoint is not enough: the
    # midpoint itself (and any point sharing its x) could land in a
    # different half than it did in the x split. Instead, count the
    # coordinates that belong to the left half and hand them out while
    # walking ysorted, which keeps both output lists sorted by y.
    left_quota = {}
    for point in xsorted_left:
        key = (point[0], point[1])
        left_quota[key] = left_quota.get(key, 0) + 1

    ysorted_left = []
    ysorted_right = []
    for point in ysorted:
        key = (point[0], point[1])
        if left_quota.get(key, 0) > 0:
            left_quota[key] -= 1
            ysorted_left.append(point)
        else:
            ysorted_right.append(point)

    left_best = rec(xsorted_left, ysorted_left)
    right_best = rec(xsorted_right, ysorted_right)
    # Keep the better of the two halves (the right half wins ties)
    (p1, p2, delta) = left_best if left_best[2] < right_best[2] else right_best

    # Only points closer than delta to the dividing line can form a
    # better pair that straddles the two halves
    in_band = [point for point in ysorted
               if midpoint[0] - delta < point[0] < midpoint[0] + delta]
    for i, current in enumerate(in_band):
        # Compare with at most the next 6 band points in y order
        for candidate in in_band[i + 1:i + 7]:
            d = dist(current, candidate)
            if d < delta:
                (p1, p2, delta) = (current, candidate, d)
    return p1, p2, delta


def closest(points):
    """Return (p1, p2, distance) for the closest pair among points.

    Builds one copy of the points ordered by x and one ordered by y, then
    hands both to the recursive solver.
    """
    by_x = list(points)
    by_x.sort(key=lambda pt: pt[0])
    by_y = list(points)
    by_y.sort(key=lambda pt: pt[1])
    return rec(by_x, by_y)

# Sample run on six (x, y) points; the notebook displays the returned
# (p1, p2, distance) tuple
pts =  [(2, 15), (40, 5), (20, 1), (21, 14), (1,4), (3, 11)]
closest(pts)

((2, 15), (3, 11), 4.123105625617661)

# Karatsuba Algorithm
- [Inside code YT video](https://www.youtube.com/watch?v=yWI2K4jOjFQ)

In [9]:
# Floor division by 10**3 drops the last three digits; karatsuba() uses
# the same trick to take the left part of a number
(123456)//(10**3)

123

In [2]:
def karatsuba(x, y):
    """Multiply two integers using Karatsuba's divide-and-conquer method.

    Both numbers are split at the same decimal position:
        x = a * 10**half + b
        y = c * 10**half + d
    so that
        x * y = ac * 10**(2*half) + (ad + bc) * 10**half + bd
    where (ad + bc) is obtained from a single extra product
    (a + b) * (c + d) - ac - bd.

    Args:
        x: first integer factor.
        y: second integer factor.

    Returns:
        The product x * y.
    """
    # Base case: a factor below 10 is multiplied directly
    if x < 10 or y < 10:
        return x * y

    n = max(len(str(x)), len(str(y)))
    half = n // 2
    shift = 10 ** half

    a, b = divmod(x, shift)  # left and right part of x
    c, d = divmod(y, shift)  # left and right part of y

    ac = karatsuba(a, c)
    bd = karatsuba(b, d)
    ad_plus_bc = karatsuba(a + b, c + d) - ac - bd

    # The high term must be scaled by 10**(2*half), not 10**n: the two
    # differ whenever n is odd
    return ac * shift * shift + ad_plus_bc * shift + bd
# Sample run: multiply two 4-digit numbers
karatsuba(3456,8902)

30765312

# Quick Select: Divide and Conquer
- First half of the code is quick sort
- Worst case Time Complexity is $O(n^2)$
    - Occurs when the list is sorted in ascending or descending order

In [4]:
def quickselect(L,l,r,k): # k-th smallest in L[l:r]
    """Return the k-th smallest value of L[l:r] (k counts from 1).

    The first element of the current segment is used as pivot and the
    segment is partitioned in place, so L is rearranged by this call.
    The search then continues in the side that contains the answer. This
    is done with a loop rather than recursion so that the worst case
    (already sorted input, one step per element) cannot exceed Python's
    recursion limit.

    Args:
        L: list to select from; it is reordered in place.
        l: start index of the segment (inclusive).
        r: end index of the segment (exclusive).
        k: rank to select, 1 <= k <= r - l.

    Returns:
        The k-th smallest value, or None when k is out of range.
    """
    if (k < 1) or (k > r - l):
        return None

    while True:
        pivot = L[l]
        # L[l+1:boundary] holds the values found so far that are <= pivot
        boundary = l + 1
        for i in range(l + 1, r):
            if L[i] > pivot:
                continue  # stays in the upper part
            (L[i], L[boundary]) = (L[boundary], L[i])
            boundary += 1

        # Move the pivot between the lower and the upper part
        pivot_at = boundary - 1
        (L[l], L[pivot_at]) = (L[pivot_at], L[l])

        lowerlen = pivot_at - l
        if k <= lowerlen:
            # Answer lies among the values left of the pivot
            r = pivot_at
        elif k == lowerlen + 1:
            return L[pivot_at]
        else:
            # Skip the lower part and the pivot itself
            k -= lowerlen + 1
            l = pivot_at + 1
# Sample run: 2nd smallest value of the whole list
print(quickselect([5,3,7,2,1],0,5,2))

2


# Median of Medians
---
### Proof: Why is this pivot good?
**Lemma**: The median of medians returns a pivot element that is greater than at least 30% of the elements and less than at least 30% of the elements in the whole list.

**proof**:
<img src="./images/14.png">
- Each vertical line is a block of 5 elements
- Each vertical line has median, present inside the blue band
- Each block is sorted in ascending order (top to bottom)
- Consider `Mom()` resulted in the blue box as the median
- So all the elements on the left of median (blue box) will be less and all elements on the right of it would be bigger
- As each block is sorted, elements in red region are less than median (blue box)
- And elements in green region are greater than median
- **so how many elements are less than and greater than median (blue box)?**
- On doing math you will find that median block lies between $3len(L)/10$ and $7len(L)/10$
<img src="./images/14.png">

In [6]:
def MoM(L):
    """Return an approximate median of L using medians of blocks of 5.

    The list is cut into consecutive blocks of up to five elements, the
    median of every block is collected, and the procedure is repeated on
    those medians until at most five values remain. The input list is
    left untouched: all sorting happens on copies.

    Args:
        L: list of mutually comparable values.

    Returns:
        The middle element (index len // 2 after sorting) of the final
        group of at most five values.

    Raises:
        IndexError: if L is empty.
    """
    values = L
    while len(values) > 5:
        # Replace the current values by the medians of their blocks
        medians = []
        for start in range(0, len(values), 5):
            block = sorted(values[start:start + 5])
            medians.append(block[len(block) // 2])
        values = medians

    # Base case: sort a copy so the caller's list is not reordered
    ordered = sorted(values)
    return ordered[len(ordered) // 2]

# Sample run; the trailing comment on the next line shows L in sorted order
L = [5, 2, 9, 2, 6, 4] # [2, 2, 4, 5, 6, 9]
MoM(L)

5

### Master Theorem
- [Master
  Theorem](https://www.youtube.com/playlist?list=PLPGw-ZD97tXcoR1F6ZrjArSsv076IAo-L)

<br>

<img src="./images/24.png" width=550 height=550>

# Fast Select
- Use median of block medians to locate pivot for `quickselect()`
    - We can name this `fastselect()`
---
### Time Complexity
---
#### MoM is $O(n)$
- $T(1)\ =\ 1$
- $T(n)\ =\ T(n/5)\ +\ n$
- By master theorem we get $T(n)\ =\ O(n)$
    - Each term is decreasing
    - Root dominates the sum
---    
#### `fastselect` Recurrence
- $T(1)\ =\ 1$
- $T(n)\ =\ max(T(m),\ T(n-(m+1)))\ +\ n$
    - Where `m = len(lower)`
    - The MoM pivot keeps $m$ between $3n/10$ and $7n/10$, so $T(n)\ =\ max(T(3n\ /\ 10),\ T(7n\ /\ 10))\ +\ n$
- $T(n)$ is $O(n)$
    - Found using master theorem
    - Each term is decreasing
    - Root dominates the sum
---
#### `quicksort`
- Using `MoM` you can make quicksort $O(n\ logn)$ even in the worst case

In [17]:
def quickselect(L, l, r, k): # Select k-th smallest in L[l:r]
    """Return the k-th smallest value of L[l:r] using a MoM pivot.

    Works like plain quickselect, except that the pivot of every
    partition step is chosen by MoM() instead of taking the first
    element. L is rearranged in place. Returns None when k is outside
    1..(r - l).
    """
    if k < 1 or k > r - l:
        return None

    lo, hi, rank = l, r, k
    while True:
        # Pick the pivot value from a copy of the current segment
        chosen = MoM(L[lo:hi])
        # First position of that value in the segment (it may be duplicated)
        first_hit = next(i for i in range(lo, hi) if L[i] == chosen)
        # Bring the pivot to the front so the usual partition applies
        L[lo], L[first_hit] = L[first_hit], L[lo]

        pivot = L[lo]
        boundary = lo + 1
        for i in range(lo + 1, hi):
            if L[i] > pivot:
                continue
            L[i], L[boundary] = L[boundary], L[i]
            boundary += 1

        # Put the pivot between the smaller-or-equal and the larger values
        pivot_at = boundary - 1
        L[lo], L[pivot_at] = L[pivot_at], L[lo]

        smaller_count = pivot_at - lo
        if rank == smaller_count + 1:
            return (L[pivot_at])
        if rank <= smaller_count:
            hi = pivot_at
        else:
            rank -= smaller_count + 1
            lo = pivot_at + 1
    
# Sample run; quickselect swaps elements of L in place, so the second
# call sees a reordered list holding the same values
L = [8, 4, 2, 9, 10, 14, 1]
print(quickselect(L, 0, len(L), 1)) # gives 1st smallest value
print(quickselect(L, 0, len(L), 5)) # gives 5th smallest value

1
9


# Experiments

In [27]:
# PROVING THAT MoM ALWAYS GIVES A MEDIAN IN THE 30%-70% RANGE
# Experiment on one random sample (an illustration, not a proof): compare
# the value returned by MoM with the 30%, 50% and 70% positions of the data
from random import *
A = [randrange(1000) for i in range(200)]
B = sorted(A)

# Finding ranges and median from the sorted list: B
print("30% ends here", B[3*len(B)//10])
print("True median", B[len(B)//2])
print("70% starts here", B[7*len(B)//10])

# The MoM result is expected to fall between the 30% and 70% values printed above
print("Approximate median by MoM", MoM(A))

30% ends here 308
True median 498
70% starts here 690
Approximate median by MoM 480


In [26]:
# RUNNING QUICKSELECT - IMPLICITLY SORTING IN ASCENDING ORDER
# Asking for every rank in turn prints the values of C in ascending order.
# i runs from 0 to len(C)+1, so the first and last calls use an
# out-of-range rank and print None.
C = [randrange(100) for i in range(100)]
for i in range(len(C)+2):
    print(quickselect(C, 0, len(C), i))

None
1
1
2
4
4
4
4
4
5
6
9
10
10
11
13
14
16
18
18
18
18
19
20
20
21
21
21
21
22
23
23
25
26
26
26
27
28
29
29
30
31
31
32
34
34
36
37
40
40
41
42
43
44
45
45
46
46
49
49
49
51
53
53
54
58
60
62
62
63
64
65
65
66
67
68
69
71
72
72
72
76
77
78
80
82
83
84
85
90
90
90
92
92
93
94
96
97
98
98
99
None
