In [1]:
import numpy as np
import copy

# Q1 - 
## Check if two sequences have same set of elements. 
# Proposed solution is O(n) or O(nlogn) depending on sorting.

```python
    INPUT sequence D1 and D2 of n elements

    SORT D1 and D2 
    //O(nlogn) is attainable via heap sort if data does not meet assumptions 
    //of counting, bucket or radix sort (O(n)) are not met

    //get number of unique elements in D1 and D2, k_D1 and k_D2 respectively 
    k_D1 = 0
    for i in 0 to len(D1):
         CONTINUE IF D1[i] == D1[i-1] ELSE k_D1 = k_D1 + 1 //O(n)
    k_D2 = 0
    for i in 0 to len(D2):
         CONTINUE if D2[i]==D2[i-1] ELSE k_D2 = k_D2 + 1 //O(n)
    
    RETURN FALSE IF k_D1 != k_D2 ELSE 
        INITIALISE 
        arrays d1[0,1,2,....,k_D1], d2[0,1,2,...k_D2] 
        d1[0],d2[0] = D1[0],D2[0]
        
        //making a sequence of unique elements of D1 (O(n))
        for i in 1 to len(D1):
            CHECK IF D1[i]==d1[i-1] ELSE d1[i]=D1[i] //O(1) operation
        //making a sequence of unique elements of D2 (O(n))
        for i in 1 to len(D2):
            CHECK IF D2[i]==d2[i-1] ELSE d2[i]=D2[i] //O(1) operation

        for i in 0 to len(d1):
            RETURN FALSE IF d1[i]!=d2[i] ELSE TRUE //O(n) operation
```
    

# Implementation of Algorithm proposed for Q1

In [2]:
def check_if_same_elements(D1,D2):
    #sorting. Built in python sorting is O(nlogn)
    D1.sort()
    D2.sort()
    
    #number of unique elements of D1
    k_D1 = 0
    for i in range(0,len(D1)):
        if D1[i] == D1[i-1] :
            continue
        else:
            k_D1 = k_D1 + 1
    #number of unique elements of D2
    k_D2 = 0
    for i in range(0,len(D2)):
        if D2[i] == D2[i-1] :
            continue
        else:
            k_D2 = k_D2 + 1
    #are the number of unique elements different? They can never have the same set of elements then.
    if(k_D1 != k_D2 ):
        return False

    #if not, check element by element
    d1 = []
    d2 = []

    for i in range(0,len(D1)):
        if(D1[i] in d1):
            continue
        else:
            d1.append(D1[i])

    for i in range(0,len(D2)):
        if(D2[i] in d2):
            continue
        else:
            d2.append(D2[i])
    for i in range(len(d1)):
        if d1[i] !=  d2[i]:
           return False
        else:
            continue
    return True

#Testing
D1 = [1,3,4,4]
D2 = [3,1,4,3]
print(f" Do {D1} and {D2} have same set of elements?",check_if_same_elements(D1,D2))

D1 = [1,3,4,1]
D2 = [3,1,1,5]
print(f" Do {D1} and {D2} have same set of elements?",check_if_same_elements(D1,D2))

D1 = [0.1,3,4,0.1]
D2 = [3,0.1,0.1,5]
print(f" Do {D1} and {D2} have same set of elements?",check_if_same_elements(D1,D2))

 Do [1, 3, 4, 4] and [3, 1, 4, 3] have same set of elements? True
 Do [1, 3, 4, 1] and [3, 1, 1, 5] have same set of elements? False
 Do [0.1, 3, 4, 0.1] and [3, 0.1, 0.1, 5] have same set of elements? False


# Q2  - Sorting D in O(n) time <- Radix

### Given n elements in an array. The maximum element is $n^2-1$. 
### If it is known elements in this are uniformly distributed, then we can create n buckets each with one element in it. This is basically bucket sort. Since creating n buckets takes $O(n)$, this algorithm will then be $O(n)$.

### However, what happens when data is not uniformly distributed? When the maximum element of an array is $O(n)$, then counting sort algorithm can sort it in $O(k+n)$ time. Given that maximum element is $O(n^2)$, an extension of counting sort is radix sort. 

### If there are n integers to sort, each integer has d digits, and each digit can take on up to k possible values, then radix sort can sort the numbers in $\theta(d(n+k))$ time. When d is a constant and k is $O(n)$, radix sort runs in linear time.


# Implementation of the radix sort (Q2)

### Given that it is not required by the question to implement the radix sort algorithm, the following is just limited to show an example of the implementation. It cannot handle negative numbers. This is because counting_sort_for_radix in the following, is not designed to handle negative numbers. In this notebook, there is also a function named counting_sort_negative which is a more robust counting sort algorithm which can handle negative numbers.

In [3]:
def counting_sort_for_radix(D, m):
    """
    input - sequence D and a parameter m which is needed to select the digit at a specified location using (number//m)%10
    output - sorted D based on the digit at location using (number//m)%10 
    """""
    k = 10
    n = len(D)

    #to store the sorted numbers
    B = [0] * n
    #count array
    C = [0] * k

    #count of each number
    for i in range(0, n):
        digits = D[i] // m
        digit = digits % 10 
        C[digit] += 1
    #cumulative count
    for i in range(1,k):
        C[i] = C[i] + C[i-1]

    #sorting
    for i in range(n-1, -1, -1):
        digits = D[i] // m
        digit = digits % 10
        B[C[digit] - 1] = D[i]
        C[digit]= C[digit] - 1
    return B

def radixSort(D):
    d = copy.deepcopy(D)
    max_D = max(D)
    m = 1
    while max_D // m > 0:
        d = counting_sort_for_radix(d, m)
        m *= 10
    return d

# Testing
D = [28, 83, 94, 58, 69, 75, 34, 63]
D_copy = copy.deepcopy(D)
D = radixSort(D)
print(f"Radix Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83, 0, 0, 69, 75, 34, 63]
D_copy = copy.deepcopy(D)
D = radixSort(D)
print(f"Radix Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

#test case where max element is n^2-1
D = [1,4,5,6,24]
D_copy = copy.deepcopy(D)
D = radixSort(D)
print(f"Radix Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

# D = [28, -83, 0, 0, 69, -75, 34, 63]
# D_copy = copy.deepcopy(D)
# D = radixSort(D)
# print(f"Radix Sort of {D} is", D)
# D_copy.sort()
# print(f"Is it actually sorted?",D==D_copy)

Radix Sort of [28, 34, 58, 63, 69, 75, 83, 94] is [28, 34, 58, 63, 69, 75, 83, 94]
Is it actually sorted? True
Radix Sort of [0, 0, 28, 34, 63, 69, 75, 83] is [0, 0, 28, 34, 63, 69, 75, 83]
Is it actually sorted? True
Radix Sort of [1, 4, 5, 6, 24] is [1, 4, 5, 6, 24]
Is it actually sorted? True


# Q3 - 
## Check if a sequences has atleast one repeated element. 
# Proposed solution is O(nlogn). 
### Note - O(n) is attainable with a hash table which that stores counts of each element. O(n) to store counts of each elements from D to hash table. Then run through has that table to check if their values are not equal 1 (O(n)). Net O(n)

```
    INPUT D

    SORT D
    //O(nlogn) is attainable via heap sort if data does not meet assumptions 
    //of counting, bucket or radix sort (O(n)) are not met

    GET SIZE k = len(D) //O(n) operation

    //check if consecutive elements are equal (O(n))
    for i in range(0,k-1):
        if D[i+1] == D[i]
            RETURN TRUE
    RETURN FALSE

    
```

# Implementation of Q3 - $O(nlogn)$

In [4]:
def check_if_atleast_one_same_element(D):
    D.sort()
    k = len(D)
    for i in range(0,k-1):
        if D[i+1] == D[i]:
            return True
    return False

D1 = [3,1,4,3]
print(f"Does {D1} have atleast one repeated element in it?",check_if_atleast_one_same_element(D1))
D1 = [3,1,4,5]
print(f"Does {D1} have atleast one repeated element in it?",check_if_atleast_one_same_element(D1))
D1 = [0.3,1,0.3,5]
print(f"Does {D1} have atleast one repeated element in it?",check_if_atleast_one_same_element(D1))

Does [3, 1, 4, 3] have atleast one repeated element in it? True
Does [3, 1, 4, 5] have atleast one repeated element in it? False
Does [0.3, 1, 0.3, 5] have atleast one repeated element in it? True


# Implementation of Q3 using a hash table (dictionary in python) - $O(n)$

In [5]:
def check_if_atleast_one_same_element_hash(D):
    d = {}
    n = len(D) #O(n) operation
    for i in range(n): #initialising the keys of the dictionary. O(n) operation
        d[D[i]] = 0
    for i in range(n):  #O(n) loop to store the counts
        d[D[i]] += 1
    for i in range(n):
        if d[D[i]] > 1:
            return True
        else:
            return False

D1 = [3,1,4,3]
print(f"Does {D1} have atleast one repeated element in it?",check_if_atleast_one_same_element_hash(D1))
D1 = [3,1,4,5]
print(f"Does {D1} have atleast one repeated element in it?",check_if_atleast_one_same_element_hash(D1))
D1 = [0.3,1,0.3,5]
print(f"Does {D1} have atleast one repeated element in it?",check_if_atleast_one_same_element_hash(D1))

Does [3, 1, 4, 3] have atleast one repeated element in it? True
Does [3, 1, 4, 5] have atleast one repeated element in it? False
Does [0.3, 1, 0.3, 5] have atleast one repeated element in it? True


# Q4 merge sort pending

# Q5 - Heap Sort
## Works for integers or floats in any range. It is an inplace sorting operation

In [6]:
#python indexing starts at 0. Getting the left and right child of an index i
def left(i):
    return (2*i) + 1
def right(i):
    return (2*i) + 2

def max_heap(A,i,size):
    """
    Resolve local triads and go down the tree to resolve further
    Input - Array (A), index (i)
    Output - Array (modified in place) 
    """
    # print(i)
    heap_size = size
    l = left(i) #index of left child
    r = right(i) #index of right child
    #if left child is larger than root
    if (l < heap_size) and (A[l]>A[i]):
        largest = l
    else:
        largest = i
    #if right child is larger than root
    if (r < heap_size) and (A[r]>A[largest]):
        largest = r
    #if largest (index) is not that of the root
    if largest != i:
        A[i], A[largest] = A[largest], A[i]
        #float this through the heap to get partial order in the subtree where the largest node of the triad existed initially 
        max_heap(A,largest,size)


def build_max_heap(A):
    """
    Build the max heap property
    Input - Array(A)
    Out - Array(A) modified in place where the max heap property is satisfied
    """

    heap_size = len(A)
    for i in range(int(np.floor(len(A)/2))-1,-1,-1):
        max_heap(A,i,heap_size)
    
def heap_sort(A):
    """
    Heap sort
    Input - Array to be sorted
    Output - Sorted Array (in place)
    """
    heap_size = len(A)
    build_max_heap(A)
    for i in range(len(A)-1,0,-1):
        A[0],A[i] = A[i],A[0]
        heap_size = heap_size-1
        max_heap(A,0,heap_size)
        
# Testing
D = [28, 83, 94, 58, 69, 75, 34, 63]
D_copy = copy.deepcopy(D)
heap_sort(D)
print(f"Heap Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83, 0, 0, 69, 75, 34, 63]
D_copy = copy.deepcopy(D)
heap_sort(D)
print(f"Heap Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83.5, 0, 0, 69.9, 75, 34, 63]
D_copy = copy.deepcopy(D)
heap_sort(D)
print(f"Heap Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83.5, 0.1, 0.1,0, 69.9, 75, 34, 63]
D_copy = copy.deepcopy(D)
heap_sort(D)
print(f"Heap Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [-28, -83.5, 0.1, 0.1,0, -69.9, 75, 34, 63]
D_copy = copy.deepcopy(D)
heap_sort(D)
print(f"Heap Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

Heap Sort of [28, 34, 58, 63, 69, 75, 83, 94] is [28, 34, 58, 63, 69, 75, 83, 94]
Is it actually sorted? True
Heap Sort of [0, 0, 28, 34, 63, 69, 75, 83] is [0, 0, 28, 34, 63, 69, 75, 83]
Is it actually sorted? True
Heap Sort of [0, 0, 28, 34, 63, 69.9, 75, 83.5] is [0, 0, 28, 34, 63, 69.9, 75, 83.5]
Is it actually sorted? True
Heap Sort of [0, 0.1, 0.1, 28, 34, 63, 69.9, 75, 83.5] is [0, 0.1, 0.1, 28, 34, 63, 69.9, 75, 83.5]
Is it actually sorted? True
Heap Sort of [-83.5, -69.9, -28, 0, 0.1, 0.1, 34, 63, 75] is [-83.5, -69.9, -28, 0, 0.1, 0.1, 34, 63, 75]
Is it actually sorted? True


# Q6: Counting sort
### By definition, it works only for integers and the maximum value of a sequence (k) is $O(n)$. Given this assumption, counting sort can take in one of two kinds of inputs along with the sequence to be sorted. 
(1) The maximum element - If not known a-priori, it is an $O(n)$ operation or lesser to find out. This is more general.  
(2) The length of the sequence - If not known, it is an $O(n)$ operation to find it out. If length of the sequence is fed in as input, then the range of values in the sequence must be strictly lesser than this length.

In the testing of my implementation, I have tested both these kinds of inputs 

In [7]:
def counting_sort(A,k):
    """
    Counting Sort
    A: unsorted array
    B: empty arrray to put sorted values into
    k: Under the assumption that k is O(n), this is the length of the array or the maximum value of the array. 
    """
    #initialise empty array
    B = [0]*len(A)
    C = [0]*(k+1)
    #storing number of occurrences of elements in A
    for j in range(0,len(A)):
        C[A[j]] = C[A[j]] + 1
    #cumulative sum
    for i in range(1,k+1):
        C[i] = C[i] + C[i-1]
    # putting elements into B
    for j in range(len(A)-1,-1,-1):
        B[C[A[j]]-1] = A[j]
        C[A[j]] = C[A[j]] - 1

    #this works too (instead of the above for loop) but it wont be stable
    # for j in range(0,len(A)): 
    #     B[C[A[j]]-1] = A[j]
    #     C[A[j]] = C[A[j]] - 1

    return B

# Testing
D = [5,2,6,5,2,1,2,3,4,5,8,0,2,3,2,15]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort(D,len(D)))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort(D,len(D))==D_copy)

#testing length of sequence as input. When this is the case, 
# the input sequence cannot have a value greater than its length. 
D = [5,2,6,5,2,1,2,3,4,5,8,0,2,3,2,10]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort(D,len(D)))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort(D,len(D))==D_copy)

#testing maximum value as the input. When this is the case, 
# the input sequence can have a value greater than its length (but still O(n) for counting sort to be O(n))
D = [5,2,6,5,2,11]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort(D,max(D)))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort(D,max(D))==D_copy)

D = [-5,2,-6,5,2,1,2,3,4,5,8,0,2,3,2,10]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort(D,len(D)))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort(D,len(D))==D_copy)

Count Sort of [5, 2, 6, 5, 2, 1, 2, 3, 4, 5, 8, 0, 2, 3, 2, 15] is [0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8, 15]
Is it actually sorted? True
Count Sort of [5, 2, 6, 5, 2, 1, 2, 3, 4, 5, 8, 0, 2, 3, 2, 10] is [0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8, 10]
Is it actually sorted? True
Count Sort of [5, 2, 6, 5, 2, 11] is [2, 2, 5, 5, 6, 11]
Is it actually sorted? True
Count Sort of [-5, 2, -6, 5, 2, 1, 2, 3, 4, 5, 8, 0, 2, 3, 2, 10] is [0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 8, 10, -6, -5]
Is it actually sorted? False


### As one can see above, counting sort in its traditional version fails to sort when negative numbers are present. To allow it to sort negative numbers, a modification to the indices of the array C, such that C[0] now stores the count of the least number is needed and is implemented below. Note, as reasoned above, using max value of the input sequence to set the length of the count array is more general and that is what is used below.

In [8]:
def counting_sort_negative(A):
    """
    Counting Sort
    A: unsorted array
    B: empty arrray to put sorted values into
    k: Under the assumption that k is O(n), this is the length of the array or the maximum value of the array. 
    """
    #initialise empty array
    B = [0]*len(A)
    min_val = min(A)
    if min_val >= 0:
        min_val = 0
    C = [0]*(max(A)-min_val+1)
    #storing number of occurrences of elements in A
    for j in range(0,len(A)):
        C[A[j]-min_val] = C[A[j]-min_val] + 1
    #cumulative sum
    for i in range(1,len(C)):
        C[i] = C[i] + C[i-1]
    # putting elements into B
    for j in range(len(A)-1,-1,-1):
        B[C[A[j]-min_val]-1] = A[j]
        C[A[j]-min_val] = C[A[j]-min_val] - 1

    return B

#test cases when negative numbers are present for counting sort
D = [-1,-2,-3]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort_negative(D))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort_negative(D)==D_copy)

D = [-1,2,-3]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort_negative(D))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort_negative(D)==D_copy)

D = [1,2,3]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort_negative(D))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort_negative(D)==D_copy)

D = [-5,2,-6,5,2,1,2,3,4,5,8,0,2,3,2,10]
D_copy = copy.deepcopy(D)
print(f"Count Sort of {D} is", counting_sort_negative(D))
D_copy.sort()
print(f"Is it actually sorted?",counting_sort_negative(D)==D_copy)

Count Sort of [-1, -2, -3] is [-3, -2, -1]
Is it actually sorted? True
Count Sort of [-1, 2, -3] is [-3, -1, 2]
Is it actually sorted? True
Count Sort of [1, 2, 3] is [1, 2, 3]
Is it actually sorted? True
Count Sort of [-5, 2, -6, 5, 2, 1, 2, 3, 4, 5, 8, 0, 2, 3, 2, 10] is [-6, -5, 0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 8, 10]
Is it actually sorted? True


# Q7 - Bucket Sort
### Part 1 involves creating an insertion sort to be used in the bucket sort algorithm. The insertion sort is an inplace operation and is $O(n^2)$
### Part 2 is the actual bucket sort algorithm - $O(n)$. Two bucket sort algorithms are coded for. One is the standard one in which the range of the input elements is between 0 and 1. The second can take input elements greater than 1.


In [9]:
def insertion_sort(A):
    """
    Insertion sort
    Input A
    Output - Sorted A in place
    """
    for j in range(1,len(A)):
        key = A[j]
        i = j-1
        while i>-1 and A[i]>key:
            A[i+1] = A[i]
            i = i - 1
        A[i + 1] = key

# Testing
D = [28, 83, 94, 58, 69, 75, 34, 63]
D_copy = copy.deepcopy(D)
insertion_sort(D)
print(f"Insertion Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83, 0, 0, 69, 75, 34, 63]
D_copy = copy.deepcopy(D)
insertion_sort(D)
print(f"Insertion Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83.5, 0, 0, 69.9, 75, 34, 63]
D_copy = copy.deepcopy(D)
insertion_sort(D)
print(f"Insertion Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [28, 83.5, 0.1, 0.1, 0, 69.9, 75, 34, 63]
D_copy = copy.deepcopy(D)
insertion_sort(D)
print(f"Insertion Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

D = [-28, 83.5, -0.1, 0.1, 0, 69.9, -75, 34, 63]
D_copy = copy.deepcopy(D)
insertion_sort(D)
print(f"Insertion Sort of {D} is", D)
D_copy.sort()
print(f"Is it actually sorted?",D==D_copy)

Insertion Sort of [28, 34, 58, 63, 69, 75, 83, 94] is [28, 34, 58, 63, 69, 75, 83, 94]
Is it actually sorted? True
Insertion Sort of [0, 0, 28, 34, 63, 69, 75, 83] is [0, 0, 28, 34, 63, 69, 75, 83]
Is it actually sorted? True
Insertion Sort of [0, 0, 28, 34, 63, 69.9, 75, 83.5] is [0, 0, 28, 34, 63, 69.9, 75, 83.5]
Is it actually sorted? True
Insertion Sort of [0, 0.1, 0.1, 28, 34, 63, 69.9, 75, 83.5] is [0, 0.1, 0.1, 28, 34, 63, 69.9, 75, 83.5]
Is it actually sorted? True
Insertion Sort of [-75, -28, -0.1, 0, 0.1, 34, 63, 69.9, 83.5] is [-75, -28, -0.1, 0, 0.1, 34, 63, 69.9, 83.5]
Is it actually sorted? True


In [10]:
def bucket_sort(A):
    """
    Bucket Sort (for numbers uniformly distributed between 0 and 1)
    Input Array (A)
    """
    sorted_A = [] #for the final concatenation
    B = [None]*len(A)
    n = len(A)
    #list of lists.
    #Here it is pre-determined that there are as many buckets as the length of the input sequence. 
    # Better algorithms to decide the number of bins exist
    for i in range(0,n):
        B[i] = []
    #putting elements into respective buckets
    for i in range (0,n):
        B[int(np.floor(n*A[i]))].append(A[i])
    #sorting elements in respective buckets
    for i in range(0,n):
        insertion_sort(B[i])
    #concatenating sorted elements
    for i in range(0,n):
        if B[i]:
            sorted_A.extend(B[i])
    return sorted_A

# Testing
D = list(np.random.uniform(size=5))
D_copy = copy.deepcopy(D)
print(f"Bucket Sort of {D} is", bucket_sort(D))
D_copy.sort()
print(f"Is it actually sorted?",bucket_sort(D)==D_copy)

D = list(np.random.uniform(size=10))
D_copy = copy.deepcopy(D)
print(f"Bucket Sort of {D} is", bucket_sort(D))
D_copy.sort()
print(f"Is it actually sorted?",bucket_sort(D)==D_copy)

D = list(np.random.uniform(size=20))
D_copy = copy.deepcopy(D)
print(f"Bucket Sort of {D} is", bucket_sort(D))
D_copy.sort()
print(f"Is it actually sorted?",bucket_sort(D)==D_copy)

Bucket Sort of [0.5908286977912766, 0.5399137884242866, 0.1646222730064777, 0.9737577932574181, 0.9155290540232929] is [0.1646222730064777, 0.5399137884242866, 0.5908286977912766, 0.9155290540232929, 0.9737577932574181]
Is it actually sorted? True
Bucket Sort of [0.4096255215404788, 0.4182457736869081, 0.8761924343274996, 0.5710574482593302, 0.5144198755237122, 0.8349073084474985, 0.690753513471651, 0.8479659764857366, 0.44506616389661635, 0.6124973264461641] is [0.4096255215404788, 0.4182457736869081, 0.44506616389661635, 0.5144198755237122, 0.5710574482593302, 0.6124973264461641, 0.690753513471651, 0.8349073084474985, 0.8479659764857366, 0.8761924343274996]
Is it actually sorted? True
Bucket Sort of [0.9718718814588301, 0.6710307293911676, 0.20348410374497305, 0.737939932080781, 0.4048119316797859, 0.9237266398588808, 0.29616817336536105, 0.9458154044264642, 0.41067325161256363, 0.7828478893584035, 0.729117756221396, 0.529563089579481, 0.6609693666324875, 0.9117156195708093, 0.053645

In [11]:
def bucket_sort_general(A):
    """
    Bucket Sort (for integers uniformly distributed between 0 and 100)
    Input Array (A)
    """
    sorted_A = [] #for the final concatenation
    B = [None]*len(A)
    n = len(A)
    #list of lists indicating the number of buckets. Here it is pre-determined that there are as many buckets as the length of the input sequence. 
    # Better algorithms to decide the number of bins exist.
    for i in range(0,n):
        B[i] = []
    #putting elements into respective buckets.
    for i in range (0,n):
        bucket_start_index = int(np.floor(A[i]/n))
        #if bucket index is greater than the length of the sequence
        if bucket_start_index >= n:
            B[n-1].append(A[i])
        elif bucket_start_index <= 0:
            B[0].append(A[i])
        else:
            B[bucket_start_index].append(A[i])
    #sorting elements in respective buckets
    for i in range(0,n):
        insertion_sort(B[i])
    #concatenating sorted elements
    for i in range(0,n):
        if B[i]:
            sorted_A.extend(B[i])
    return sorted_A

# Testing
D = list(np.random.randint(0,100,10))
D_copy = copy.deepcopy(D)
print(f"Bucket Sort of {D} is", bucket_sort_general(D))
D_copy.sort()
print(f"Is it actually sorted?",bucket_sort_general(D)==D_copy)

D = list((100 - 0) * np.random.random_sample(size=8) + 0)
D_copy = copy.deepcopy(D)
print(f"Bucket Sort of {D} is", bucket_sort_general(D))
D_copy.sort()
print(f"Is it actually sorted?",bucket_sort_general(D)==D_copy)

# Testing
D = [-0.1,-0.5,5,10]
D_copy = copy.deepcopy(D)
print(f"Bucket Sort of {D} is", bucket_sort_general(D))
D_copy.sort()
print(f"Is it actually sorted?",bucket_sort_general(D)==D_copy)

Bucket Sort of [76, 3, 31, 90, 20, 1, 95, 76, 57, 85] is [1, 3, 20, 31, 57, 76, 76, 85, 90, 95]
Is it actually sorted? True
Bucket Sort of [1.3531790289103895, 20.00025064443518, 50.626354046153686, 92.83268304657918, 93.197788880035, 62.24694519562212, 50.58568125704355, 7.696384724650629] is [1.3531790289103895, 7.696384724650629, 20.00025064443518, 50.58568125704355, 50.626354046153686, 62.24694519562212, 92.83268304657918, 93.197788880035]
Is it actually sorted? True
Bucket Sort of [-0.1, -0.5, 5, 10] is [-0.5, -0.1, 5, 10]
Is it actually sorted? True
