# Heap Practice I #

In [1]:
import heapq  

### Ex.1 Kth Largest Element in Arary

Find the kth largest element in an unsorted array.

In [None]:
# O(k+(n-k)lgk) time, min-heap
def findKthLargest(nums, k):
    heap = []
    for num in nums:
        heapq.heappush(heap, num)
        if len(heap) > k:
            heapq.heappop(heap)
    
    return heapq.heappop(heap)


In [None]:
nums = [5,11,3,6,12,9,8,10,14,1,4,2,7,15]
k = 5
findKthLargest(nums, k)

In [None]:
# O(k+(n-k)lgk) time, min-heap        
def findKthLargest(nums, k):
    return heapq.nlargest(k, nums)[k-1]

In [None]:
nums = [5,11,3,6,12,9,8,10,14,1,4,2,7,15]
k = 5
findKthLargest(nums, k)

### Ex.2 Top K Frequent Words

Given a non-empty list of words, return the k most frequent elements.

Your answer should be sorted by frequency from highest to lowest. If two words have the same frequency, then the word with the lower alphabetical order comes first.

In [2]:
import collections
import heapq
import functools

@functools.total_ordering
class Element:
    def __init__(self, count, word):
        self.count = count
        self.word = word
        
    def __lt__(self, other):
        if self.count == other.count:
            return self.word > other.word
        return self.count < other.count
    
    def __eq__(self, other):
        return self.count == other.count and self.word == other.word

def topKFrequent(words, k):
    counts = collections.Counter(words)   

    freqs = []
    heapq.heapify(freqs)
    for word, count in counts.items():
        heapq.heappush(freqs, (Element(count, word), word))
        if len(freqs) > k:
            heapq.heappop(freqs)

    res = []
    for _ in range(k):
        res.append(heapq.heappop(freqs)[1])
    return res[::-1]

In [3]:
words = ["i", "love", "you", "i", "love", "coding","i","like","sports"]
k = 2
topKFrequent(words, k)

['i', 'love']

In [4]:
words = ["i", "love", "you", "i", "love", "coding","i","like","sports","i","love","travel","coding","is","fun"]
k = 4
topKFrequent(words, k)

['i', 'love', 'coding', 'fun']

In [5]:
def topKFrequent(nums, k):
    from collections import Counter as ct
    return [k for (k,v) in ct(nums).most_common(k)]

In [6]:
words = ["i", "love", "you", "i", "love", "coding","i","like","sports","i","love","travel","coding","is","fun"]
k = 4
topKFrequent(words, k)

['i', 'love', 'coding', 'you']

### Ex.3 Ugly Number

Write a program to check whether a given number is an ugly number.

Ugly numbers are positive numbers whose prime factors only include 2, 3, 5. For example, 6, 8 are ugly while 14 is not ugly since it includes another prime factor 7.

In [None]:
def uglyNumber(num):
    for p in 2, 3, 5:
        while num % p == 0 < num:
            num /= p
    return num == 1

### Ex.4 Ugly Number II

Write a program to find the n-th ugly number.

Ugly numbers are positive numbers whose prime factors only include 2, 3, 5. For example, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12 is the sequence of the first 10 ugly numbers.

Note that 1 is typically treated as an ugly number.

In [7]:
def nthUglyNumber(n):
    q2, q3, q5 = [2], [3], [5]
    ugly = 1
    for u in heapq.merge(q2, q3, q5):
        if n == 1:
            return ugly
        if u > ugly:
            ugly = u
            n -= 1
            q2 += 2 * u,
            q3 += 3 * u,
            q5 += 5 * u,

In [8]:
nthUglyNumber(10)

12

### Ex.5 Find K Pairs with Smallest Sums

You are given two integer arrays nums1 and nums2 sorted in ascending order and an integer k.

Define a pair (u,v) which consists of one element from the first array and one element from the second array.

Find the k pairs (u1,v1),(u2,v2) ...(uk,vk) with the smallest sums.

<img src="../images/ch15/heap4.png" width="460"/>

Solution 1: Brute Force
    
Just produce all pairs, sort them by sum, and return the first k.

In [None]:
import itertools
def kSmallestPairs(nums1, nums2, k):
    return sorted(itertools.product(nums1, nums2), key=sum)[:k]

In [None]:
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 4
kSmallestPairs(nums1, nums2, k)

In [None]:
nums1 = [1,1,2]
nums2 = [1,2,3]
k = 4
kSmallestPairs(nums1, nums2, k)

Solution 2: Less Brute Force
    
Still going through all pairs, but only with a generator and heapq.nsmallest, which uses a heap of size k. So this only takes O(k) extra memory and O(mn log k) time.    

In [None]:
# takes O(k) extra memory and O(mn log k) time
import heapq
def kSmallestPairs(nums1, nums2, k):
    return heapq.nsmallest(k, ([u, v] for u in nums1 for v in nums2), key=sum)

In [None]:
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 4
kSmallestPairs(nums1, nums2, k)

In [None]:
# takes O(k) extra memory and O(k^2 log k) time
import heapq
def kSmallestPairs(nums1, nums2, k):
    n1 = nums1[:k+1]
    n2 = nums2[:k+1]
    return heapq.nsmallest(k, ([u, v] for u in n1 for v in n2), key=sum)

In [None]:
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 4
kSmallestPairs(nums1, nums2, k)

Solution 3: Fast Solution
    
Basic idea: Use min_heap to keep track on next minimum pair sum, and we only need to maintain K possible candidates in the data structure.

Some observations: For every numbers in nums1, its best partner(yields min sum) always strats from nums2[0] since arrays are all sorted; And for a specific number in nums1, its next candidate sould be [this specific number] + nums2[current_associated_index + 1], unless out of boundary;)  

In [None]:
# O(kLogk) 
def kSmallestPairs(nums1, nums2, k):
    queue = []
    def push(i, j):
        if i < len(nums1) and j < len(nums2):
            heapq.heappush(queue, [nums1[i] + nums2[j], i, j])
    push(0, 0)
    pairs = []
    while queue and len(pairs) < k:
        _, i, j = heapq.heappop(queue)
        pairs.append([nums1[i], nums2[j]])
        push(i, j + 1)
        if j == 0:
            push(i + 1, 0)
    return pairs

In [None]:
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 20
kSmallestPairs(nums1, nums2, k)

In [None]:
nums1 = [1,1,2]
nums2 = [1,2,3]
k = 2
kSmallestPairs(nums1, nums2, k)

In [None]:
def kSmallestPairs2(nums1, nums2, k):
    queue = []
    def push(i, j):
        if i < len(nums1) and j < len(nums2):
            heapq.heappush(queue, [nums1[i] + nums2[j], i, j])
    for i in range(0, k):
        push(i, 0)
    pairs = []
    while queue and len(pairs) < k:
        _, i, j = heapq.heappop(queue)
        pairs.append([nums1[i], nums2[j]])
        push(i, j + 1)
    return pairs

In [None]:
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 20
kSmallestPairs2(nums1, nums2, k)

In [None]:
nums1 = [1,1,2]
nums2 = [1,2,3]
k = 2
kSmallestPairs(nums1, nums2, k)

<img src="../images/ch16/kpairsums.png" width="660"/>