Skip to content

Top K Elements

kjingers edited this page Jan 26, 2022 · 16 revisions

Top K Numbers

Uses heap. Time complexity is N*log(k). So better than sorting.

from heapq import *


def find_k_largest_numbers(nums, k):

  minHeap = []

  # First, Insert k elements to the heap
  for i in range(k):
    heappush(minHeap, nums[i])

  # Loop through the rest of the array.
  # If larger than root, then pop root and insert new num
  for i in range(k, len(nums)):
    if nums[i] > minHeap[0]:
      heappop(minHeap)
      heappush(minHeap, nums[i])

  return list(minHeap)


def main():

  print("Here are the top K numbers: " +
        str(find_k_largest_numbers([3, 1, 5, 12, 2, 11], 3)))

  print("Here are the top K numbers: " +
        str(find_k_largest_numbers([5, 12, 11, -1, 12], 3)))


main()

Kth Smallest Number

Very similar to above

from heapq import *

def find_Kth_smallest_number(nums, k):

      maxHeap = []

      # Add K elements to maxHeap
      for i in range(k):
            heappush(maxHeap, -nums[i])


      # Loop through rest of array
      # if smaller than root, pop root and insert
      for i in range(k, len(nums)):
            if nums[i] < -maxHeap[0]:
                  heappop(maxHeap)
                  heappush(maxHeap, -nums[i])


      # Root of maxHeap will be the kth smallest Number
      return -maxHeap[0]


def main():

  print("Kth smallest number is: " +
        str(find_Kth_smallest_number([1, 5, 12, 2, 11, 5], 3)))

  # since there are two 5s in the input array, our 3rd and 4th smallest numbers should be a '5'
  print("Kth smallest number is: " +
        str(find_Kth_smallest_number([1, 5, 12, 2, 11, 5], 4)))

  print("Kth smallest number is: " +
        str(find_Kth_smallest_number([5, 12, 11, -1, 12], 3)))


main()

K Closest Points to Origin

Very similar to above. We define a Point class with a lt function for the Max Heap.

We could probably use a tuple where (distance, point) if not given Point class

from heapq import *

def find_Kth_smallest_number(nums, k):

      maxHeap = []

      # Add K elements to maxHeap
      for i in range(k):
            heappush(maxHeap, -nums[i])


      # Loop through rest of array
      # if smaller than root, pop root and insert
      for i in range(k, len(nums)):
            if nums[i] < -maxHeap[0]:
                  heappop(maxHeap)
                  heappush(maxHeap, -nums[i])


      # Root of maxHeap will be the kth smallest Number
      return -maxHeap[0]


def main():

  print("Kth smallest number is: " +
        str(find_Kth_smallest_number([1, 5, 12, 2, 11, 5], 3)))

  # since there are two 5s in the input array, our 3rd and 4th smallest numbers should be a '5'
  print("Kth smallest number is: " +
        str(find_Kth_smallest_number([1, 5, 12, 2, 11, 5], 4)))

  print("Kth smallest number is: " +
        str(find_Kth_smallest_number([5, 12, 11, -1, 12], 3)))


main()

Connect n Ropes with Minimum Cost

from heapq import *

def minimum_cost_to_connect_ropes(ropeLengths):
  minHeap = []
  result = 0
  
  # First Add all ropes to Min Heap
  for i in range(len(ropeLengths)):
    heappush(minHeap, ropeLengths[i])

  # Connect Ropes Until we only have one rope left
  # Always want to connect the two ropes with the shortest lengths
  while len(minHeap) > 1:
    temp = heappop(minHeap) + heappop(minHeap)
    result += temp
    heappush(minHeap, temp)



  return result


def main():

  print("Minimum cost to connect ropes: " +
          str(minimum_cost_to_connect_ropes([1, 3, 11, 5])))
  print("Minimum cost to connect ropes: " +
        str(minimum_cost_to_connect_ropes([3, 4, 5, 6])))
  print("Minimum cost to connect ropes: " +
        str(minimum_cost_to_connect_ropes([1, 3, 11, 5, 2])))


main()


Top K Frequent Numbers

from collections import Counter
from heapq import *

def find_k_frequent_numbers(nums, k):
  minHeap = []
  result = []

  # Create Dict that maps nums to counts
  freqs = Counter(nums)

  for num, count in freqs.items():
    heappush(minHeap, (count, num))
    if len(minHeap) > k:
      heappop(minHeap)
  
  while minHeap:
    result.append(heappop(minHeap)[1])

  return result


def main():

  print("Here are the K frequent numbers: " +
        str(find_k_frequent_numbers([1, 3, 5, 12, 11, 12, 11], 2)))

  print("Here are the K frequent numbers: " +
        str(find_k_frequent_numbers([5, 12, 11, 3, 11], 2)))


main()

Sort Characters By Frequency

from heapq import *
from collections import Counter

def sort_character_by_frequency(str):

  maxHeap = []
  result = []

  # Create a hash map of char -> count
  counts = Counter(str)

  # Add all distinct characters and counts to maxHeap
  for char, count in counts.items():
    heappush(maxHeap, (-count, char))

  # Pop one distinct character at a time from maxHeap
  while maxHeap:
    count, char = heappop(maxHeap)
    # Add character "count" times to result string
    for i in range(-count):
      result.append(char)




  return ''.join(result)


def main():

  print("String after sorting characters by frequency: " +
        sort_character_by_frequency("Programming"))
  print("String after sorting characters by frequency: " +
        sort_character_by_frequency("abcbab"))


main()

Kth Largest Number in a Stream

from heapq import *

class KthLargestNumberInStream:

  def __init__(self, nums, k):
    self.k = k
    self.minHeap = []
    for num in nums:
      self.add(num)

  def add(self, num):

    heappush(self.minHeap, num)

    if len(self.minHeap) > self.k:
      heappop(self.minHeap)

    return self.minHeap[0]


def main():

  kthLargestNumber = KthLargestNumberInStream([3, 1, 5, 12, 2, 11], 4)
  print("4th largest number is: " + str(kthLargestNumber.add(6)))
  print("4th largest number is: " + str(kthLargestNumber.add(13)))
  print("4th largest number is: " + str(kthLargestNumber.add(4)))


main()

Find K Closest Elements

One option to solve (below) is to use binary search to find the closest value to X. Then add all diffs of nums from idx-K to idx+K to minHeap. Then, pop K numbers to get the K nums with the smallest difference.

Another option is to again use binary search to get closest value. But then use two pointers to comparedifferences on each side, and iterating accordingly. This has better time complexity.

A third option (on LC) is to just use binary search to find the left-most number of the answer. Then we return the subarray from arr[left:left+K]

from heapq import *

def find_closest_elements(arr, K, X):
      result = []
      minHeap = []

      # Binary Search to find index of X. If X not present,
      # return index to left (unless at index 0)
      idx = binary_search(arr, X)

      # Make upperbound = index + K and lowerbound = index - K
      # Check for arr bound limits
      upperBound = min(len(arr) - 1, idx + K)
      lowerBound = max(0, idx - K)

      # Add all diffs from index - K to index + K to minHeap, where
      # diff = abs(num - X)
      for i in range(lowerBound, upperBound + 1):
            diff = abs(arr[i] - X)
            heappush(minHeap, (diff, arr[i]))

      # Pop K elements from minHeap to get K closest numbers
      for _ in range(K):
            diff, num = heappop(minHeap)
            result.append(num)

      # Return the result in sorted order
      return sorted(result)

def binary_search(arr, key):
      start, end = 0, len(arr) - 1

      while start <= end:
            mid = start + (end - start) // 2

            if key < arr[mid]:
                  end = mid - 1
            elif key > arr[mid]:
                  start = mid + 1
            else:
                  return mid

      # If not found, return index to the left
      return end


def main():
  print("'K' closest numbers to 'X' are: " +
        str(find_closest_elements([5, 6, 7, 8, 9], 3, 7)))
  print("'K' closest numbers to 'X' are: " +
        str(find_closest_elements([2, 4, 5, 6, 9], 3, 6)))
  print("'K' closest numbers to 'X' are: " +
        str(find_closest_elements([2, 4, 5, 6, 9], 3, 10)))


main()

Maximum Distinct Elements

from heapq import *
from collections import Counter

def find_maximum_distinct_elements(nums, k):

      distinctCount = 0
      minHeap = []

      # Create a hashmap of characters with counts
      counts = Counter(nums)

      # Put all non-distinct characters with count > 1
      # into a minHeap by count. Increment distinctCount for each
      # distinct character
      for num, count in counts.items():
            if count > 1:
                  heappush(minHeap, count)
            else: # count == 1
                  distinctCount += 1

      # Pop from minHeap. Make each character distinct until
      # no more non-distinct characters, or k <= 0
      while minHeap and k > 0:
            count = heappop(minHeap)

            # Try to make distinct. Update k
            k -= (count - 1)

            # If k isn't negative, then we are successful in making
            # this char distinct
            if k >= 0:
                  distinctCount += 1


      # If no more non-distinct characters but k > 0, then must
      # remove k disctint characters

      if k > 0:
            distinctCount -= k

      return distinctCount



def main():

  print("Maximum distinct numbers after removing K numbers: " +
        str(find_maximum_distinct_elements([7, 3, 5, 8, 5, 3, 3], 2)))
  print("Maximum distinct numbers after removing K numbers: " +
        str(find_maximum_distinct_elements([3, 5, 12, 11, 12], 3)))
  print("Maximum distinct numbers after removing K numbers: " +
        str(find_maximum_distinct_elements([1, 2, 3, 3, 3, 3, 4, 4, 5, 5, 5], 2)))


main()

Sum of Elements between K2th and K1th Smallest

from heapq import *

def find_sum_of_elements(nums, k1, k2):

  # We can use a Max Heap to keep track of the smallest K2 numbers
  # Then, once we've gone through the array, we can:
  # - Pop once
  # - Then add the next K2 - K1 - 1 Numbers from the top

  maxHeap = []

  # Loop through and add K2 elements to maxHeap
  # If maxHeap has more than K2 elements, the pop largest
  for i in range(len(nums)):
    heappush(maxHeap, -nums[i])

    if len(maxHeap) > k2:
      heappop(maxHeap)


  # Pop One since we don't need K2th elements for sum
  heappop(maxHeap)
  rangeSum = 0
  for _ in range(k2 - k1 - 1):
    rangeSum += -heappop(maxHeap)

  return rangeSum



def main():

  print("Sum of all numbers between k1 and k2 smallest numbers: " +
        str(find_sum_of_elements([1, 3, 12, 5, 15, 11], 3, 6)))
  print("Sum of all numbers between k1 and k2 smallest numbers: " +
        str(find_sum_of_elements([3, 5, 8, 7], 1, 4)))


main()

Rearrange String

from heapq import *
from collections import Counter


def rearrange_string(str):
  # We will use a maxHeap to store the frequencies of all characters.
  # That way, we can always add the character with the highest frequency, (while avoiding dupes)

  # Pulling the character with the highest frequency gives the best chance to create a string
  # without any dupes. Greedy approach

  maxHeap = []
  resultString = []

  # Create hashmap for character and counts
  counts = Counter(str)

  # Add all to maxHeap
  for char, count in counts.items():
    heappush(maxHeap, (-count, char))

  prevChar = None
  prevCount = 0

  # Pop the char with largest frequency
  # After, put back the previous char (to avoid dupes)
  # Loop will exit when maxHeap is empty.
  while maxHeap:
    currCount, currChar = heappop(maxHeap)

    # Replace previous char. Count will be negative or 0
    if prevChar and prevCount < 0:
      heappush(maxHeap, (prevCount, prevChar))

    resultString.append(currChar)
    currCount += 1

    prevChar = currChar
    prevCount = currCount

  
  # If we exit loop but haven't appended all chars, then we were
  # unable to form a string 
  if len(resultString) == len(str):
    return ''.join(resultString)
  else:
    return ""
    



  return ""


def main():
  print("Rearranged string:  " + rearrange_string("aappp"))
  print("Rearranged string:  " + rearrange_string("Programming"))
  print("Rearranged string:  " + rearrange_string("aapa"))


main()


Rearrange String K Distance Apart

from heapq import *
from collections import Counter
from collections import deque

def reorganize_string(str, k):
  # Similar to Rearrange String

  # Store all char in maxHeap by freq
  # Use greedy approach to append char with highest freq
  # Put the currentChar and currFreq in queue
  # Once K has passed, pop from queue and put back in heap

  # If k < 2, then we can have the same string
  if k < 2:
    return str

  maxHeap = []
  resultString = []
  charQueue = deque()

  counts = Counter(str)

  for char, count in counts.items():
    heappush(maxHeap, (-count, char))


  prevChar, prevCount = None, 0
  i = 0
  while maxHeap:

    # Pop and append to result string
    currCount, currChar = heappop(maxHeap)
    resultString.append(currChar)

    # Decrement Count and append to queue
    charQueue.append((currCount + 1, currChar))
    i += 1

    # If we've done k iterations, pop from queue
    # This is the char that we appended k iterations ago
    if i >= k:
      prevCount, prevChar = charQueue.popleft()
      if prevCount < 0:
        heappush(maxHeap, (prevCount, prevChar))


  return ''.join(resultString) if len(resultString) == len(str) else ""


def main():
  print("Reorganized string: " + reorganize_string("mmpp", 2))
  print("Reorganized string: " + reorganize_string("Programming", 3))
  print("Reorganized string: " + reorganize_string("aab", 2))
  print("Reorganized string: " + reorganize_string("aapa", 3))


main()

Scheduling Tasks

from heapq import *
from collections import deque
from collections import Counter

def schedule_tasks(tasks, k):

  # We should always try to schedule the task with the highest count.
  # So, we can get the counts for each task and add to max Heap.
  # Then, pop from maxHeap to schedule (k + 1) tasks
  # For each task, add to a waitlist
  # If we cannot add (k + 1) different tasks from max heap, we need
  # to increment interval count by the difference to insert idles
  # Then, put the tasks in the waitlist back into maxHeap

  intervalCount = 0
  maxHeap = []

  # Get Counts
  counts = Counter(tasks)

  # Put into maxHeap

  for char, count in counts.items():
    heappush(maxHeap, (-count, char))


  
  while maxHeap:
    waitList = []
    # Number of different tasks to try and execute
    n = k + 1

    # Try to Execute k + 1 tasks from maxHeap
    while maxHeap and n > 0:
      count, char = heappop(maxHeap)
      #print(char, end=", ")
      count += 1
      if count < 0:
        waitList.append((count, char))
      n -= 1
      intervalCount += 1
      #print(n)
    
    # Put waitlist tasks back into maxHeap
    for count, char in waitList:
      heappush(maxHeap, (count, char))

    # If we still have tasks to execute, but haven't done k + 1 tasks this iteration,
    # then we need to increment for idle tasks
    if maxHeap:
      intervalCount += n
      #for i in range(n):
        #print("idle", end=", ")
  #print("")

  return intervalCount


def main():
  print("Minimum intervals needed to execute all tasks: " +
        str(schedule_tasks(['a', 'a', 'a', 'b', 'c', 'c'], 2)))
  print("Minimum intervals needed to execute all tasks: " +
        str(schedule_tasks(['a', 'b', 'a'], 3)))
  print("Minimum intervals needed to execute all tasks: " +
        str(schedule_tasks(['a', 'a', 'b', 'b', 'c', 'c'], 2)))


main()


Maximum Frequency Stack

'''
Everytime we need to push, we need to keep track of the number, frequency count, and sequence number
- sequence number allows the max heap to break frequency ties with sequence numer (more recent num)
- Can keep track of frequency count in a hash map
'''

from heapq import *

class Element:
  
  def __init__(self, number, frequency, sequenceNumber):
    self.number = number
    self.frequency = frequency
    self.sequenceNumber = sequenceNumber

  def __lt__(self, other):
    if self.frequency != other.frequency:
      return self.frequency > other.frequency
    return self.sequenceNumber > other.sequenceNumber

class FrequencyStack:

  maxHeap = []
  sequenceNumber = 0
  frequencyMap = {}

  def push(self, num):

    if num not in self.frequencyMap:
      self.frequencyMap[num] = 0

    self.frequencyMap[num] += 1

    heappush(self.maxHeap, Element(num, self.frequencyMap[num], self.sequenceNumber))

    self.sequenceNumber += 1

  def pop(self):
    element = heappop(self.maxHeap)
    self.frequencyMap[element.number] -= 1

    if self.frequencyMap[element.number] < 1:
      del self.frequencyMap[element.number]

    return element.number
    


def main():
  frequencyStack = FrequencyStack()
  frequencyStack.push(1)
  frequencyStack.push(2)
  frequencyStack.push(3)
  frequencyStack.push(2)
  frequencyStack.push(1)
  frequencyStack.push(2)
  frequencyStack.push(5)
  print(frequencyStack.pop())
  print(frequencyStack.pop())
  print(frequencyStack.pop())


main()

Find K Pairs With Largest Sum

'''
The input arrays are sorted in decending order

Similar to "Kth Smallest / Largest Numbers" Except with pair sum

We can add K pair sums + pair tuples to minHeap
For each array, need to loop from 0 to min(k, len(arr)), since the largest
K pairs are guarenteed to be made up of numbers from 0 to K

For every pair after K, we compare sum with root of minHeap
- If larger than root, pop root and push new pair
- Else, break from current inner loop, since the other numbers will only
  get smaller.
'''
from heapq import *

def find_k_largest_pairs(nums1, nums2, k):
  result = []
  minHeap = []

  for i in range(min(k, len(nums1))):
    for j in range(min(k, len(nums2))):

      # If we don't have K pairs in minHeap yet, then push
      if len(minHeap) < k:
        heappush(minHeap, (nums1[i] + nums2[j], nums1[i], nums2[j]))

      # We already have K pairs, so need to see if current pair yields a larger sum
      # than the smallest sum in the minHeap. 
      # - If so, pop root and push new pair
      # - If not, break from current inner loop, since we will only be decending
      else:
        if (nums1[i] + nums2[j]) < minHeap[0][0]:
          break
        else:
          heappop(minHeap)
          heappush(minHeap, (nums1[i] + nums2[j], nums1[i], nums2[j]))

  # At this point, our minHeap has K pairs with the largest sum. 
  # So, just need to put into result array
  for (val, num1, num2) in minHeap:
    result.append([num1, num2])

  return result


def main():
  print("Pairs with largest sum are: " +
        str(find_k_largest_pairs([9, 8, 2], [6, 3, 1], 3)))


main()