# Description:

Given a non-empty list of words, return the k most frequent elements.

Your answer should be sorted by frequency from highest to lowest. If two words have the same frequency, the word that comes first alphabetically is listed first.

# Example:

Input: ["i", "love", "leetcode", "i", "love", "coding"], k = 2

Output: ["i", "love"]

Explanation: "i" and "love" are the two most frequent words.

    Note that "i" comes before "love" due to a lower alphabetical order.

Input: ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"], k = 4

Output: ["the", "is", "sunny", "day"]

Explanation: "the", "is", "sunny" and "day" are the four most frequent words, with the number of occurrences being 4, 3, 2 and 1, respectively.

# Solution:

In [24]:
class BinHeap:
    """Array-backed binary min-heap.

    Items are stored in ``heaplist`` starting at index 1; index 0 holds an
    unused placeholder so that the children of slot ``i`` are found at
    ``2 * i`` and ``2 * i + 1`` and its parent at ``i // 2``.
    """

    def __init__(self):
        self.heaplist = [0]
        self.size = 0

    def minChild(self, i):
        """Return the index of the smaller child of slot ``i``.

        The left child is returned when there is no right child or when
        the left child compares smaller than the right one.
        """
        left = i * 2
        right = left + 1
        if right <= self.size and not self.heaplist[left] < self.heaplist[right]:
            return right
        return left

    def percdown(self, i):
        """Sink the item at slot ``i`` while it is larger than its smaller child."""
        items = self.heaplist
        while i * 2 <= self.size:
            child = self.minChild(i)
            if not items[i] > items[child]:
                # Heap order already holds at this position.
                return
            items[i], items[child] = items[child], items[i]
            i = child

    def percup(self, i):
        """Float the item at slot ``i`` up while it is smaller than its parent."""
        items = self.heaplist
        parent = i // 2
        while parent > 0:
            if not items[i] < items[parent]:
                break
            items[i], items[parent] = items[parent], items[i]
            i = parent
            parent = i // 2

    def heapify(self, alist):
        """Replace the heap contents with the items of the list ``alist``."""
        self.size = len(alist)
        # Concatenation builds a new list, so ``alist`` itself is not modified.
        self.heaplist = [0] + alist
        # Sink every slot that can have children, from the last one to the root.
        for slot in range(self.size // 2, 0, -1):
            self.percdown(slot)

    def heappush(self, n):
        """Insert ``n`` into the heap."""
        self.heaplist.append(n)
        self.size += 1
        self.percup(self.size)

    def heappop(self):
        """Remove and return the smallest item.

        The root is swapped with the last item and removed from the end of
        the list; the new root is then sunk to restore heap order.
        """
        items = self.heaplist
        last = self.size
        items[last], items[1] = items[1], items[last]
        smallest = items.pop()
        self.size -= 1
        self.percdown(1)
        return smallest

In [25]:
class Word:
    """A word together with its occurrence count, ordered for a min-heap.

    ``a < b`` holds when ``a`` has the lower frequency, or, at equal
    frequency, when ``a.word`` sorts after ``b.word``. The smallest item
    in a min-heap is therefore the least frequent word, with ties broken
    in favour of the alphabetically later one.
    """

    def __init__(self, freq, word):
        self.freq = freq
        self.word = word

    def __lt__(self, other):
        try:
            if self.freq == other.freq:
                # Reversed on purpose: at equal frequency the
                # alphabetically later word is the "smaller" one.
                return self.word > other.word
            return self.freq < other.freq
        except AttributeError:
            # ``other`` has no freq/word; let Python try the reflected
            # operation or raise TypeError itself.
            return NotImplemented

    def __eq__(self, other):
        try:
            return self.freq == other.freq and self.word == other.word
        except AttributeError:
            # Unrelated operand: defer instead of raising, so that
            # ``Word(...) == 5`` evaluates to False.
            return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the
        # same fields that __eq__ compares. Do not mutate an instance
        # while it is used as a dict key or set member.
        return hash((self.freq, self.word))

    def __repr__(self):
        return "Word(freq={!r}, word={!r})".format(self.freq, self.word)

In [28]:
from collections import defaultdict

class Solution_1:
    """Top-k frequent words using the hand-written ``BinHeap``."""

    def topKFrequent(self, words, k):
        """Return the ``k`` most frequent words, most frequent first.

        Words with equal frequency are listed in alphabetical order. If
        ``k`` exceeds the number of distinct words, every distinct word is
        returned; if ``k`` is not positive, the result is empty.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        counts = defaultdict(int)
        for word in words:
            counts[word] += 1

        # Bounded min-heap: whenever it grows past k entries, the current
        # minimum (the worst-ranked candidate) is discarded.
        heap = BinHeap()
        for word, freq in counts.items():
            heap.heappush(Word(freq, word))
            if heap.size > k:
                heap.heappop()

        # Drain whatever is left instead of popping exactly k times, so a
        # k larger than the number of distinct words cannot pop from an
        # empty heap. Items come out worst-first, hence the final reverse.
        ranked = []
        while heap.size > 0:
            ranked.append(heap.heappop().word)
        ranked.reverse()

        return ranked

In [36]:
from collections import Counter
import heapq

class Solution_2:
    """Top-k frequent words using the standard-library ``heapq`` module."""

    def topKFrequent(self, words, k):
        """Return the ``k`` most frequent words, most frequent first.

        Words with equal frequency are listed in alphabetical order. If
        ``k`` exceeds the number of distinct words, every distinct word is
        returned; if ``k`` is not positive, the result is empty.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        count = Counter(words)

        # Rank by descending frequency, then ascending word. Negating the
        # count lets nsmallest pick the most frequent entries and return
        # them already in the required output order.
        ranked = heapq.nsmallest(
            k, count.items(), key=lambda item: (-item[1], item[0])
        )

        return [word for word, _ in ranked]

# Test:

In [37]:
if __name__ == '__main__':
    # Run both implementations on the second example from the problem
    # statement and print each result on its own line.
    sample = ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"]
    for solver in (Solution_1(), Solution_2()):
        print(solver.topKFrequent(sample, 4))

['the', 'is', 'sunny', 'day']
['the', 'is', 'sunny', 'day']
