# 堆排序

堆排序（Heapsort）是指利用堆这种数据结构所设计的一种排序算法。堆是一个近似完全二叉树的结构，并同时满足堆的性质：即子结点的键值或索引总是小于（或者大于）它的父结点。

7.1 算法描述

将初始待排序关键字序列(R1,R2….Rn)构建成大顶堆，此堆为初始的无序区；

将堆顶元素R[1]与最后一个元素R[n]交换，此时得到新的无序区(R1,R2,……Rn-1)和新的有序区(Rn),且满足R[1,2…n-1]<=R[n]；

由于交换后新的堆顶R[1]可能违反堆的性质，因此需要将当前无序区(R1,R2,……Rn-1)调整为新堆，然后再次将R[1]与无序区最后一个元素交换，得到新的无序区(R1,R2….Rn-2)和新的有序区(Rn-1,Rn)。不断重复此过程直到有序区的元素个数为n-1，则整个排序过程完成。

## 调整成最大堆：初始建堆时，从下往上调整；交换堆顶与堆尾后，从上往下调整

# 结点i的孩子结点为 2i+1 和 2i+2

In [60]:
from __future__ import print_function
class Solution(object):
    """In-place heap sort backed by a max-heap stored in a plain list.

    The children of the node at index ``i`` live at ``2*i + 1`` and
    ``2*i + 2``.
    """

    def adjust(self, heap, start, end):
        """Sift ``heap[start]`` down inside ``heap[start..end]`` (inclusive).

        At each step the node is swapped with its larger child until it is
        strictly greater than both children or has no child within ``end``.
        """
        parent = start
        while True:
            child = 2 * parent + 1
            if child > end:
                return
            sibling = child + 1
            # Pick the larger of the two children when a right child exists.
            if sibling <= end and heap[child] < heap[sibling]:
                child = sibling
            if heap[parent] > heap[child]:
                return
            # Pairwise swap on every level of the descent.
            heap[parent], heap[child] = heap[child], heap[parent]
            parent = child

    def heap_sort(self, array):
        """Sort ``array`` ascending in place, print progress, and return it.

        The heap is printed once after it has been built and once after the
        sort has finished.
        """
        size = len(array)
        # Build the max-heap bottom-up, starting from the last internal node.
        for root in reversed(range(size // 2)):
            self.adjust(array, root, size - 1)
        print('初始化：', array)
        # Move the current maximum behind the shrinking heap, then repair
        # the heap from the root.
        for tail in reversed(range(size)):
            array[0], array[tail] = array[tail], array[0]
            self.adjust(array, 0, tail - 1)
        print('排序后：', array)
        return array

In [61]:
# Demo: sort a sample list with heap_sort. The call prints the heap after
# construction and the final order; the returned list is the cell's value.
s = Solution()
s.heap_sort([4,5,1,6,2,7,3,8])

初始化： [8, 6, 7, 5, 2, 1, 3, 4]
排序后： [1, 2, 3, 4, 5, 6, 7, 8]


[1, 2, 3, 4, 5, 6, 7, 8]

In [62]:
class Solution(object):
    """In-place heap sort whose sift-down moves a "hole" instead of swapping.

    The children of the node at index ``i`` live at ``2*i + 1`` and
    ``2*i + 2``.
    """

    def adjust(self, heap, start, end):
        """Sift ``heap[start]`` down inside ``heap[start..end]`` (inclusive).

        The value being sifted is held aside; larger children are pulled up
        into the hole and the held value is written once at the end.
        """
        pending = heap[start]
        hole = start
        child = 2 * hole + 1
        while child <= end:
            sibling = child + 1
            # Pick the larger of the two children when a right child exists.
            if sibling <= end and heap[child] < heap[sibling]:
                child = sibling
            if pending > heap[child]:
                break
            # Pull the child up; the hole moves down one level.
            heap[hole] = heap[child]
            hole, child = child, 2 * child + 1
        heap[hole] = pending

    def heap_sort(self, array):
        """Sort ``array`` ascending in place, print progress, and return it.

        The heap is printed once after it has been built and once after the
        sort has finished.
        """
        size = len(array)
        # Build the max-heap bottom-up, starting from the last internal node.
        for root in reversed(range(size // 2)):
            self.adjust(array, root, size - 1)
        print('初始化：', array)
        # Swap the heap top with the last unsorted slot, then repair the heap.
        for tail in reversed(range(size)):
            array[0], array[tail] = array[tail], array[0]
            self.adjust(array, 0, tail - 1)
        print('排序后：', array)
        return array

In [63]:
# Demo: sort the same sample list with the hole-based variant. The call
# prints the heap after construction and the final order.
s = Solution()
s.heap_sort([4,5,1,6,2,7,3,8])

初始化： [8, 6, 7, 5, 2, 1, 3, 4]
排序后： [1, 2, 3, 4, 5, 6, 7, 8]


[1, 2, 3, 4, 5, 6, 7, 8]

In [67]:
import heapq
# Walk through the basic heapq operations on a small list.
# NOTE(review): the values look like the negation of the sample list used in
# the other cells, presumably to emulate a max-heap with heapq's min-heap.
lst = [-4,-5,-1,-6,-2,-7,-3,-8]
# Rearrange the list in place into min-heap order.
heapq.heapify(lst)
print('初始化：', lst)
# Insert a new value while keeping the heap invariant.
heapq.heappush(lst, -10)
print('插入-10：', lst)
# Remove the smallest item; the popped value itself is discarded here.
heapq.heappop(lst)
print('pop最小值：', lst)
# Last expression of the cell: the three smallest items.
heapq.nsmallest(3, lst)

初始化： [-8, -6, -7, -5, -2, -1, -3, -4]
插入-10： [-10, -8, -7, -6, -2, -1, -3, -4, -5]
pop最小值： [-8, -6, -7, -5, -2, -1, -3, -4]


[-8, -7, -6]

# 最小的k个数

In [52]:
# -*- coding:utf-8 -*-
class Solution:
    """Select the k smallest values of a list with a hand-written max-heap."""

    def GetLeastNumbers_Solution(self, tinput, k):
        """Return the ``k`` smallest elements of ``tinput`` in ascending order.

        A max-heap of size ``k`` holds the smallest values seen so far; its
        root is the largest of them, so any later value smaller than the root
        replaces it. The heap is finally heap-sorted in place.

        Args:
            tinput: list of mutually comparable values; it is not modified.
            k: number of values to select.

        Returns:
            A new list with the ``k`` smallest values in ascending order, or
            ``[]`` when ``tinput`` is empty, ``k <= 0`` or ``k > len(tinput)``.

        The heap is printed after construction and again after the scan.
        """
        if not tinput or k <= 0 or len(tinput) < k:
            return []
        # Seed a max-heap of size k with the first k values.
        heap = tinput[:k]
        last = len(heap) - 1
        # Floor division keeps the start index an int; with "/" Python 3
        # hands range() a float and raises TypeError.
        for i in range(len(heap) // 2 - 1, -1, -1):
            self.adjust(heap, i, last)
        print(heap)
        # Scan the remaining values: anything smaller than the heap top
        # replaces it, after which the root is sifted back down.
        for i in range(k, len(tinput)):
            if tinput[i] < heap[0]:
                heap[0] = tinput[i]
                self.adjust(heap, 0, last)
        print(heap)
        # Heap-sort the k survivors in place to return them in ascending order.
        for i in range(k - 1, -1, -1):
            heap[0], heap[i] = heap[i], heap[0]
            self.adjust(heap, 0, i - 1)
        return heap

    def adjust(self, heap, start, end):
        """Sift ``heap[start]`` down inside ``heap[start..end]`` (max-heap).

        The node is swapped with its larger child until it is strictly
        greater than both children or has no child within ``end``.
        """
        j = start * 2 + 1
        while j <= end:
            # Pick the larger of the two children when a right child exists.
            if j < end and heap[j] < heap[j + 1]:
                j += 1
            if heap[start] > heap[j]:
                break
            heap[j], heap[start] = heap[start], heap[j]
            start = j
            j = j * 2 + 1

In [53]:
# Demo: ask for the 4 smallest values of an 8-element sample list. The call
# prints the heap twice (after construction and after the scan).
lst = [4,5,1,6,2,7,3,8]
k = 4
s = Solution()
s.GetLeastNumbers_Solution(lst, k)

[6, 5, 1, 4]
[4, 3, 1, 2]


[1, 2, 3, 4]

In [5]:
import heapq
# -*- coding:utf-8 -*-
class Solution:
    """Select the k smallest values using ``heapq`` on negated values."""

    def GetLeastNumbers_Solution(self, tinput, k):
        """Return the ``k`` smallest elements of ``tinput`` in ascending order.

        ``heapq`` implements a min-heap, so every value is stored negated:
        the heap root is then the negation of the largest value kept so far.

        Args:
            tinput: list of numbers; it is not modified.
            k: number of values to select.

        Returns:
            A new list with the ``k`` smallest values in ascending order, or
            ``[]`` when ``tinput`` is empty, ``k <= 0`` or ``k > len(tinput)``.
        """
        if not tinput or k <= 0 or len(tinput) < k:
            return []
        heap = [-x for x in tinput[:k]]
        heapq.heapify(heap)
        for i in range(k, len(tinput)):
            negated = -tinput[i]
            # negated > root means the value is smaller than the largest kept.
            if negated > heap[0]:
                # One sift instead of a separate heappop + heappush.
                heapq.heapreplace(heap, negated)
        # Undo the negation and return the survivors in ascending order.
        return sorted(-x for x in heap)

In [6]:
# Demo: ask the heapq-based version for the 4 smallest values of the same
# 8-element sample list.
lst = [4,5,1,6,2,7,3,8]
k = 4
s = Solution()
s.GetLeastNumbers_Solution(lst, k)

[1, 2, 3, 4]