## 收集人口数据

从[国家统计局](https://data.stats.gov.cn/)网站上可以收集到官方版本的各种国内数据和近几次人口普查的详细数据。

## 分析历史数据

对历史数据进行可视化，并从多个不同的角度进行初步分析；同时结合具体的时间节点进行详细分析。

In [1]:
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Geo, Timeline, Grid, Bar, Map, Pie, Line
from pyecharts.globals import ChartType, ThemeType
from typing import List
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Timeline, Grid, Bar, Map, Pie, Line

### 1. 可视化第五次人口普查数据

#### （1）全国各地区总人口数量与城乡人口数量

In [2]:
# Directory holding the fifth-census spreadsheets read by this notebook.
root_path = '第五次人口普查/'

# Table t0101: spreadsheet row 6 (0-based) is taken as the header row, then
# every row containing a missing value is discarded.
full_view_path = root_path + 't0101.xls'
data = pd.read_excel(io=full_view_path, header=6)
data = data.dropna()
# Replace the spreadsheet's header labels with positional integers 0..16.
data.columns = list(range(17))
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
1,北京市,4417045.0,4096844.0,320201.0,13569194.0,7074518.0,6494676.0,108.93,11922945.0,5925670.0,5997275.0,98.81,1646249.0,1148848.0,497401.0,230.97,2.91
2,天津市,3079377.0,2976741.0,102636.0,9848731.0,5016375.0,4832356.0,103.81,9218220.0,4605720.0,4612500.0,99.85,630511.0,410655.0,219856.0,186.78,3.1
3,河北省,18302744.0,17934977.0,367767.0,66684419.0,33936333.0,32748086.0,103.63,64296922.0,32494651.0,31802271.0,102.18,2387497.0,1441682.0,945815.0,152.43,3.59
4,山西省,8825948.0,8650261.0,175687.0,32471242.0,16800758.0,15670484.0,107.21,31448836.0,16107462.0,15341374.0,104.99,1022406.0,693296.0,329110.0,210.66,3.64
5,内蒙古自治区,6907890.0,6784470.0,123420.0,23323347.0,12061615.0,11261732.0,107.1,22618741.0,11611976.0,11006765.0,105.5,704606.0,449639.0,254967.0,176.35,3.33
7,辽宁省,13046524.0,12866262.0,180262.0,41824412.0,21323383.0,20501029.0,104.01,40597750.0,20547768.0,20049982.0,102.48,1226662.0,775615.0,451047.0,171.96,3.16
8,吉林省,7953615.0,7848446.0,105169.0,26802191.0,13720747.0,13081444.0,104.89,26076948.0,13265133.0,12811815.0,103.54,725243.0,455614.0,269629.0,168.98,3.32
9,黑龙江省,11070922.0,10955750.0,115172.0,36237576.0,18520747.0,17716829.0,104.54,35457466.0,18040981.0,17416485.0,103.59,780110.0,479766.0,300344.0,159.74,3.24
11,上海市,5642588.0,5299068.0,343520.0,16407734.0,8430262.0,7977472.0,105.68,14787225.0,7381235.0,7405990.0,99.67,1620509.0,1049027.0,571482.0,183.56,2.79
12,江苏省,22034267.0,21375726.0,658541.0,73043577.0,36982038.0,36061539.0,102.55,69372929.0,34775918.0,34597011.0,100.52,3670648.0,2206120.0,1464528.0,150.64,3.25


In [3]:
# Autonomous regions whose short name cannot be obtained by simply dropping
# the last character of the full name.
change_dict = {'广西壮族自治区':'广西','西藏自治区':'西藏','新疆维吾尔自治区':'新疆',"内蒙古自治区":"内蒙古","宁夏回族自治区":"宁夏"}
area_list_full = data[0].values.tolist()
# Short region names: use the explicit mapping when present, otherwise strip
# the trailing character of the full name.
area_list = [change_dict.get(area, area[:-1]) for area in area_list_full]
# Columns 4, 5, 6 and 16 feed the total / male / female / per-household lists
# (meaning taken from the variable names; the table header is not visible here).
total_pop_list, man_list, woman_list, average_per_house_list = (
    data[column].values.tolist() for column in (4, 5, 6, 16)
)

In [5]:
# Map of China with one [region, value] pair per entry of total_pop_list.
# Series label options are left at their defaults.
c = Map()
c.add(
    "population",
    [[region, value] for region, value in zip(area_list, total_pop_list)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Population_Map_For_2000"),
    # Piecewise colour legend with an upper bound of 100,000,000.
    visualmap_opts=opts.VisualMapOpts(max_=100000000, is_piecewise=True),
)
c.render_notebook()

In [6]:
def change_area(area_list, value_list):
    """Order two parallel sequences by value, largest first.

    The inputs are paired element by element (pairing stops at the shorter
    one), the pairs are sorted on the value in descending order, and the
    two columns are split apart again. Pairs with equal values keep their
    original relative order.

    Returns:
        A ``(sorted_area, sorted_value)`` tuple of two lists.
    """
    pairs = list(zip(area_list, value_list))
    pairs.sort(key=lambda pair: pair[1], reverse=True)

    sorted_area = []
    sorted_value = []
    for name, value in pairs:
        sorted_area.append(name)
        sorted_value.append(value)
    return sorted_area, sorted_value
    
# Regions and their total-population values, ordered from largest to smallest.
sorted_area, sorted_total_pop = change_area(
    area_list=area_list, value_list=total_pop_list
)

In [7]:
# Ranking bar chart of total population on a dark background.
bar = Bar(init_opts=opts.InitOpts(bg_color='rgba(51,51,51)'))
bar.add_xaxis(xaxis_data=sorted_area)
bar.add_yaxis(
    series_name="",
    y_axis=sorted_total_pop,
    # Each bar carries a "<category> : <value>" label on its right-hand side.
    label_opts=opts.LabelOpts(
        is_show=True, position="right", formatter="{b} : {c}"
    ),
)
# Flip the axes so the bars are drawn horizontally.
bar.reversal_axis()
bar.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),
    # Tick labels are hidden on both axes; the value axis is capped at 100,000,000.
    xaxis_opts=opts.AxisOpts(
        max_=100000000, axislabel_opts=opts.LabelOpts(is_show=False)
    ),
    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
    # Continuous colour scale keyed on dimension 0, spanning 0..100,000,000.
    visualmap_opts=opts.VisualMapOpts(
        min_=0,
        max_=100000000,
        dimension=0,
        is_calculable=True,
        pos_left="10",
        pos_top="top",
        range_text=["High", "Low"],
        range_color=["lightskyblue", "yellow", "orangered"],
        textstyle_opts=opts.TextStyleOpts(color="#ddd"),
    ),
)

bar.render_notebook()

#### 男性人数

In [8]:
# Map of China with one [region, value] pair per entry of man_list.
# Series label options are left at their defaults.
c = Map()
c.add(
    "population",
    [[region, value] for region, value in zip(area_list, man_list)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Male_Population_Map_For_2000"),
    # Piecewise colour legend with an upper bound of 50,000,000.
    visualmap_opts=opts.VisualMapOpts(max_=50000000, is_piecewise=True),
)
c.render_notebook()

In [9]:
# Rank regions by the values in man_list, largest first.
sorted_area, sorted_man = change_area(area_list, man_list)

# Ranking bar chart on a dark background.
bar = Bar(init_opts=opts.InitOpts(bg_color='rgba(51,51,51)'))
bar.add_xaxis(xaxis_data=sorted_area)
bar.add_yaxis(
    series_name="",
    y_axis=sorted_man,
    # Each bar carries a "<category> : <value>" label on its right-hand side.
    label_opts=opts.LabelOpts(
        is_show=True, position="right", formatter="{b} : {c}"
    ),
)
# Flip the axes so the bars are drawn horizontally.
bar.reversal_axis()
bar.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),
    # Tick labels are hidden on both axes; the value axis is capped at 50,000,000.
    xaxis_opts=opts.AxisOpts(
        max_=50000000, axislabel_opts=opts.LabelOpts(is_show=False)
    ),
    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
    # Continuous colour scale keyed on dimension 0, spanning 0..50,000,000.
    visualmap_opts=opts.VisualMapOpts(
        min_=0,
        max_=50000000,
        dimension=0,
        is_calculable=True,
        pos_left="10",
        pos_top="top",
        range_text=["High", "Low"],
        range_color=["lightskyblue", "yellow", "orangered"],
        textstyle_opts=opts.TextStyleOpts(color="#ddd"),
    ),
)

bar.render_notebook()

#### 女性人数

In [10]:
# Map of China with one [region, value] pair per entry of woman_list.
# Series label options are left at their defaults.
c = Map()
c.add(
    "population",
    [[region, value] for region, value in zip(area_list, woman_list)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Female_Population_Map_For_2000"),
    # Piecewise colour legend with an upper bound of 50,000,000.
    visualmap_opts=opts.VisualMapOpts(max_=50000000, is_piecewise=True),
)
c.render_notebook()

In [11]:
# Rank regions by the values in woman_list, largest first.
sorted_area, sorted_woman = change_area(area_list, woman_list)

# Ranking bar chart on a dark background.
bar = Bar(init_opts=opts.InitOpts(bg_color='rgba(51,51,51)'))
bar.add_xaxis(xaxis_data=sorted_area)
bar.add_yaxis(
    series_name="",
    y_axis=sorted_woman,
    # Each bar carries a "<category> : <value>" label on its right-hand side.
    label_opts=opts.LabelOpts(
        is_show=True, position="right", formatter="{b} : {c}"
    ),
)
# Flip the axes so the bars are drawn horizontally.
bar.reversal_axis()
bar.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),
    # Tick labels are hidden on both axes; the value axis is capped at 50,000,000.
    xaxis_opts=opts.AxisOpts(
        max_=50000000, axislabel_opts=opts.LabelOpts(is_show=False)
    ),
    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
    # Continuous colour scale keyed on dimension 0, spanning 0..50,000,000.
    visualmap_opts=opts.VisualMapOpts(
        min_=0,
        max_=50000000,
        dimension=0,
        is_calculable=True,
        pos_left="10",
        pos_top="top",
        range_text=["High", "Low"],
        range_color=["lightskyblue", "yellow", "orangered"],
        textstyle_opts=opts.TextStyleOpts(color="#ddd"),
    ),
)

bar.render_notebook()

#### 每户平均人口数

In [4]:
# Map of China with one [region, value] pair per entry of
# average_per_house_list. Series label options are left at their defaults.
c = Map()
c.add(
    "population",
    [[region, value] for region, value in zip(area_list, average_per_house_list)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Population_Per_Household_Map_for_2000"),
    # Piecewise colour legend bounded to the 2.9..5 range.
    visualmap_opts=opts.VisualMapOpts(min_=2.9, max_=5, is_piecewise=True),
)
c.render_notebook()

In [13]:
# Rank regions by the values in average_per_house_list, largest first.
sorted_area, sorted_average = change_area(area_list, average_per_house_list)

# Ranking bar chart on a dark background.
bar = Bar(init_opts=opts.InitOpts(bg_color='rgba(51,51,51)'))
bar.add_xaxis(xaxis_data=sorted_area)
bar.add_yaxis(
    series_name="",
    y_axis=sorted_average,
    # Each bar carries a "<category> : <value>" label on its right-hand side.
    label_opts=opts.LabelOpts(
        is_show=True, position="right", formatter="{b} : {c}"
    ),
)
# Flip the axes so the bars are drawn horizontally.
bar.reversal_axis()
bar.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),
    # Tick labels are hidden on both axes; the value axis is limited to 2..5.
    xaxis_opts=opts.AxisOpts(
        min_=2, max_=5, axislabel_opts=opts.LabelOpts(is_show=False)
    ),
    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
    # Continuous colour scale keyed on dimension 0, spanning 2..5.
    visualmap_opts=opts.VisualMapOpts(
        min_=2,
        max_=5,
        dimension=0,
        is_calculable=True,
        pos_left="10",
        pos_top="top",
        range_text=["High", "Low"],
        range_color=["lightskyblue", "yellow", "orangered"],
        textstyle_opts=opts.TextStyleOpts(color="#ddd"),
    ),
)

bar.render_notebook()

#### （2）外来人口分析

In [14]:
# Table t0104: spreadsheet row 6 (0-based) is taken as the header row, then
# every row containing a missing value is discarded.
full_view_path = root_path + 't0104.xls'
data = pd.read_excel(io=full_view_path, header=6)
data = data.dropna()
# Replace the spreadsheet's header labels with positional integers 0..15.
data.columns = list(range(16))
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1,北京市,4637531.0,2552400.0,2085131.0,111952.0,55237.0,56715.0,1922063.0,938029.0,984034.0,140299.0,72885.0,67414.0,2463217.0,1486249.0,976968.0
2,天津市,2181623.0,1141344.0,1040279.0,95093.0,48091.0,47002.0,1295612.0,636448.0,659164.0,55885.0,27412.0,28473.0,735033.0,429393.0,305640.0
3,河北省,4881712.0,2615523.0,2266189.0,1379257.0,734839.0,644418.0,1370717.0,674143.0,696574.0,1201283.0,669772.0,531511.0,930455.0,536769.0,393686.0
4,山西省,3720553.0,2007133.0,1713420.0,1009501.0,532366.0,477135.0,1251585.0,631775.0,619810.0,792110.0,422998.0,369112.0,667357.0,419994.0,247363.0
5,内蒙古自治区,3827825.0,1993393.0,1834432.0,1173746.0,585327.0,588419.0,880668.0,435720.0,444948.0,1225488.0,645936.0,579552.0,547923.0,326410.0,221513.0
7,辽宁省,6482242.0,3292571.0,3189671.0,1056227.0,527791.0,528436.0,3119608.0,1535191.0,1584417.0,1261242.0,635994.0,625248.0,1045165.0,593595.0,451570.0
8,吉林省,2949320.0,1505045.0,1444275.0,960959.0,474048.0,486911.0,1044081.0,514070.0,530011.0,635675.0,326254.0,309421.0,308605.0,190673.0,117932.0
9,黑龙江省,3768411.0,1924960.0,1843451.0,767182.0,378621.0,388561.0,1207507.0,592124.0,615383.0,1407081.0,723937.0,683144.0,386641.0,230278.0,156363.0
11,上海市,5384589.0,2888435.0,2496154.0,142138.0,71016.0,71122.0,882377.0,434902.0,447475.0,1225152.0,606455.0,618697.0,3134922.0,1776062.0,1358860.0
12,江苏省,9099849.0,4720838.0,4379011.0,2045770.0,1038503.0,1007267.0,2046696.0,1026481.0,1020215.0,2470494.0,1326695.0,1143799.0,2536889.0,1329159.0,1207730.0


In [15]:
area_list_full = data[0].values.tolist()
# Short region names: use the explicit mapping when present, otherwise strip
# the trailing character of the full name.
area_list = [change_dict.get(area, area[:-1]) for area in area_list_full]
# Column 13 is what the charts below present as the "outside" population
# (column meaning inferred from the variable name; header not visible here).
outside_pop = data[13].values.tolist()

In [16]:
# Map of China with one [region, value] pair per entry of outside_pop.
# Series label options are left at their defaults.
c = Map()
c.add(
    "population",
    [[region, value] for region, value in zip(area_list, outside_pop)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Outside_Population_Map_For_2000"),
    # Piecewise colour legend with an upper bound of 16,000,000.
    visualmap_opts=opts.VisualMapOpts(max_=16000000, is_piecewise=True),
)
c.render_notebook()

In [18]:
# Rank regions by the values in outside_pop, largest first.
sorted_area, sorted_out = change_area(area_list, outside_pop)

# Ranking bar chart on a dark background.
bar = Bar(init_opts=opts.InitOpts(bg_color='rgba(51,51,51)'))
bar.add_xaxis(xaxis_data=sorted_area)
bar.add_yaxis(
    series_name="",
    y_axis=sorted_out,
    # Each bar carries a "<category> : <value>" label on its right-hand side.
    label_opts=opts.LabelOpts(
        is_show=True, position="right", formatter="{b} : {c}"
    ),
)
# Flip the axes so the bars are drawn horizontally.
bar.reversal_axis()
bar.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),
    # Tick labels are hidden on both axes; the value axis runs 0..20,000,000.
    xaxis_opts=opts.AxisOpts(
        min_=0, max_=20000000, axislabel_opts=opts.LabelOpts(is_show=False)
    ),
    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
    # Continuous colour scale keyed on dimension 0. Its upper bound is
    # 16,000,000, which is lower than the axis maximum above.
    visualmap_opts=opts.VisualMapOpts(
        min_=0,
        max_=16000000,
        dimension=0,
        is_calculable=True,
        pos_left="10",
        pos_top="top",
        range_text=["High", "Low"],
        range_color=["lightskyblue", "yellow", "orangered"],
        textstyle_opts=opts.TextStyleOpts(color="#ddd"),
    ),
)

bar.render_notebook()

#### 受教育情况

In [19]:
# Table t0108: spreadsheet row 6 (0-based) is taken as the header row, then
# every row containing a missing value is discarded.
full_view_path = root_path + 't0108.xls'
data = pd.read_excel(io=full_view_path, header=6)
data = data.dropna()
# Replace the spreadsheet's header labels with positional integers 0..30.
data.columns = list(range(31))
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
1,北京市,13023990.0,6788237.0,6235753.0,581637.0,127773.0,453864.0,47255.0,13966.0,33289.0,...,511935.0,1029929.0,522487.0,507442.0,1082268.0,621697.0,460571.0,172653.0,114011.0,58642.0
2,天津市,9381201.0,4768665.0,4612536.0,513821.0,118661.0,395160.0,52641.0,15108.0,37533.0,...,411906.0,506548.0,273693.0,232855.0,355426.0,210338.0,145088.0,24040.0,15430.0,8610.0
3,河北省,62335115.0,31608495.0,30726620.0,3746461.0,1318509.0,2427952.0,1391599.0,567625.0,823974.0,...,965771.0,1241219.0,694453.0,546766.0,539833.0,330894.0,208939.0,18020.0,12153.0,5867.0
4,山西省,29741362.0,15367303.0,14374059.0,1543364.0,494072.0,1049292.0,367239.0,125764.0,241475.0,...,571939.0,784075.0,449507.0,334568.0,314548.0,195122.0,119426.0,12452.0,7946.0,4506.0
5,内蒙古自治区,21834015.0,11283138.0,10550877.0,2186638.0,702262.0,1484376.0,166045.0,62535.0,103510.0,...,424856.0,648891.0,363494.0,285397.0,234087.0,141821.0,92266.0,6851.0,4368.0,2483.0
7,辽宁省,39632542.0,20160932.0,19471610.0,2117529.0,585083.0,1532446.0,171572.0,54933.0,116639.0,...,777309.0,1692445.0,923053.0,769392.0,845654.0,513277.0,332377.0,47251.0,30942.0,16309.0
8,吉林省,25474107.0,13022912.0,12451195.0,1342416.0,435421.0,906995.0,189862.0,69127.0,120735.0,...,512322.0,813292.0,451474.0,361818.0,486979.0,290865.0,196114.0,21600.0,13499.0,8101.0
9,黑龙江省,34329762.0,17527645.0,16802117.0,1968874.0,612375.0,1356499.0,147023.0,52682.0,94341.0,...,613537.0,1199694.0,665200.0,534494.0,517847.0,313571.0,204276.0,24013.0,15856.0,8157.0
11,上海市,15806710.0,8115345.0,7691365.0,873696.0,176195.0,697501.0,216143.0,49493.0,166650.0,...,586702.0,934083.0,537926.0,396157.0,784698.0,493142.0,291556.0,76188.0,53249.0,22939.0
12,江苏省,69220051.0,34873894.0,34346157.0,4585521.0,1036621.0,3548900.0,1638610.0,396757.0,1241853.0,...,1027994.0,1857521.0,1168251.0,689270.0,946584.0,644092.0,302492.0,58165.0,42855.0,15310.0


In [20]:
area_list_full = data[0].values.tolist()
# Short region names: use the explicit mapping when present, otherwise strip
# the trailing character of the full name.
area_list = [change_dict.get(area, area[:-1]) for area in area_list_full]
# One list per education level, read from every third column starting at
# column 4 (columns 4, 7, 10, ..., 28), in the order of the names below.
(
    no_school,
    literacy,
    primary,
    middle,
    high,
    tech,
    junior,
    university,
    MBA,
) = (data[column].values.tolist() for column in range(4, 29, 3))

In [21]:
# A single map of China carrying nine series, one per education level.
# Every series is a list of [region, value] pairs drawn on the "china" map.
# Series label options are left at their defaults.
c = Map()
c.add("no_school",
      [[region, value] for region, value in zip(area_list, no_school)], "china")
c.add("literacy",
      [[region, value] for region, value in zip(area_list, literacy)], "china")
c.add("primary",
      [[region, value] for region, value in zip(area_list, primary)], "china")
c.add("middle",
      [[region, value] for region, value in zip(area_list, middle)], "china")
c.add("high",
      [[region, value] for region, value in zip(area_list, high)], "china")
c.add("tech",
      [[region, value] for region, value in zip(area_list, tech)], "china")
c.add("junior",
      [[region, value] for region, value in zip(area_list, junior)], "china")
c.add("university",
      [[region, value] for region, value in zip(area_list, university)], "china")
c.add("MBA",
      [[region, value] for region, value in zip(area_list, MBA)], "china")
c.set_global_opts(
    title_opts=opts.TitleOpts(title="2000"),
    # One piecewise colour legend, capped at 20,000,000, shared by all series.
    visualmap_opts=opts.VisualMapOpts(max_=20000000, is_piecewise=True),
)
c.render_notebook()

In [22]:
def draw_bar(area_list, value_list, max_num = 1000000):
    """Build a horizontal ranking bar chart for one value per region.

    Args:
        area_list: Category names, parallel to ``value_list``.
        value_list: Values to rank; bars are ordered by ``change_area``
            (largest value first).
        max_num: Upper bound used for both the value axis and the colour
            scale; both start at 0.

    Returns:
        The configured pyecharts ``Bar`` chart (not yet rendered).
    """
    ranked_areas, ranked_values = change_area(area_list, value_list)

    bar = Bar(init_opts=opts.InitOpts(bg_color='rgba(51,51,51)'))
    bar.add_xaxis(xaxis_data=ranked_areas)
    bar.add_yaxis(
        series_name="",
        y_axis=ranked_values,
        # Each bar carries a "<category> : <value>" label on its right.
        label_opts=opts.LabelOpts(
            is_show=True, position="right", formatter="{b} : {c}"
        ),
    )
    # Flip the axes so the bars are drawn horizontally.
    bar.reversal_axis()
    bar.set_global_opts(
        tooltip_opts=opts.TooltipOpts(is_show=False),
        # Tick labels are hidden on both axes.
        xaxis_opts=opts.AxisOpts(
            min_=0, max_=max_num, axislabel_opts=opts.LabelOpts(is_show=False)
        ),
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
        # Continuous colour scale keyed on dimension 0, spanning 0..max_num.
        visualmap_opts=opts.VisualMapOpts(
            min_=0,
            max_=max_num,
            dimension=0,
            is_calculable=True,
            pos_left="10",
            pos_top="top",
            range_text=["High", "Low"],
            range_color=["lightskyblue", "yellow", "orangered"],
            textstyle_opts=opts.TextStyleOpts(color="#ddd"),
        ),
    )

    return bar

In [23]:
# Ranking chart for the no_school series; scale capped at 8,000,000.
bar = draw_bar(area_list, no_school, max_num=8000000)
bar.render_notebook()

In [76]:
# Ranking chart for the literacy series; scale capped at 2,000,000.
bar = draw_bar(area_list, literacy, max_num=2000000)
bar.render_notebook()

In [78]:
# Ranking chart for the primary series; scale capped at 40,000,000.
bar = draw_bar(area_list, primary, max_num=40000000)
bar.render_notebook()

In [80]:
# Ranking chart for the middle series; scale capped at 40,000,000.
bar = draw_bar(area_list, middle, max_num=40000000)
bar.render_notebook()

In [83]:
# Ranking chart for the high series; scale capped at 9,000,000.
bar = draw_bar(area_list, high, max_num=9000000)
bar.render_notebook()

In [86]:
# Ranking chart for the tech series; scale capped at 4,000,000.
bar = draw_bar(area_list, tech, max_num=4000000)
bar.render_notebook()

In [87]:
# Ranking chart for the junior series; scale capped at 2,000,000.
bar = draw_bar(area_list, junior, max_num=2000000)
bar.render_notebook()

In [89]:
# Ranking chart for the university series; scale capped at 2,000,000.
bar = draw_bar(area_list, university, max_num=2000000)
bar.render_notebook()

In [90]:
# Ranking chart for the MBA series; scale capped at 2,000,000.
bar = draw_bar(area_list, MBA, max_num=2000000)
bar.render_notebook()

#### （3）住房问题

In [91]:
# Table t0114: spreadsheet row 6 (0-based) is taken as the header row, then
# every row containing a missing value is discarded.
full_view_path = root_path + 't0114.xls'
data = pd.read_excel(io=full_view_path, header=6)
data = data.dropna()
# Replace the spreadsheet's header labels with positional integers 0..4.
data.columns = list(range(5))
data

Unnamed: 0,0,1,2,3,4
1,北京市,4096844.0,11922945.0,2.75,21.03
2,天津市,2976741.0,9218220.0,2.23,19.09
3,河北省,17934977.0,64296922.0,3.29,22.29
4,山西省,8650261.0,31448836.0,3.26,19.28
5,内蒙古自治区,6784470.0,22618741.0,1.9,16.38
7,辽宁省,12866262.0,40597750.0,2.08,19.67
8,吉林省,7848446.0,26076948.0,2.03,17.88
9,黑龙江省,10955750.0,35457466.0,1.84,17.53
11,上海市,5299068.0,14787225.0,2.1,24.0
12,江苏省,21375726.0,69372929.0,2.49,29.17


In [92]:
area_list_full = data[0].values.tolist()
# Short region names: use the explicit mapping when present, otherwise strip
# the trailing character of the full name.
area_list = [change_dict.get(area, area[:-1]) for area in area_list_full]
# Columns 3 and 4 feed the two housing maps below (meaning inferred from the
# variable names; the table header is not visible here).
average_house, average_area = (
    data[column].values.tolist() for column in (3, 4)
)

In [94]:
# Map of China with one [region, value] pair per entry of average_house.
# Series label options are left at their defaults.
c = Map()
c.add(
    "average_house",
    [[region, value] for region, value in zip(area_list, average_house)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Average_household_Map_For_2000"),
    # Piecewise colour legend with an upper bound of 4.
    visualmap_opts=opts.VisualMapOpts(max_=4, is_piecewise=True),
)
c.render_notebook()

In [97]:
# Map of China with one [region, value] pair per entry of average_area.
# Series label options are left at their defaults.
c = Map()
c.add(
    "average_housing_area",
    [[region, value] for region, value in zip(area_list, average_area)],
    "china",
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Average_housing_area_Map_For_2000"),
    # Piecewise colour legend with an upper bound of 40.
    visualmap_opts=opts.VisualMapOpts(max_=40, is_piecewise=True),
)
c.render_notebook()