In [276]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from pylab import rcParams ##matplotlib
rcParams['figure.figsize'] = 12, 8

from pyecharts import options as opts
from pyecharts.charts import Bar, Line, Pie, Map, Page, ThemeRiver, Timeline, Grid
from pyecharts.commons.utils import JsCode

In [277]:
# Load the three training tables (sales, search, user reply) from the
# relative data/ directory; the paths assume the notebook runs next to it.
train_sale = pd.read_csv('data/train_sales_data.csv')
train_search = pd.read_csv('data/train_search_data.csv')
train_usr = pd.read_csv('data/train_user_reply_data.csv')

In [278]:
# Left-join the search table onto the sales table on the shared key columns,
# so every sales row is kept even when no matching search row exists.
train_sale_search = train_sale.merge(
    train_search, on=['province','adcode','regYear','regMonth','model'],how='left')


In [279]:
# Short tags that replace the raw model identifiers in every chart.
model_tag = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
            'O','P','Q','R','S','T','U','V','W','X','Y','Z',
             'AA','BB','CC','DD','EE','FF','GG','HH','II','JJ','KK','LL',
            'MM','NN','OO','PP','QQ','RR','SS','TT','UU','VV','WW','XX','YY','ZZ',
            'ABC','DEF','HIJ','KLM','OPQ','RST','UVW','XYZ']

# Sort the distinct models so the model -> tag assignment is identical on
# every run; iterating a bare set of strings follows hash order, which can
# change between kernel sessions.
unique_models = sorted(train_sale_search['model'].dropna().unique())

# zip() would silently drop the surplus models (they would then map to NaN),
# so fail loudly instead.
if len(unique_models) > len(model_tag):
    raise ValueError(
        "{} distinct models but only {} tags available".format(
            len(unique_models), len(model_tag)))

model_dict = dict(zip(unique_models, model_tag))

# NOTE(review): this overwrites the 'model' column in place, so re-running the
# cell without reloading the data would remap the already-mapped tags.
train_sale_search['model'] = train_sale_search['model'].map(model_dict)

In [482]:
# Apply the same model -> tag mapping to the user-reply table so its 'model'
# values line up with train_sale_search; ids missing from model_dict become NaN.
train_usr['model'] = train_usr['model'].map(model_dict)

In [5]:
# Preview the first rows of the merged sales/search table.
train_sale_search.head()

Unnamed: 0,province,adcode,model,bodyType,regYear,regMonth,salesVolume,popularity
0,上海,310000,F,SUV,2016,1,292,1479
1,云南,530000,F,SUV,2016,1,466,1594
2,内蒙古,150000,F,SUV,2016,1,257,1479
3,北京,110000,F,SUV,2016,1,408,2370
4,四川,510000,F,SUV,2016,1,610,3562


In [6]:
# Preview the first rows of the user-reply table.
# NOTE(review): the saved output still shows raw model ids, so it appears to
# have been produced before the model -> tag mapping cell was run.
train_usr.head()

Unnamed: 0,model,regYear,regMonth,carCommentVolum,newsReplyVolum
0,02aab221aabc03b9,2016,1,132,399
1,02aab221aabc03b9,2016,2,160,3043
2,02aab221aabc03b9,2016,3,357,798
3,02aab221aabc03b9,2016,4,243,3821
4,02aab221aabc03b9,2016,5,283,933


## 各省总销售量

In [None]:
province_sale = train_sale.groupby(['province'],as_index=False)['salesVolume'].sum()

def pop_timeline_map() -> Map:
    """Build a China map coloured by each province's total sales volume.

    Reads the module-level ``province_sale`` frame, which holds one row per
    province with its summed ``salesVolume``.

    Returns:
        A pyecharts ``Map`` chart (the function never builds a ``Timeline``).
    """
    volumes = province_sale.salesVolume
    # Derive the colour-scale bounds from the data instead of hard-coding them,
    # so the scale stays correct if the input file changes.
    # NOTE(review): the previous literals (358664 / 2145301) look like the
    # data min/max; confirm they were not a deliberate clipping range.
    lowest = int(volumes.min())
    highest = int(volumes.max())

    c = (
        Map()
        .add(
            "", [list(z) for z in zip(province_sale.province, volumes)], "china"
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="各省总销售量",subtitle='https://lambda-xmu.club'),
            toolbox_opts=opts.ToolboxOpts(),
            visualmap_opts=opts.VisualMapOpts(max_=highest,min_=lowest),
        )
    )
    return c

In [273]:
# Build the province map and show it inline as this cell's output.
c = pop_timeline_map()
c.render_notebook()

## 全国汽车月销售量和搜索量

In [316]:
def sale_pop_bar_line(data, column1, column2, title) -> Bar:
    """Overlay monthly bars of ``column1`` with monthly lines of ``column2``.

    Args:
        data: frame with ``regYear``, ``column1`` and ``column2`` columns; the
            code assumes one row per month for each of 2016 and 2017, in
            month order.
        column1: column drawn as bars on the primary y axis.
        column2: column drawn as lines on the secondary y axis.
        title: chart title.

    Returns:
        The ``Bar`` chart with the ``Line`` chart overlapped onto it.
    """
    # Both charts share the same string category labels, so the line points are
    # matched to the bar categories by name instead of by a differently sized
    # integer range.
    months = [str(m) for m in range(1, 13)]
    rows_2016 = data[data['regYear']==2016]
    rows_2017 = data[data['regYear']==2017]

    bar = (
        Bar()
        .add_xaxis(months)
        .add_yaxis("2016 购买量", list(rows_2016[column1]), gap="0%")
        .add_yaxis("2017 购买量", list(rows_2017[column1]), gap="0%")
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        # Second y axis, used by the overlaid line series (yaxis_index=1).
        .extend_axis(
            yaxis=opts.AxisOpts()
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
            yaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(formatter="{value} /量")
            ),
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(formatter="{value} /月")
            ),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    line = (
        Line()
        .add_xaxis(months)
        # Each legend label now names the year whose rows it plots; the two
        # labels were previously swapped.
        .add_yaxis("2016 搜索量", list(rows_2016[column2]), yaxis_index=1)
        .add_yaxis("2017 搜索量", list(rows_2017[column2]), yaxis_index=1)
    )
    bar.overlap(line)
    return bar

In [148]:
# Nationwide monthly totals of search popularity and sales volume.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity', '全国汽车月销售量与搜索量')
c.render_notebook()

## 车型销售量占比

In [451]:
def pie_rosetype_sale(data, column, title) -> Pie:
    """Draw side-by-side rose pies of ``column`` per model for 2016 and 2017.

    Args:
        data: frame with ``model``, ``regYear`` and ``column`` columns.
        column: numeric column whose values size the pie slices.
        title: chart title.

    Returns:
        A ``Pie`` chart with one series per year.
    """
    def year_pairs(year):
        # Take label and value from the same rows so each slice keeps its own
        # model name; labels built from an unordered set() do not follow the
        # row order of the values.
        rows = data[data['regYear']==year]
        return [list(z) for z in zip(rows['model'], rows[column])]

    c = (
        Pie()
        .add(
            "2016年",
            year_pairs(2016),
            radius=["30%", "75%"],
            center=["25%", "50%"],
            rosetype="radius",
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add(
            "2017年",
            year_pairs(2017),
            radius=["30%", "75%"],
            center=["75%", "50%"],
            rosetype="radius",
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
            legend_opts=opts.LegendOpts(type_="", pos_top="80%"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    return c

In [452]:
# Total sales volume per (model, year), the input for the per-model rose pies.
model_sale = train_sale_search.groupby(['model','regYear'],as_index=False)['salesVolume'].sum()

# Build the chart and show it inline as this cell's output.
c = pie_rosetype_sale(model_sale, 'salesVolume', '各品牌汽车销售占比（2016 & 2017）')
c.render_notebook()

In [453]:
def pie_rosetype_sale(data, column, title) -> Pie:
    """Draw side-by-side rose pies of ``column`` per body type for 2016 and 2017.

    NOTE: this redefines ``pie_rosetype_sale`` from the earlier cell (which
    groups by ``model``); after this cell runs, the name refers to this
    body-type version.

    Args:
        data: frame with ``bodyType``, ``regYear`` and ``column`` columns.
        column: numeric column whose values size the pie slices.
        title: chart title.

    Returns:
        A ``Pie`` chart with one series per year.
    """
    def year_pairs(year):
        # Take label and value from the same rows so each slice keeps its own
        # body type; labels built from an unordered set() do not follow the
        # row order of the values.
        rows = data[data['regYear']==year]
        return [list(z) for z in zip(rows['bodyType'], rows[column])]

    c = (
        Pie()
        .add(
            "2016年",
            year_pairs(2016),
            radius=["30%", "75%"],
            center=["25%", "50%"],
            rosetype="radius",
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add(
            "2017年",
            year_pairs(2017),
            radius=["30%", "75%"],
            center=["75%", "50%"],
            rosetype="radius",
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
            legend_opts=opts.LegendOpts(type_="", pos_top="90%"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    return c

In [454]:
# Total sales volume per (body type, year).
# NOTE(review): this rebinds `model_sale`, which held the per-model totals in
# the previous section; a distinct name would avoid stale-state confusion.
model_sale = train_sale_search.groupby(['bodyType','regYear'],as_index=False)['salesVolume'].sum()

# Build the chart and show it inline as this cell's output.
c = pie_rosetype_sale(model_sale, 'salesVolume', '各类型汽车销售占比（2016 & 2017）')
c.render_notebook()

## 各省份销售情况

In [491]:
def themeriver_plot(data, column, title) -> ThemeRiver:
    """Draw a theme-river chart of ``column`` over time, one band per province.

    Args:
        data: frame with ``province``, ``regYear``, ``regMonth`` and ``column``
            columns.
        column: numeric column that sets each band's thickness.
        title: chart title.

    Returns:
        A ``ThemeRiver`` chart on a time axis.
    """
    # Distinct province names in first-seen order; passing the raw column would
    # repeat every province once per month in the series/legend names.
    provinces = list(dict.fromkeys(data['province']))
    # "YYYY/M" strings, consumed by the time-typed single axis.
    dates = data['regYear'].map(str) + '/' + data['regMonth'].map(str)
    river_rows = [list(z) for z in zip(dates, data[column], data['province'])]

    c = (
        ThemeRiver(
            init_opts = opts.InitOpts(height = '800px',width='1000px')
        )
        .add(
            provinces,
            river_rows,
            singleaxis_opts=opts.SingleAxisOpts(name='时间',type_="time", pos_bottom="8%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=True,font_size=10,margin=30))
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
            legend_opts=opts.LegendOpts(type_="plain", pos_top="7%"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    return c

In [492]:
# Sales volume summed per (province, year, month); later cells reuse this frame.
province_year_month_sale = train_sale_search.groupby(
    ['province','regYear','regMonth'],as_index=False)['salesVolume'].sum()

# Build the chart and show it inline as this cell's output.
c = themeriver_plot(province_year_month_sale, 'salesVolume', '汽车销售量')
c.render_notebook()

In [493]:
# Search popularity summed per (province, year, month).
year_month_pop = train_sale_search.groupby(['province','regYear','regMonth'],as_index=False)['popularity'].sum()

# Build the chart and show it inline as this cell's output.
c = themeriver_plot(year_month_pop, 'popularity', '汽车搜索量')
c.render_notebook()

In [73]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '福建']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','福建汽车月销售量与搜索量')
c.render_notebook()

In [74]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '山东']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','山东汽车月销售量与搜索量')
c.render_notebook()

In [75]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '黑龙江']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','黑龙江汽车月销售量与搜索量')
c.render_notebook()

In [76]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '江西']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','江西汽车月销售量与搜索量')
c.render_notebook()

In [77]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '湖南']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','湖南汽车月销售量与搜索量')
c.render_notebook()

In [78]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '上海']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','上海汽车月销售量与搜索量')
c.render_notebook()

In [79]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '河南']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','河南汽车月销售量与搜索量')
c.render_notebook()

In [80]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '陕西']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','陕西汽车月销售量与搜索量')
c.render_notebook()

In [81]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '湖北']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','湖北汽车月销售量与搜索量')
c.render_notebook()

In [82]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '广西']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','广西汽车月销售量与搜索量')
c.render_notebook()

In [83]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '山西']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','山西汽车月销售量与搜索量')
c.render_notebook()

In [84]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '四川']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','四川汽车月销售量与搜索量')
c.render_notebook()

In [85]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '安徽']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','安徽汽车月销售量与搜索量')
c.render_notebook()

In [86]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '河北']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','河北汽车月销售量与搜索量')
c.render_notebook()

In [87]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '浙江']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','浙江汽车月销售量与搜索量')
c.render_notebook()

In [88]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '内蒙古']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','内蒙古汽车月销售量与搜索量')
c.render_notebook()

In [89]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '重庆']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','重庆汽车月销售量与搜索量')
c.render_notebook()

In [90]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '云南']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','云南汽车月销售量与搜索量')
c.render_notebook()

In [91]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '辽宁']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','辽宁汽车月销售量与搜索量')
c.render_notebook()

In [92]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '北京']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','北京汽车月销售量与搜索量')
c.render_notebook()

In [93]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '江苏']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','江苏汽车月销售量与搜索量')
c.render_notebook()

In [94]:
# Monthly popularity and sales totals for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_sale = (
    train_sale_search[train_sale_search['province'] == '广东']
    .groupby(['regYear', 'regMonth'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_line(pop_sale, 'salesVolume', 'popularity','广东汽车月销售量与搜索量')
c.render_notebook()

## 每个省销售量占比情况

In [167]:
def timeline_pie(data) -> Timeline:
    """Build an auto-playing timeline with one sales-share pie per month.

    Args:
        data: frame with ``province``, ``regYear``, ``regMonth`` and
            ``salesVolume`` columns.

    Returns:
        A ``Timeline`` holding one rose ``Pie`` for each month of 2016-2017.
    """
    tl = Timeline().add_schema(is_auto_play=True)

    for i in range(2016, 2018):
        for j in range(1, 13):
            sub_data = data[(data['regYear']==i)&(data['regMonth']==j)]
            # Pair each value with the province on the same row; labels taken
            # from an unordered set() do not follow the row order of the values.
            slices = [list(z) for z in zip(sub_data['province'], sub_data['salesVolume'])]
            pie = (
                Pie()
                .add(
                    "",
                    slices,
                    rosetype="radius",
                    radius=["30%", "55%"],
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(
                        title="各省份{}年{}月汽车销售量占比".format(i,j),
                        subtitle='https://lambda-xmu.club'
                    ),
                    legend_opts=opts.LegendOpts(is_show=False,type_="plain", pos_top="9%"),
                    toolbox_opts=opts.ToolboxOpts(),
                )
            )
            tl.add(pie, "{}年{}月".format(i,j))
    return tl

In [168]:

# Build the monthly pie timeline and show it inline as this cell's output.
c = timeline_pie(province_year_month_sale)
c.render_notebook()

In [106]:
# Add each province's share of the monthly nationwide sales:
#   sale_sum = total salesVolume across provinces for that (year, month)
#   sale_per = the province's salesVolume / sale_sum
# NOTE(review): this mutates province_year_month_sale in place after earlier
# cells already used it; line_sale / line_year_sale below read 'sale_per'.
province_year_month_sale['sale_sum'] = province_year_month_sale.groupby(['regYear','regMonth'],as_index=False)['salesVolume'].transform('sum')
province_year_month_sale['sale_per'] = province_year_month_sale['salesVolume']/province_year_month_sale['sale_sum']


In [196]:
def line_sale(data) -> Line:
    """Plot every province's monthly sales share as one line over 2016-2017.

    Args:
        data: frame with ``province`` and ``sale_per`` columns; each province's
            rows are plotted in the order they appear in the frame.

    Returns:
        A ``Line`` chart with one series per province.
    """
    # Provinces in the order their series are added to the chart.
    provinces = [
        "上海", "广东", "广西", "黑龙江", "浙江", "福建", "重庆", "山西",
        "辽宁", "北京", "山东", "安徽", "云南", "湖南", "河北", "陕西",
        "河南", "内蒙古", "江苏", "江西", "湖北", "四川",
    ]
    # x-axis labels "2016/1" ... "2016/12", "2017/1" ... "2017/12".
    labels = []
    for year in ('2016/', '2017/'):
        for m in range(1, 13):
            labels.append(year + str(m))

    chart = Line()
    chart.add_xaxis(labels)
    for name in provinces:
        share = data[data['province'] == name]['sale_per']
        chart.add_yaxis(name, list(share))

    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="各省销售量占比情况", subtitle='https://lambda-xmu.club'),
        legend_opts=opts.LegendOpts(pos_left='20%'),
        toolbox_opts=opts.ToolboxOpts(),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-20)),
    )
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    return chart

In [197]:
# Build the per-province share chart and show it inline as this cell's output.
c = line_sale(province_year_month_sale)
c.render_notebook()

In [245]:
def line_year_sale(data) -> Line:
    """Plot each province's monthly sales share with 2016 and 2017 as separate lines.

    Args:
        data: frame with ``province``, ``regYear`` and ``sale_per`` columns;
            the code assumes one row per month, in month order, for every
            (province, year) pair.

    Returns:
        A ``Line`` chart with one series per province and year, on a
        12-month x axis.
    """
    # Exactly twelve string category labels; the previous range(1, 14) produced
    # a 13th, non-existent month and handed pyecharts a bare range object.
    months = [str(m) for m in range(1, 13)]
    # Provinces in the order their series are added to the chart.
    provinces = [
        "上海", "广东", "广西", "黑龙江", "浙江", "福建", "重庆", "山西",
        "辽宁", "北京", "山东", "安徽", "云南", "湖南", "河北", "陕西",
        "河南", "内蒙古", "江苏", "江西", "湖北", "四川",
    ]

    c = Line().add_xaxis(months)
    for province in provinces:
        for year in (2016, 2017):
            rows = data[(data['province']==province)&(data['regYear']==year)]
            c.add_yaxis("{} {}".format(province, year), list(rows['sale_per']))

    c.set_global_opts(
        title_opts=opts.TitleOpts(title="各省销售量占比情况",subtitle='https://lambda-xmu.club'),
        legend_opts=opts.LegendOpts(pos_left='20%'),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter="{value} /月")
        ),
    )
    c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    return c

In [246]:
# Build the year-over-year share chart and show it inline as this cell's output.
c = line_year_sale(province_year_month_sale)
c.render_notebook()

In [378]:
def sale_pop_bar_stack_line(data, column1, column2, title, max_) -> Bar:
    """Stack monthly ``column1`` bars by body type and overlay ``column2`` lines.

    Args:
        data: frame with ``regYear``, ``regMonth``, ``bodyType``, ``popularity``
            and ``salesVolume`` columns; the code assumes one row per
            (year, month, body type), in month order.
        column1: column drawn as stacked bars (one stack per year).
        column2: column drawn as lines on the secondary y axis, after summing
            over body types.
        title: chart title.
        max_: upper bound of the primary (bar) y axis.

    Returns:
        The ``Bar`` chart with the ``Line`` chart overlapped onto it.
    """
    # Both charts share the same string category labels, so the line points are
    # matched to the bar categories by name instead of by a differently sized
    # integer range.
    months = [str(m) for m in range(1, 13)]

    bar = Bar().add_xaxis(months)
    # One stack per year; within a stack the body types keep a fixed order.
    for year, stack_name in ((2016, "stack1"), (2017, "stack2")):
        for body_type in ('Sedan', 'SUV', 'MPV', 'Hatchback'):
            rows = data[(data['regYear']==year)&(data['bodyType']==body_type)]
            bar.add_yaxis(
                "{} {} 购买量".format(year, body_type),
                list(rows[column1]),
                gap="0%",
                stack=stack_name,
            )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    # Second y axis, used by the overlaid line series (yaxis_index=1).
    bar.extend_axis(
        yaxis=opts.AxisOpts()
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
        legend_opts=opts.LegendOpts(pos_left='30%'),
        yaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter="{value} /量"),
            max_= max_,
        ),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter="{value} /月")
        ),
        toolbox_opts=opts.ToolboxOpts(),
    )

    # Collapse the body types into per-month totals for the line series. The
    # columns are selected with a list: tuple-style multi-column selection on
    # a GroupBy is no longer accepted by pandas 2.x.
    monthly = data.groupby(['regYear','regMonth'],as_index=False)[['popularity','salesVolume']].sum()
    line = (
        Line()
        .add_xaxis(months)
        # Each legend label now names the year whose rows it plots; the two
        # labels were previously swapped.
        .add_yaxis("2016 搜索量", list(monthly[monthly['regYear']==2016][column2]), yaxis_index=1)
        .add_yaxis("2017 搜索量", list(monthly[monthly['regYear']==2017][column2]), yaxis_index=1)
    )
    bar.overlap(line)
    return bar

In [379]:
# Nationwide monthly popularity and sales totals per body type.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '全国汽车月销售量与搜索量stack', max_=1400000)
c.render_notebook()

In [380]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '福建']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '福建汽车月销售量与搜索量stack', 35000)
c.render_notebook()

In [381]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '山东']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '山东汽车月销售量与搜索量stack', 120000)
c.render_notebook()

In [382]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '湖南']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '湖南汽车月销售量与搜索量stack', 60000)
c.render_notebook()

In [383]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '江西']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '江西汽车月销售量与搜索量stack', 40000)
c.render_notebook()

In [384]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '湖北']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '湖北汽车月销售量与搜索量stack', 60000)
c.render_notebook()

In [385]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '四川']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '四川汽车月销售量与搜索量stack', 70000)
c.render_notebook()

In [386]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '江苏']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '江苏汽车月销售量与搜索量stack', 120000)
c.render_notebook()

In [387]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '安徽']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '安徽汽车月销售量与搜索量stack', 60000)
c.render_notebook()

In [388]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '内蒙古']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '内蒙古汽车月销售量与搜索量stack', 25000)
c.render_notebook()

In [389]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '北京']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '北京汽车月销售量与搜索量stack', 50000)
c.render_notebook()

In [390]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '浙江']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '浙江汽车月销售量与搜索量stack', 100000)
c.render_notebook()

In [391]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '重庆']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '重庆汽车月销售量与搜索量stack', 30000)
c.render_notebook()

In [392]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '云南']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '云南汽车月销售量与搜索量stack', 40000)
c.render_notebook()

In [393]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '陕西']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '陕西汽车月销售量与搜索量stack', 50000)
c.render_notebook()

In [394]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '上海']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '上海汽车月销售量与搜索量stack', 40000)
c.render_notebook()

In [395]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '河南']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '河南汽车月销售量与搜索量stack', 120000)
c.render_notebook()

In [396]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '广西']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '广西汽车月销售量与搜索量stack', 40000)
c.render_notebook()

In [397]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '山西']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '山西汽车月销售量与搜索量stack', 40000)
c.render_notebook()

In [398]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '河北']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '河北汽车月销售量与搜索量stack', 100000)
c.render_notebook()

In [399]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '黑龙江']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '黑龙江汽车月销售量与搜索量stack', 25000)
c.render_notebook()

In [400]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '辽宁']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '辽宁汽车月销售量与搜索量stack', 40000)
c.render_notebook()

In [401]:
# Monthly popularity and sales totals per body type for a single province.
# The columns are selected with a list: tuple-style multi-column selection on
# a GroupBy is no longer accepted by pandas 2.x.
pop_type_sale = (
    train_sale_search[train_sale_search['province'] == '广东']
    .groupby(['regYear', 'regMonth', 'bodyType'], as_index=False)[['popularity', 'salesVolume']]
    .sum()
)

c = sale_pop_bar_stack_line(pop_type_sale, 'salesVolume', 'popularity', '广东汽车月销售量与搜索量stack', 150000)
c.render_notebook()

In [423]:
def pie_province_rosetype_sale(data, column, title) -> Pie:
    """Draw a rose pie with one slice per row of ``data``, labelled by province.

    Args:
        data: frame with ``province`` and ``column`` columns.
        column: numeric column whose values size the slices.
        title: chart title.

    Returns:
        A ``Pie`` chart with labels shown and the legend hidden.
    """
    # Pair each value with the province on the same row; labels taken from an
    # unordered set() do not follow the row order of the values.
    slices = [list(z) for z in zip(data['province'], data[column])]
    c = (
        Pie()
        .add(
            "",
            slices,
            radius=["30%", "75%"],
            rosetype="radius",
            label_opts=opts.LabelOpts(is_show=True),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
            legend_opts=opts.LegendOpts(is_show=False,type_="", pos_top="90%"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    return c

In [427]:
# Sales volume per (province, body type), plus each body type's share of its
# province's total sales (bodyType_per sums to 1 within a province).
province_model_sale = train_sale_search.groupby(['province','bodyType'],as_index=False)['salesVolume'].sum()
province_model_sale['bodyType_per'] = \
    province_model_sale.salesVolume/province_model_sale.groupby('province')['salesVolume'].transform('sum')

# Plot the Sedan share across provinces and show it as this cell's output.
c = pie_province_rosetype_sale(province_model_sale[province_model_sale.bodyType=='Sedan'], 'bodyType_per', 'Sedan在各省销售占比')
c.render_notebook()

In [430]:
# Plot the SUV share across provinces and show it as this cell's output.
c = pie_province_rosetype_sale(province_model_sale[province_model_sale.bodyType=='SUV'], 'bodyType_per', 'SUV在各省销售占比')
c.render_notebook()

In [431]:
# Plot the MPV share across provinces and show it as this cell's output.
c = pie_province_rosetype_sale(province_model_sale[province_model_sale.bodyType=='MPV'], 'bodyType_per', 'MPV在各省销售占比')
c.render_notebook()

In [432]:
# Plot the Hatchback share across provinces and show it as this cell's output.
c = pie_province_rosetype_sale(province_model_sale[province_model_sale.bodyType=='Hatchback'], 'bodyType_per', 'Hatchback在各省销售占比')
c.render_notebook()

In [452]:
def line_type_sale(data, title, provinces=None) -> Line:
    """Plot one ``sale_per`` line per province over 2016/1 .. 2017/12.

    Note: a function with this same name is defined again further down the
    notebook (a per-model variant); once that cell runs it shadows this one.

    Args:
        data: DataFrame with ``province`` and ``sale_per`` columns.
            Values are placed on the x axis by position, so each province's
            rows are assumed to be in chronological order, one per month --
            TODO confirm for provinces with missing months.
        title: Chart title.
        provinces: Optional iterable of province names to draw, in legend
            order. Defaults to the 22 provinces this chart has always shown.

    Returns:
        The configured ``Line`` chart; the caller is responsible for rendering.
    """
    if provinces is None:
        # Default series, kept in the original legend order.
        provinces = (
            "上海", "广东", "广西", "黑龙江", "浙江", "福建", "重庆", "山西",
            "辽宁", "北京", "山东", "安徽", "云南", "湖南", "河北", "陕西",
            "河南", "内蒙古", "江苏", "江西", "湖北", "四川",
        )

    # X axis labels: '2016/1' .. '2016/12', '2017/1' .. '2017/12'.
    month = ['{}/{}'.format(year, m) for year in (2016, 2017) for m in range(1, 13)]

    c = Line().add_xaxis(month)
    # One series per province instead of one hand-written add_yaxis call each.
    for province in provinces:
        c.add_yaxis(province, list(data.loc[data['province'] == province, 'sale_per']))

    c.set_global_opts(
        title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
        legend_opts=opts.LegendOpts(pos_left='38%'),
        toolbox_opts=opts.ToolboxOpts(),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-20)),
    )
    # Hide per-point value labels on every series.
    c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    return c

In [437]:
# Monthly sales for every (province, body type) pair.
province_year_month_type_sale = (
    train_sale_search
    .groupby(['province', 'regYear', 'regMonth', 'bodyType'], as_index=False)['salesVolume']
    .sum()
)

# Each body type's share of its province's sales in that month.
province_year_month_type_sale['sale_per'] = (
    province_year_month_type_sale['salesVolume']
    / province_year_month_type_sale
        .groupby(['province', 'regYear', 'regMonth'])['salesVolume']
        .transform('sum')
)


In [453]:
# Monthly Hatchback share of sales, one line per province.
# The chart title previously read 'Hatchbackle' (typo); corrected to 'Hatchback'.
c = line_type_sale(
    province_year_month_type_sale[province_year_month_type_sale.bodyType == 'Hatchback'],
    '各省Hatchback类型车销售量占比情况',
)
c.render_notebook()

In [454]:
# Monthly MPV share of sales, one line per province.
c = line_type_sale(
    province_year_month_type_sale.loc[province_year_month_type_sale['bodyType'] == 'MPV'],
    '各省MPV类型车销售量占比情况',
)
c.render_notebook()

In [455]:
# Monthly SUV share of sales, one line per province.
c = line_type_sale(
    province_year_month_type_sale.loc[province_year_month_type_sale['bodyType'] == 'SUV'],
    '各省SUV类型车销售量占比情况',
)
c.render_notebook()

In [456]:
# Monthly Sedan share of sales, one line per province.
c = line_type_sale(
    province_year_month_type_sale.loc[province_year_month_type_sale['bodyType'] == 'Sedan'],
    '各省Sedan类型车销售量占比情况',
)
c.render_notebook()

In [478]:
def line_type_sale(data, title, models=None) -> Line:
    """Plot one ``sale_per`` line per car-model tag over 2016/1 .. 2017/12.

    Note: this redefines ``line_type_sale``; an earlier cell in the notebook
    defines a per-province variant under the same name, which this shadows.

    Args:
        data: DataFrame with ``model`` and ``sale_per`` columns.
            Values are placed on the x axis by position, so each model's rows
            are assumed to be in chronological order, one per month --
            TODO confirm for models with missing months.
        title: Chart title.
        models: Optional iterable of model tags to draw, in legend order.
            Defaults to the 60 tags this chart has always shown. A tag with
            no matching rows still gets an (empty) series.

    Returns:
        The configured ``Line`` chart; the caller is responsible for rendering.
    """
    if models is None:
        # Default series, kept in the original legend order (note 'OPQ' last).
        models = (
            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
            "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
            "AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH", "II", "JJ", "KK",
            "LL", "MM", "NN", "OO", "PP", "QQ", "RR", "SS", "TT", "UU", "VV",
            "WW", "XX", "YY", "ZZ",
            "ABC", "DEF", "HIJ", "KLM", "RST", "UVW", "XYZ", "OPQ",
        )

    # X axis labels: '2016/1' .. '2016/12', '2017/1' .. '2017/12'.
    month = ['{}/{}'.format(year, m) for year in (2016, 2017) for m in range(1, 13)]

    c = Line().add_xaxis(month)
    # One series per model tag instead of one hand-written add_yaxis call each.
    for model in models:
        c.add_yaxis(model, list(data.loc[data['model'] == model, 'sale_per']))

    c.set_global_opts(
        title_opts=opts.TitleOpts(title=title,subtitle='https://lambda-xmu.club'),
        legend_opts=opts.LegendOpts(pos_left='25%'),
        toolbox_opts=opts.ToolboxOpts(),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-20)),
    )
    # Hide per-point value labels on every series.
    c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    return c

In [479]:
# Monthly sales per car model, summed over all provinces.
model_sale = (
    train_sale_search
    .groupby(['model', 'regYear', 'regMonth'], as_index=False)['salesVolume']
    .sum()
)
# Each model's share of all sales in that month.
model_sale['sale_per'] = (
    model_sale['salesVolume']
    / model_sale.groupby(['regYear', 'regMonth'])['salesVolume'].transform('sum')
)


In [481]:
# Monthly share of total sales, one line per car model.
c = line_type_sale(data=model_sale, title='各模型车销售量占比情况')
c.render_notebook()