In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

### 读取数据

In [2]:
import os

In [3]:
# Database location: data.db sits in the parent of the current working directory.
_parent_dir = os.path.dirname(os.getcwd())
db_path = os.path.join(_parent_dir, "data.db")
# SQLAlchemy connection URL pointing at that SQLite file.
engine_path = f"sqlite:///{db_path}"

In [4]:
def link_sqlite(sql):
    """Run a SQL statement against the SQLite database and return the rows.

    Args:
        sql: SQL statement to execute; handed to ``pd.read_sql`` unchanged.

    Returns:
        pandas.DataFrame containing the result of ``sql``.
    """
    engine = create_engine(engine_path)
    try:
        return pd.read_sql(sql, con=engine)
    finally:
        # A fresh engine (with its own connection pool) is created on every
        # call, so release its pooled connections once the data is read.
        engine.dispose()

In [5]:
# Load every row and column of the salesSummary table into a DataFrame.
sql = "select * from salesSummary"
df = link_sqlite(sql)

### 数据描述

In [6]:
# Dimensions of the loaded table as (rows, columns).
df.shape

(30, 3)

In [7]:
# Preview five randomly chosen rows; no random_state is set, so the rows differ between runs.
df.sample(5)

Unnamed: 0,商品,淘宝,京东
2,商品3,36430286,35423924
17,商品18,33776459,31924303
8,商品9,11854916,11164280
15,商品16,47493063,46536876
0,商品1,10126430,9485938


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,淘宝,京东
count,30.0,30.0
mean,22368630.0,21331200.0
std,14167290.0,13661770.0
min,1542652.0,1459513.0
25%,12894730.0,12028800.0
50%,19356840.0,18463840.0
75%,30031950.0,28902840.0
max,54400540.0,49919920.0


In [9]:
# Replace scientific notation with plain numbers in both NumPy and pandas output.
def _format_without_decimals(value):
    """Render a float with zero decimal places, e.g. 22368632.4 -> '22368632'."""
    return "%.f" % value


np.set_printoptions(suppress=True)
pd.set_option("display.float_format", _format_without_decimals)
# Show the summary statistics again, now rendered with the float format set above.
df.describe()

Unnamed: 0,淘宝,京东
count,30,30
mean,22368632,21331203
std,14167289,13661774
min,1542652,1459513
25%,12894730,12028804
50%,19356840,18463844
75%,30031946,28902836
max,54400538,49919921


In [10]:
# Add a per-product total column: Taobao sales plus JD sales.
df["总销售额"] = df["淘宝"].add(df["京东"])

In [11]:
# Total sales of the Taobao store, summed over all products.
df["淘宝"].sum()

671058957

In [12]:
# Total sales of the JD store, summed over all products.
df["京东"].sum()

639936097

In [13]:
# Product(s) with the highest combined sales: keep the rows matching the column maximum.
highest_total = df["总销售额"].max()
df.loc[df["总销售额"].eq(highest_total)]

Unnamed: 0,商品,淘宝,京东,总销售额
18,商品19,54400538,49919921,104320459


In [14]:
# Product(s) with the lowest combined sales: keep the rows matching the column minimum.
lowest_total = df["总销售额"].min()
df.loc[df["总销售额"].eq(lowest_total)]

Unnamed: 0,商品,淘宝,京东,总销售额
25,商品26,1542652,1459513,3002165


### 柱状图对比

In [15]:
from pyecharts import options as opts
from pyecharts.charts import Bar

def _in_ten_thousands(amounts):
    """Scale raw sales amounts to units of 10,000, rounded to two decimals."""
    return [round(amount / 10000, 2) for amount in amounts]


# Category labels plus one value series per store, in units of 10,000.
x_names = df["商品"].tolist()
tao_bao = _in_ten_thousands(df["淘宝"].tolist())
jing_dong = _in_ten_thousands(df["京东"].tolist())

# Per-series annotations: markers on the max and min values, and an average line.
_extreme_markers = opts.MarkPointOpts(
    data=[
        opts.MarkPointItem(type_="max", name="最大值"),
        opts.MarkPointItem(type_="min", name="最小值"),
    ]
)
_average_line = opts.MarkLineOpts(
    data=[
        opts.MarkLineItem(type_="average", name="平均值"),
    ]
)

# Build the grouped bar chart step by step on a single Bar instance.
c = Bar()
c.add_xaxis(x_names)
c.add_yaxis("淘宝", tao_bao)
c.add_yaxis("京东", jing_dong)
c.set_global_opts(
    # Rotate the x-axis labels by -15 degrees.
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
    title_opts=opts.TitleOpts(title="商品销售额分析"),
)
c.set_series_opts(
    # Hide the numeric label on each bar; only the annotations stay visible.
    label_opts=opts.LabelOpts(is_show=False),
    markpoint_opts=_extreme_markers,
    markline_opts=_average_line,
)
c.render_notebook()