In [1]:
import os

# for rendering
from IPython.core.display import HTML, Markdown

# for data analysis
import numpy as np
import datetime

# for plotting
import plotly.graph_objects as go
import svgutils.compose as sc
# run orca with xvfb
import plotly.io as pio
pio.orca.config.use_xvfb = True
# import matplotlib for complex figures
import matplotlib.pyplot as plt
# config chinese font
import matplotlib.font_manager as mfm

# figure/table number counter
from smartydoc.decorator import FigCounter

# for long table
import smartydoc.matplot as sdplot

In [58]:
# var for iteration
# This ID must be a unique index of a student / school; ALL data used in this
# notebook can be fetched with it. It is read from the ITERID environment
# variable when that is set (non-empty) and falls back to the sample ID, so the
# cell no longer needs hand-editing between development and batch runs.
iter_id = os.getenv('ITERID') or 'sample12345'

# helper var for development
# if False, only display the image embedded in the notebook,
# if True, the code saves the image as a file for the final report.
PRODUCTION_PHASE = True

# figure/table counters ('图' = figure prefix, '表' = table prefix)
fig_counter = FigCounter('图', font_size=15, font_family='SimHei')
tbl_counter = FigCounter('表', font_size=15, font_family='SimHei')


In [3]:
# image dir config
# material_dir: images shared by every report; img_dir: images of this iter_id
material_dir = os.path.join(os.path.curdir, 'imgs', 'common')
img_dir = os.path.join(os.path.curdir, 'imgs', iter_id)
# exist_ok=True removes the check-then-create race and raises if the path
# exists but is not a directory instead of silently continuing
os.makedirs(img_dir, mode=0o755, exist_ok=True)

# helpers for image display
def display_image(img_file, cls='medium'):
    """
    Show an image file inline as an <img> wrapped in a <div> with a CSS class.

    img_file: an image file path
    cls: small, medium, large, sign, or qrcode; a falsy value falls back
         to 'medium'
    """
    # os.path.relpath(path) yields a path relative to the working directory, so
    # the image is still found after the whole folder has been moved.
    rel_path = os.path.relpath(img_file)
    css_class = cls if cls else 'medium'
    display(HTML('<div class="%s"><img src="%s" alt="image"></div>\n' % (css_class, rel_path)))

In [4]:
# helper for cover generation
# XXX: add more info
def gen_cover(cover_cfg):
    """
    Display the report cover as HTML.

    cover_cfg: dict that may contain the keys main_cn, school, stu_name,
               stu_class, company and test_date. Every key that is present is
               wrapped in its own tag; missing keys are skipped. Values are
               inserted as-is, so they may contain HTML markup.
    """
    # (config key, tag template) pairs in the order they appear on the cover
    cover_fields = (
        ('main_cn', '<h1 id="cover">%s</h1>'),
        ('school', '<school>%s</school>'),
        ('stu_name', '<stuname>%s</stuname>'),
        ('stu_class', '<stuclass>%s</stuclass>'),
        ('company', '<address>%s</address>'),
        ('test_date', '<testdate>%s</testdate>'),
    )
    fragments = [
        template % (cover_cfg[key])
        for key, template in cover_fields
        if key in cover_cfg
    ]
    display(HTML('\n'.join(fragments)))

In [5]:
# cover config
# Only the keys given here appear on the cover; gen_cover skips missing ones.
today = datetime.datetime.today()
cover_cfg = {'main_cn': '使用Jupyter Notebook<br>撰写数据分析报告',
             'company': 'SmartyDoc项目组',
             # report date built from the current year and month
             'test_date': str(today.year)+'年'+str(today.month)+'月',
            }
gen_cover(cover_cfg)

## 前言

> Jupyter Notebook是基于网页的用于交互计算的应用程序，其可被应用于全过程计算：开发、文档编写、运行代码和展示结果。
>
> <rightalign> — Jupyter Notebook 官方介绍 </rightalign>

In [6]:
# show jupyter_home.png from the shared material folder at the default size
display_image(os.path.join(material_dir, 'jupyter_home.png'))

简而言之，Jupyter Notebook是以网页的形式存储，可以在网页页面中直接编写和运行代码，代码的运行结果也会直接在代码块下显示。如在编程过程中需要编写说明文档，可在同一个页面中直接编写，便于作及时的说明和解释。

基于Jupyter Notebook具备富文档和标准化的特性，我们也希望可以利用它生成美观的数据分析报告。**SmartyDoc**就是为此而生。利用**SmartyDoc**提供的文本标准化流程，我们可以将包含图文的Jupyter Notebook文件转存为具备特定层级结构的html文件，配合适当的css文件以及**weasyprint**工具，即可产生PDF格式的报告文档。

要顺利使用**SmartyDoc**完成报告，要求使用者掌握Python和Markdown两种语言。

下面将具体介绍如何利用这套工具撰写报告文档。

## 规划报告的逻辑结构

在撰写数据分析报告时，把报告的逻辑结构规划好，将报告内容划分为几个相对独立、且逻辑连贯的章节，会让后续工作更加顺利。如整篇报告可以包含几大**章**，每**章**可以包含多个**小节**，每个**小节**可以进一步划分为多个**子节**。

在**SmartyDoc**的框架内，文档的层级结构基于Markdown语言中的*Heading*实现，即报告的题目为一级标题，用 `# 报告题目` 这种形式实现；各个章节的标题为二级标题，用 `## 章节标题` 的形式实现；其下还可以继续使用三级、四级等更低层级的标题。

在撰写报告时，报告题目的样式以一段程序自动生成，用户只需提供报告封面的内容即可，具体可以参考此ipynb文件的*Cell 4*和*Cell 5*中的内容。需要用户自己写的报告章节结构，以二级标题为最高层级。

在每个章节下，可以出现文字、表格和图片等信息。为了保证用Markdown语言撰写的报告内容可以正常显示，需要将对应*Cell*的类型设置为`Markdown`。

### 显示Python程序中输出的文本

在进行数据分析时，我们经常需要根据分析结果打印对应的数据和结果描述，因此需要保证Python程序中打印的文字也能够正常显示在报告正文中。在**SmartyDoc**的框架下，我们可以使用如下方式显示Python程序中输出的文本。

In [7]:
# text produced by Python code is wrapped in Markdown(...) and passed to display()
some_score = 78
display(Markdown('打印Python程序中的输出文本 ... 变量的数值是 %s。'%(some_score)))

打印Python程序中的输出文本 ... 变量的数值是 78。

### 使用toc2插件管理文档目录

Jupyter Notebook是一个很棒的教学、探索和编程环境，但其功能仍存在很多不足。幸好，它允许我们使用一些插件来扩展它的功能。其中有一个插件叫Table of Contents (2) (toc2)，可以为Jupyter Notebook提供目录。可以在Jupyter的Nbextensions扩展管理页内，通过勾选框启用扩展。

点击按钮栏最后部的图标，目录列表会以左侧边栏的形式显示，toc2的使用效果见下图。


In [8]:
# show jupyter_toc2.png from the shared material folder at the default size
display_image(os.path.join(material_dir, 'jupyter_toc2.png'))

为了与**SmartyDoc**的框架兼容，需要对toc2扩展进行一些设置，可以通过点击目录列表上部的`齿轮图标`进行设置(如下图)，并将`h1`级标题排除出目录范畴（在**SmartyDoc**的框架下，章节结构以二级标题为最高层级）。

In [9]:
# show jupyter_toc2_config.png from the shared material folder at the default size
display_image(os.path.join(material_dir, 'jupyter_toc2_config.png'))

## 在报告中插入图表

图表可以让报告内容清晰明了。

在Jupyter Notebook中，用户可以直接将图片或表格嵌入到文本中。但为了便于将报告文本中所包含的文字、图片和表格都顺利地转换为HTML或PDF等格式，在**SmartyDoc**框架下，要求用户将图片和表格保存成图片文件，并在ipynb文本中进行引用，或直接在ipynb文件中以html格式保存。

为了方便报告内容的撰写，可以使用变量`PRODUCTION_PHASE`来控制图片在ipynb文件中保存的形式。具体可以参考*Cell 2*中的变量定义方式和下面的例子。

### 使用的工具包

为了得到美观且样式丰富的数据图，这里建议主要使用*plotly*工具包作图，并将图片保存为SVG格式，以保证在不同设备和缩放尺度下的图片质量。

对于一些非常具有设计感但难以通过*plotly*简单实现的图片，可以使用*svgutils*工具包中的图片组合功能，对图片进行重组和编辑。

具体使用样例请参考下面的示例。

### 自动添加图表编号

在撰写报告时会产生大量的图片，要准确标注每张图的编号将会消耗很大的人力，因此我们将这项工作交给程序自动完成。

在**SmartyDoc**中的`decorator`工具包中，提供了`FigCounter`类用来对图表编号进行自动计数。这个类的初始化方法请见 *Cell 2*，在初始化时需要提供图表名称的起始词，如图1，图2，...中的"图"字，以及设置文字的字号和字体等。初始化后，具体的使用方法请参考下文的示例。

### 图表示例 - 在报告中插入图片

In [10]:
# Pass an existing PNG and its caption text to the figure counter.
# NOTE(review): add_title presumably returns a composed figure carrying the
# numbered caption - confirm against smartydoc.decorator.FigCounter.
compose_fig = fig_counter.add_title(os.path.join(material_dir, 'jupyter_home.png'), '插入的png图片')
# you can append this part into your code
if PRODUCTION_PHASE:
    # save the composed figure as SVG and display the saved file
    compose_fig.save(os.path.join(img_dir, 'inserted_png.svg'))
    display_image(os.path.join(img_dir, 'inserted_png.svg'))
else:
    # development: display the composed object directly
    display(compose_fig)

### 图表示例 - 表格

#### 表格1

In [11]:
# table header
tbl_header = [
    '<b>学校名称</b>',
    '<b>总人数</b>',
    '<b>男生</b>',
    '<b>女生</b>',
    '<b>班级数</b>'
]
# table body, given column by column
tbl_values = [
    ['A学校', 'B学校', 'C学校'], # 1st column
    ['100人', '90人', '80人'],  # 2nd column
    ['60人', '30人', '70人'],   # 3rd column
    ['40人', '60人', '10人'],   # 4th column
    [10, 15, 6],               # 5th column
]

fig = go.Figure(data=[go.Table(
    header=dict(values=tbl_header,
                line_color='darkslategray',
                fill_color='lightskyblue',
                font_family='SimHei',
                font=dict(color='white', size=14),
                height=30,
                align='center'),
    cells=dict(values=tbl_values,
               line_color='darkslategray',
               fill_color='lightcyan',
               font_family='SimHei',
               font_size=14,
               height=30,
               align='left'))
])

# add the table caption and the table number here
fig.update_layout(width=600, height=320)
fig = tbl_counter.add_title(fig, '人数统计', y_pos=0)

# To see the effect of an edit immediately while writing the document, control
# the mode with PRODUCTION_PHASE: False embeds the generated figure directly in
# the notebook, True saves it as an image file.
# Because of the browser's own cache, always saving to an image file can leave
# the displayed figure unchanged no matter how the code is edited.
# Note: it is recommended to add this block after every plotting routine and to
# set the global mode through the PRODUCTION_PHASE variable at the top of the file.
if PRODUCTION_PHASE:
    img_file = os.path.join(img_dir, 'img1.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)
else:
    fig.show()


#### 表格2

In [12]:
# table header; <br> breaks a header cell into two lines
tbl_header = [
    '<b>平均<br>等级</b>',
    '<b>文理<br>倾向</b>',
    '<b>学科发展<br>均衡程度</b>',
    '<b>A等级<br>学生人数</b>',
    '<b>B等级<br>学生人数</b>',
    '<b>全市<br>排名</b>',
    '<b>全省<br>排名</b>',
]
# table body, one inner list per column
tbl_values = [
    ['B', 'A'],
    ['综合', '文'],
    ['较为均衡', '均衡'],
    ['54人', '100人'],
    ['0人', '10人'],
    ['9/61', '12/61'],
    ['22/287', '40/287'],
]

# no line/fill colors are given here, unlike the first table
fig = go.Figure(data=[go.Table(
    header=dict(values=tbl_header,
                font_family='SimHei',
                font=dict(color='black', size=14),
                height=30,
                align='center'),
    cells=dict(values=tbl_values,
               font_family='SimSun',
               font_size=14,
               height=30,
               align='center'))
])
fig.update_layout(width=800, height=400)
# add the table caption and the table number
fig = tbl_counter.add_title(fig, '结果统计', y_pos=0)

# production: save as SVG and show the saved file; development: show inline
if PRODUCTION_PHASE:
    img_file = os.path.join(img_dir, 'img2.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)
else:
    fig.show()


#### 可跨页的长表格

在报告中，如果需要呈现比较长的表格，如会跨越多页的表格，可以使用html形式来实现长表格，如下例。

In [13]:
import random

# Build 100 random rows so the table is long enough to span several pages.
# NOTE(review): the generator is not seeded, so the rows differ on every run.
rand_values = []
school_idx = 1
for i in range(100):
    tmp = []
    # school name column
    tmp.append('学校'+str(school_idx))
    # three head-count columns (total / boys / girls), drawn independently
    for j in range(3):
        tmp.append(str(random.randint(10, 200))+'人')
    # number of classes
    tmp.append(random.randint(3, 20))
    rand_values.append(tmp)
    school_idx += 1

# Rows are given row by row here (head / cells / foot).
# NOTE(review): the foot row holds fixed values that match the three
# hand-written rows only; it does not include the random rows.
tbl_html = sdplot.draw_table(head=['学校名称', '总人数', '男生', '女生', '班级数'],
                             cells=[['A学校', '100人', '60人', '40人', 10],
                                    ['B学校',  '90人', '30人', '60人', 15],
                                    ['C学校',  '80人', '70人', '10人',  6],
                                   ] + rand_values,
                             foot=['合计', '270人', '160人', '110人', 21]
                            )
# draw_table's result is rendered as HTML
display(HTML(tbl_html))

学校名称,总人数,男生,女生,班级数
A学校,100人,60人,40人,10
B学校,90人,30人,60人,15
C学校,80人,70人,10人,6
学校1,45人,129人,50人,9
学校2,113人,24人,195人,6
学校3,148人,13人,72人,3
学校4,199人,132人,23人,18
学校5,39人,68人,49人,18
学校6,195人,67人,33人,13
学校7,150人,180人,39人,19


#### 含有等级说明的5点量表

In [14]:
def level_table_chart(level_text, raw_high_text, raw_low_text, output_name):
    """
    Draw a 5-point scale with the achieved level highlighted, plus two text
    boxes describing low-score and high-score behaviour.

    level_text: one of '低', '较低', '中等', '较高', '高'; any other value
                makes list.index raise ValueError
    raw_high_text: description of high-score behaviour (box anchored at x=1)
    raw_low_text: description of low-score behaviour (box anchored at x=0)
    output_name: base name of the SVG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline
    """
    grey = 'rgb(125, 125, 125)'
    white = 'rgb(255, 255, 255)'
    teal = 'rgb(8, 208, 206)'
    cyan = 'rgb(18, 207, 212)'
    typeface = 'PingFang SC'

    def wrap(raw_text, width=12):
        # a <br> follows every `width`-th character, including the last one
        # when the text length is an exact multiple of `width`
        return ''.join(
            ch + '<br>' if pos % width == 0 else ch
            for pos, ch in enumerate(raw_text, start=1)
        )

    # only the achieved level gets a filled cell with white text
    hit = ['低', '较低', '中等', '较高', '高'].index(level_text)
    font_colors = [white if k == hit else grey for k in range(5)]
    cell_fills = [teal if k == hit else white for k in range(5)]

    scale_header = dict(
        values=['<b>低', '<b>较低', '<b>中等', '<b>较高', '<b>高'],
        line_color=teal,
        fill_color=cell_fills,
        align='center',
        font_family=typeface,
        font=dict(color=font_colors, size=14),
    )
    fig = go.Figure(data=[go.Table(header=scale_header)])

    # row label placed left of the scale
    fig.add_annotation(
        x=-0.1, y=0.975, xref='paper', yref='paper',
        text='<b>得分',
        showarrow=False,
        font=dict(family=typeface, size=15, color=teal),
    )

    # two arrows starting from the centre: low scores to the left, high to the right
    arrows = (
        ('<b>低分行为倾向    ', grey, -120),
        ('<b>    高分行为倾向', cyan, 120),
    )
    for label, tint, shift in arrows:
        fig.add_annotation(
            x=0.5, y=0.7, xref='paper', yref='paper',
            text=label,
            showarrow=True,
            font=dict(family=typeface, size=14, color=tint),
            arrowside='start',
            arrowhead=8,
            startarrowhead=2,
            arrowwidth=1.25,
            arrowcolor=tint,
            ax=shift,
            ay=0,
        )

    # description boxes: low-score text first (x=0), then high-score text (x=1)
    boxes = (
        (0, wrap(raw_low_text), grey, 'rgba(240, 240, 240, 1)'),
        (1, wrap(raw_high_text), cyan, 'rgba(214, 254, 255, 1)'),
    )
    for x_pos, body, tint, background in boxes:
        fig.add_annotation(
            x=x_pos, y=0.6, yanchor='top', xref='paper', yref='paper',
            text=body,
            align='left',
            showarrow=False,
            font=dict(family=typeface, size=13.5, color=tint),
            bgcolor=background,
            borderpad=5,
        )

    fig.update_layout(
        autosize=False,
        width=490,
        height=280,
        margin=dict(l=40, r=20, b=5, t=20),
    )

    if not PRODUCTION_PHASE:
        fig.show()
        return
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)

In [15]:
# level_text can be one of ['低', '较低', '中等', '较高', '高']
level_text = '较高'
# descriptions of low-score and high-score behaviour shown under the scale
low_text = '为了达到目的会采用一些违心的或不正当方式；通过奉承或讨好他人来获取支持；通过欺骗或偷窃的方式来获取好处；通过撒谎等方式获得利益。'
high_text = '不愿意通过手段改变他人的看法；即使明知道会给自己带来好处也不会勉强自己作出违心的事。'
# note the argument order: the high-score text comes before the low-score text
level_table_chart(level_text, high_text, low_text, 'likert_scale_example')

### 图表示例 - 数据图

#### 雷达图

In [16]:
def radar_chart(data, labels, annot_text, fig_title, output_name):
    """
    Draw a single-series radar chart from n numbers and n string labels.

    data: numeric sequence, one value per axis (also printed next to each point)
    labels: axis labels, same length as data
    annot_text: footnote shown below the chart; a falsy value omits it
    fig_title: caption text handed to fig_counter.add_title
    output_name: base name of the SVG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline

    Example:
    data = [0.5, 0.7, 0.5, 0.4, 0.2]
    labels = ['BMI','锻炼','睡眠','疾病','视力']
    annots = '注：睡眠情况指学生的平均睡眠时间/天，≥8小时为合格。'
    fig_title = '学生综合素质指标'
    output_name = 'radar_example'
    radar_chart(data, labels, annots, fig_title, output_name)
    """
    series_color = 'rgb(155, 211, 217)'

    # the radar polygon: lines, markers and value labels share one color
    polygon = go.Scatterpolar(
        r=data,
        theta=labels,
        fill='toself',
        fillcolor='rgba(155, 211, 217, 0.5)',
        mode='lines+markers+text',
        text=data,
        marker=dict(symbol=0, color=series_color),
    )

    # radial axis is drawn without tick labels; add `range` / `nticks` here
    # to pin the tick range and the number of ticks
    radial_axis = dict(
        visible=True,
        angle=90,
        tickangle=90,
        showticklabels=False,
    )
    angular_axis = dict(tickfont=dict(family='SimHei', size=14))
    layout = go.Layout(
        polar=dict(radialaxis=radial_axis, angularaxis=angular_axis),
        width=600,
        height=450,
        showlegend=False,
    )

    fig = go.Figure(data=polygon, layout=layout)

    # optional footnote, centred below the plot area
    if annot_text:
        footnote = go.layout.Annotation(
            x=0.5,
            y=-0.15,
            text=annot_text,
            align='center',
            font=dict(size=12, family='SimHei'),
            showarrow=False,
            xref='paper',
            yref='paper',
        )
        fig.update_layout(annotations=[footnote])

    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.2)

    if not PRODUCTION_PHASE:
        fig.show()
        return
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)


In [17]:
# one value per axis label, in the same order
data = [0.5, 0.7, 0.5, 0.4, 0.2]
labels = ['BMI','锻炼','睡眠','疾病','视力']
# footnote shown below the chart
annot_text = '注：睡眠情况指学生的平均睡眠时间/天，≥8小时为合格。'
fig_title = '学生综合素质指标'
# base name (no extension) of the saved image
output_name = 'radar_example'
radar_chart(data, labels, annot_text, fig_title, output_name)

#### 呈现多组数据的雷达图

In [18]:
def enhanced_radar_chart(data_arrays, data_names, labels, annot_text, fig_title, output_name):
    """
    Draw a radar chart with one polygon per data series.

    data_arrays: nested list or 2D array, one inner sequence per series
    data_names: legend name of each series; series and names are paired
                with zip, so extra entries on either side are ignored
    labels: axis labels shared by all series
    annot_text: footnote shown below the chart; a falsy value omits it
    fig_title: caption text handed to fig_counter.add_title
    output_name: base name of the SVG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline

    Example:
    data_arrays = [
        [0.5, 0.7, 0.5, 0.4, 0.2],
        [0.8, 0.1, 0.1, 0.8, 0.7],
    ]
    data_names = ['本校', '全区']
    labels = ['BMI','锻炼','睡眠','疾病','视力']
    annots = '注：睡眠情况指学生的平均睡眠时间/天，≥8小时为合格。'
    fig_title = '学生综合素质指标比较'
    output_name = 'enhanced_radar_example'
    enhanced_radar_chart(data_arrays, data_names, labels, annots, fig_title, output_name)
    """
    # radial tick labels are visible here; add `range` / `nticks` to pin them
    radial_axis = dict(
        visible=True,
        angle=90,
        tickangle=90,
        showticklabels=True,
    )
    angular_axis = dict(tickfont=dict(family='SimHei', size=14))
    layout = go.Layout(
        polar=dict(radialaxis=radial_axis, angularaxis=angular_axis),
        width=600,
        height=450,
        showlegend=True,
        # horizontal legend placed above the plot
        legend_orientation="h",
        legend=dict(x=0.35, y=1.2, font_family='SimHei'),
    )

    fig = go.Figure(layout=layout)

    # one filled polygon per series; colors are left to plotly's defaults
    for series, series_name in zip(data_arrays, data_names):
        polygon = go.Scatterpolar(
            r=series,
            theta=labels,
            fill='toself',
            mode='lines+markers',
            name=series_name,
        )
        fig.add_trace(polygon)

    # optional footnote, centred below the plot area
    if annot_text:
        footnote = go.layout.Annotation(
            x=0.5,
            y=-0.15,
            text=annot_text,
            align='center',
            font=dict(size=12, family='SimHei'),
            showarrow=False,
            xref='paper',
            yref='paper',
        )
        fig.update_layout(annotations=[footnote])

    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.2)

    if not PRODUCTION_PHASE:
        fig.show()
        return
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)

In [19]:
# one inner list per series, one value per axis label
data_arrays = [
    [0.5, 0.7, 0.5, 0.4, 0.2],
    [0.8, 0.1, 0.1, 0.8, 0.7],
]
# legend names, in the same order as data_arrays
data_names = ['本校', '全区']
labels = ['BMI','锻炼','睡眠','疾病','视力']
# footnote shown below the chart
annots = '注：睡眠情况指学生的平均睡眠时间/天，≥8小时为合格。'
fig_title = '学生综合素质指标比较'
# base name (no extension) of the saved image
output_name = 'enhanced_radar_example'
enhanced_radar_chart(data_arrays, data_names, labels, annots, fig_title, output_name)

#### 使用matplotlib制作雷达图

在`matplotlib`中并没有内置的雷达图函数，因此需要使用更底层的函数实现雷达图，这给使用者增加了不少麻烦，但也赋予了使用者更大的自由度去自定义图的样式。

In [20]:
def radar_chart_from_matplotlib(data_arrays, data_names, labels, output_name):
    """Draw a two-series radar chart with matplotlib.

    data_arrays: nested list or 2D array; exactly the first two series are
                 drawn (data_arrays[0] as a plain line, data_arrays[1] as a
                 filled polygon). The inputs are copied and never modified.
    data_names: legend names of the two series
    labels: axis labels; each is printed together with the matching value of
            the SECOND series
    output_name: base name of the PNG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline

    The radial axis is fixed to 30..145 with rings at 60, 80 and 100, so the
    values are expected to lie in that range.

    Example:
    data_arrays = [
        [65, 80, 68, 60],
        [69, 75, 97, 77],
    ]
    data_names = ['全国同年级平均水平', '你的等级']
    labels = ['空间能力', '计算思维', '推理能力', '信息提取']
    output_name = 'matplotlib_radar_example'
    radar_chart_from_matplotlib(data_arrays, data_names, labels, output_name)
    """
    # config chinese font
    font_path = "./fonts/SimHei.ttf"
    prop = mfm.FontProperties(fname=font_path)
    plt.ioff()

    def close_loop(series):
        # Copy first: the original `values += values[:1]` grew the caller's
        # list in place (and would broadcast-add on a numpy array).
        closed = list(series)
        closed += closed[:1]
        return closed

    # ------- PART 1: Create background

    # number of variables
    categories = labels
    N = len(categories)

    # angle of each axis: the circle is divided evenly between the variables;
    # the first angle is repeated so the polygon closes
    angles = [n / float(N) * 2 * np.pi for n in range(N)]
    angles += angles[:1]

    # Initialise the spider plot
    fig = plt.figure(figsize=(15, 10))
    ax = plt.subplot(111, polar=True)

    # rotate the start of the first axis and run clockwise
    ax.set_theta_offset(np.pi / 4)
    ax.set_theta_direction(-1)

    # one axis per variable; the built-in tick labels are hidden because the
    # labels are drawn manually together with the values further below
    plt.xticks(angles[:-1], categories, color='black', fontproperties=prop, size=12)
    ax.axes.xaxis.set_visible(False)

    # radial rings without labels, outer frame hidden
    ax.set_rlabel_position(0)
    plt.yticks([60, 80, 100], ["", "", ""], color="grey", fontproperties=prop)
    plt.ylim(30, 145)
    ax.axes.spines['polar'].set_visible(False)

    # ------- PART 2: Add plots

    # No loop on purpose: more than a few groups make the chart unreadable.

    # data series 1: line only
    series_a = close_loop(data_arrays[0])
    ax.plot(angles, series_a, color=(254/255, 133/255, 110/255, 1), linewidth=1, linestyle='-', marker='o', label=data_names[0])

    # data series 2: line plus translucent fill
    series_b = close_loop(data_arrays[1])
    ax.plot(angles, series_b, color=(116/255, 223/255, 222/255, 1), linewidth=1, linestyle='-', marker='o', label=data_names[1])
    ax.fill(angles, series_b, color=(116/255, 223/255, 222/255, 0.3))

    # axis name and the value of series 2, placed outside the outer ring
    for i in range(N):
        plt.text(
            i/float(N)*2*np.pi, 125,
            '%s：%s'%(labels[i], series_b[i]),
            fontproperties=prop,
            ha='center',
            color='black',
            fontsize=18,
        )

    # level annotations along the radial direction
    plt.text(
        0, 30,
        '待发展',
        fontproperties=prop,
        ha='center',
        color=(254/255, 133/255, 110/255, 1),
        fontsize=15,
    )
    plt.text(
        np.pi*1.25, 70,
        '良好',
        fontproperties=prop,
        ha='center',
        color=(255/255, 169/255, 0, 1),
        fontsize=15,
    )
    plt.text(
        np.pi*1.25, 90,
        '优秀',
        fontproperties=prop,
        ha='center',
        color=(91/255, 206/255, 220/255, 1),
        fontsize=15,
    )

    # Add legend (the shared font object is enlarged for it)
    prop.set_size(18)
    plt.legend(bbox_to_anchor=(1.3, 0.5), prop=prop, frameon=False)

    if PRODUCTION_PHASE:
        img_file = os.path.join(img_dir, output_name+'.png')
        plt.savefig(img_file, dpi=300)
        # release the figure; otherwise every call leaves one open in pyplot
        plt.close(fig)
        display_image(img_file)
    else:
        # development mode shows the figure inline, like the plotly helpers
        plt.show()

In [21]:
# two series: the first is drawn as a line, the second as a filled polygon
data_arrays = [
    [65, 80, 68, 60],
    [69, 75, 97, 77],
]
# legend names, in the same order as data_arrays
data_names = ['全国同年级平均水平', '你的等级']
# one label per axis
labels = ['空间能力', '计算思维', '推理能力', '信息提取']
# base name (no extension) of the saved image
output_name = 'matplotlib_radar_example'
radar_chart_from_matplotlib(data_arrays, data_names, labels, output_name)

#### 横向柱形图

In [22]:
def h1barchart(data, ticks_text, fig_title, output_name):
    """
    Draw a horizontal bar chart from n numbers and n tick labels.

    data: numeric sequence, one bar per value (the value is printed outside
          the bar)
    ticks_text: y-axis labels, same length as data (checked with assert)
    fig_title: caption text handed to fig_counter.add_title
    output_name: base name of the SVG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline

    Example:
    data = [30, 40, 50, 60, 70]
    ticks_text = ['足球', '篮球', '跑步', '羽毛球', '舞蹈']
    fig_title = '本校学生选择的运动种类比例'
    output_name='simpleBar'
    h1barchart(data, ticks_text, fig_title, output_name)
    """
    # check input
    assert len(data)==len(ticks_text)

    # leave 10% headroom on the x-axis for the value labels
    axis_limit = max(data) * 1.1

    bar_trace = go.Bar(
        x=data,
        y=ticks_text,
        marker=dict(color='rgb(88, 182, 192)'),
        text=data,
        textposition='outside',
        orientation='h',
    )

    # settings shared by both axes: labelled, with a mirrored grey frame line
    framed_axis = dict(
        showticklabels=True,
        showline=True,
        mirror=True,
        linecolor='rgb(127,127,127)',
    )
    layout = go.Layout(
        # background color config
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(255, 255, 255, 0.5)',
        width=600,
        height=450,
        xaxis=dict(range=(0, axis_limit), **framed_axis),
        yaxis=dict(
            tickfont_family='SimHei',
            range=(-0.7, len(ticks_text)-0.3),
            **framed_axis
        ),
        # blank space (px) left / right / top / bottom of the plot
        margin=dict(l=50, r=50, t=80, b=110),
    )

    fig = go.Figure(data=bar_trace, layout=layout)
    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.19)

    if not PRODUCTION_PHASE:
        fig.show()
        return
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)

In [23]:
# one bar per value; ticks_text supplies the matching y-axis labels
data = [30, 40, 50, 60, 70]
ticks_text = ['足球', '篮球', '跑步', '羽毛球', '舞蹈']
fig_title = '本校学生选择的运动种类比例'
# base name (no extension) of the saved image
output_name='simple_bar_example'
h1barchart(data, ticks_text, fig_title, output_name)

#### 横向柱形图 - 两组数据比较

In [24]:
def h2barschart(data1_name, data1, data2_name, data2, ticks_text, annot_text,
                fig_title, output_name, p_values=None):
    """Draw two numeric series of equal length as grouped horizontal bars.

    data1_name, data1: legend name and values of the first series (values
                       are printed outside its bars)
    data2_name, data2: legend name and values of the second series
    ticks_text: y-axis labels, same length as both series (checked with assert)
    annot_text: footnote shown below the chart; a falsy value omits it
    fig_title: caption text handed to fig_counter.add_title
    output_name: base name of the SVG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline
    p_values: optional list, one signed value per tick. A value in (0, 0.05)
              is marked '▲', a negative value with magnitude below 0.05 is
              marked '▼', anything else '=='. Only a list is honoured; other
              types are ignored.
    """
    has_p_values = isinstance(p_values, list)

    # check input
    assert len(data1)==len(data2)
    assert len(data1)==len(ticks_text)
    if has_p_values:
        assert len(data1)==len(p_values)

    # 20% headroom on the x-axis leaves room for labels and significance marks
    axis_limit = max(max(data1), max(data2)) * 1.2

    def significance_mark(p):
        # the sign carries the direction, the magnitude the significance
        if p>0 and p<0.05:
            return '▲'
        if p<0 and abs(p)<0.05:
            return '▼'
        return '=='

    first_bars = go.Bar(
        x=data1,
        y=ticks_text,
        name=data1_name,
        marker=dict(color='rgb(88,182,192)'),
        text=data1,
        textposition='outside',
        orientation='h',
        showlegend=True,
    )
    second_bars = go.Bar(
        x=data2,
        y=ticks_text,
        name=data2_name,
        marker=dict(color='rgb(188,226,230)'),
        orientation='h',
        showlegend=True,
    )

    # settings shared by both axes: labelled, with a mirrored grey frame line
    framed_axis = dict(
        showticklabels=True,
        showline=True,
        mirror=True,
        linecolor='rgb(127,127,127)',
    )
    layout = go.Layout(
        barmode='group',
        # background color config
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(255, 255, 255, 0.5)',
        showlegend=True,
        # horizontal legend
        legend=dict(x=0.3, y=1.15, font_family='SimHei'),
        legend_orientation="h",
        # reverse the legend order (the traces are passed second-series first)
        legend_traceorder="reversed",
        width=600,
        height=450,
        xaxis=dict(range=(0, axis_limit), **framed_axis),
        yaxis=dict(tickfont_family='SimHei', **framed_axis),
        # blank space (px) left / right / top / bottom of the plot
        margin=dict(l=50, r=50, t=80, b=110),
    )

    fig = go.Figure(data=[second_bars, first_bars], layout=layout)

    # significance marks sit near the right edge, one per tick row
    notes = []
    if has_p_values:
        for row, p in enumerate(p_values):
            notes.append(go.layout.Annotation(
                x=axis_limit*0.97,
                y=row,
                xref="x",
                yref="y",
                text=significance_mark(p),
                font=dict(size=15),
                showarrow=False,
            ))

    # optional footnote below the plot area
    if annot_text:
        notes.append(go.layout.Annotation(
            x=0.5,
            y=-0.17,
            xref='paper',
            yref='paper',
            text=annot_text,
            align='left',
            font=dict(size=12, family='SimHei'),
            showarrow=False,
        ))
    fig.update_layout(annotations=notes)

    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.19)

    if not PRODUCTION_PHASE:
        fig.show()
        return
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)


In [25]:
# first series (its values are printed next to the bars)
data1_name = '学校A'
data1 = [90, 40, 20, 70, 55]
# second series
data2_name = '常模'
data2 = [88, 50, 40, 90, 34]
ticks_text = ['1年级', '2年级', '3年级', '4年级', '5年级']
# signed values, one per tick: the sign selects ▲ / ▼ and a magnitude below
# 0.05 counts as a difference; everything else is shown as ==
p_values = [0.2, -0.02, -0.001, -0.001, 0.002]
# footnote explaining the three marks
annot_text = '说明：▲代表本校的得分在常模以上，▼代表本校的得分在常模以下，==代表本校和常模之间没有差异'
fig_title = '学校A与常模的对比'
# base name (no extension) of the saved image
output_name = 'two_bars_comp_example'
h2barschart(data1_name, data1, data2_name, data2, ticks_text, annot_text,
            fig_title, output_name, p_values=p_values)

#### 堆叠柱形图

In [26]:
def hstackbarchart(data, names, ticks_text, fig_title, output_name, color=None):
    """
    Draw a horizontal stacked bar chart from a nested list or 2D array.

    data: one inner sequence per stacked series, one value per tick
    names: legend name of each series, same length as data (checked with assert)
    ticks_text: y-axis labels, same length as data[0] (checked with assert)
    fig_title: caption text handed to fig_counter.add_title
    output_name: base name of the SVG written to img_dir when
                 PRODUCTION_PHASE is on; otherwise the figure is shown inline
    color: optional list of extra colors appended AFTER the five default
           colors; the palette is reused cyclically when there are more
           series than colors, so no series is dropped

    Values are labelled with a '%' suffix inside the bars, and the x-axis
    spans from 0 to the largest per-tick total.

    Example:
    data1_name =  '少于8小时'
    data1 = [10, 20, 43.8]
    data2_name = '8-9小时'
    data2 = [80, 65, 39.6]
    data3_name = '9小时及以上'
    data3 = [10, 15, 16.6]
    data=[data1, data2, data3]
    names = [data1_name, data2_name, data3_name]
    ticks_text = ['国家八年级', '全区', '本校']
    fig_title = '本校学生睡眠情况与全区、国家八年级的比较'
    output_name = 'stackbar_example'
    #color = ['rgb(15,205,229)', 'rgb(250,12,144)']
    hstackbarchart(data, names, ticks_text, fig_title, output_name)
    """
    # check input
    assert len(data)==len(names)
    assert len(data[0])==len(ticks_text)

    # The axis must fit the longest stacked bar. Summing only the first tick
    # (as before) clipped every bar whose total exceeded that of the first one.
    max_x = max((sum(column) for column in zip(*data)), default=0)

    # default colors of the stacked series
    palette = [
        'rgb(185,205,229)',
        'rgb(250,192,144)',
        'rgb(147,205,221)',
        'lightsalmon',
        'lightslategray',
    ]
    # add customized colors
    if isinstance(color, list):
        palette.extend(color)

    # one bar trace per series; indexing modulo the palette length keeps series
    # beyond the palette instead of silently dropping them
    traces = []
    for idx, (series, name) in enumerate(zip(data, names)):
        traces.append(go.Bar(
            x = series,
            y = ticks_text,
            name = name,
            marker = dict(
                color = palette[idx % len(palette)],
            ),
            # text suffix and its position inside the bar
            text = ['{}%'.format(value) for value in series],
            textposition = 'inside',
            insidetextanchor = 'middle',
            orientation = 'h',
            showlegend = True,
        ))

    # layout config
    layout = go.Layout(
        barmode = 'stack',
        # background color config
        paper_bgcolor = 'rgba(0,0,0,0)',
        plot_bgcolor = 'rgba(255, 255, 255, 0.5)',
        showlegend = True,
        # horizontal legend
        legend_orientation = "h",
        legend = dict(x=0.2, y=1.15, font_family='SimHei'),
        # legend order
        legend_traceorder = "normal",

        width = 600,
        height = 450,
        xaxis = dict(
            showticklabels = True,
            showline = True,
            linecolor = 'rgb(127,127,127)',
            mirror = True,
            ticksuffix = "%",
            range = (0, max_x),
        ),
        yaxis = dict(
            tickfont_family='SimHei',
            showticklabels = True,
            showline = True,
            mirror = True,
            linecolor = 'rgb(127,127,127)',
        ),
        margin = dict(
            l = 50,    # blank space on the left
            r = 50,    # blank space on the right
            t = 80,    # blank space at the top
            b = 110,   # blank space at the bottom
        ),
    )

    fig = go.Figure(data=traces, layout=layout)
    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.19)

    if PRODUCTION_PHASE:
        img_file = os.path.join(img_dir, output_name+'.svg')
        fig.write_image(img_file, scale=1)
        display_image(img_file)
    else:
        fig.show()

In [27]:
# three stacked series, each with one value per tick label
data1_name =  '少于8小时'
data1 = [10, 20, 43.8]
data2_name = '8-9小时'
data2 = [80, 65, 39.6]
data3_name = '9小时及以上'
data3 = [10, 15, 16.6]
data = [data1, data2, data3]
names = [data1_name, data2_name, data3_name]
ticks_text = ['国家八年级', '全区', '本校']
fig_title = '本校学生睡眠情况与全区、国家八年级的比较'
# base name (no extension) of the saved image
output_name = 'stackbar_example'
# custom colors are optional and may be omitted
#color = ['rgb(15,205,229)', 'rgb(250,12,144)']
hstackbarchart(data, names, ticks_text, fig_title, output_name)

#### 堆叠柱形图 - 划分正负性评价

In [28]:
def hstackbarchart_align2zero1(data_arrays, data_names, ticks_text, fig_title, output_name, color=None):
    """
    Draw a horizontal stacked bar chart whose bars are stacked relative to zero.

    The layout uses barmode='relative', so series holding negative values are
    stacked to the left of zero and positive ones to the right. Bar labels show
    the absolute value followed by '%'.

    Args:
        data_arrays: nested list or 2D array, one row per series; each row holds
            one value per entry of ticks_text. Values meant for the left side
            of zero must be passed as negative numbers.
        data_names: legend name of each series (same length as data_arrays).
        ticks_text: y-axis category labels.
        fig_title: title text handed to fig_counter.add_title.
        output_name: base name (without extension) of the SVG written to
            img_dir when PRODUCTION_PHASE is true.
        color: optional list of extra colors. They are appended after the five
            default colors, so they apply to the sixth and later series.

    When there are more series than available colors, the palette is reused
    from its start so that no series is left out of the figure.

    Example:
        data_names = ['not interested', 'somewhat', 'very', 'extremely']
        data_arrays = [
            [-10, -20, -43.8],
            [80, 65, 39.6],
            [10, 15, 16.6],
            [30, 24, 11],
        ]
        ticks_text = ['design', 'painting', 'literature']
        fig_title = 'Share of students per interest level'
        output_name = 'stackbar_align2zero_example'
        hstackbarchart_align2zero1(data_arrays, data_names, ticks_text, fig_title, output_name)
    """

    # check input: one name per series, one value per category in every series
    assert len(data_arrays) == len(data_names)
    assert all(len(series) == len(ticks_text) for series in data_arrays)

    # color of each stacked segment, 5 colors by default
    palette = [
        'rgba(38, 24, 74, 0.8)',
        'rgba(71, 58, 131, 0.8)',
        'rgba(122, 120, 168, 0.8)',
        'rgba(164, 163, 204, 0.85)',
        'rgba(190, 192, 213, 1)',
    ]
    # add customized colors after the defaults
    if isinstance(color, list):
        palette.extend(color)

    # plot bars
    traces = []
    for idx, (series, name) in enumerate(zip(data_arrays, data_names)):
        trace = go.Bar(
            x = series,
            y = ticks_text,
            name = name,
            marker = dict(
                # wrap around instead of dropping series beyond the palette
                color = palette[idx % len(palette)],
            ),
            # label text (absolute value with a percent suffix) and its position
            text = ['{}%'.format(abs(value)) for value in series],
            textposition = 'inside',
            insidetextanchor = 'middle',
            orientation = 'h',
            showlegend = True,
        )
        traces.append(trace)

    # layout config
    layout = go.Layout(
        barmode = 'relative',
        # background color config
        paper_bgcolor = 'rgba(0,0,0,0)',
        plot_bgcolor = 'rgba(255, 255, 255, 0.5)',
        showlegend = True,
        # horizontal legend
        legend_orientation = "h",
        legend = dict(x=0.1, y=1.15, font_family='SimHei'),
        # legend entries follow the trace order
        legend_traceorder = "normal",

        width = 600,
        height = 450,
        xaxis = dict(
            # fixed ticks; labels on the negative side are shown unsigned
            tickmode = 'array',
            tickvals = [-50, -25, 0, 25, 50, 75,100],
            ticktext = ['50%','25%','0%','25%','50%','75%','100%'],
            showticklabels = True,
            showline = True,
            linecolor = 'rgb(127,127,127)',
            mirror = True,
            ticksuffix = "%",
        ),
        yaxis = dict(
            tickfont_family='SimHei',
            showticklabels = True,
            showline = True,
            mirror = True,
            linecolor = 'rgb(127,127,127)',
        ),
        margin = dict(
            l = 50,    # left margin
            r = 50,    # right margin
            t = 80,    # top margin
            b = 110,   # bottom margin
        ),
    )

    fig = go.Figure(data=traces, layout=layout)
    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.19)

    if PRODUCTION_PHASE:
        # save as SVG for the final report and embed the saved file
        img_file = os.path.join(img_dir, output_name+'.svg')
        fig.write_image(img_file, scale=1)
        display_image(img_file)
    else:
        fig.show()

In [29]:
# Example input for hstackbarchart_align2zero1: one row per series, one column
# per entry of ticks_text; the first series is given as negative values.
data_names = ['不感兴趣', '一般感兴趣', '很感兴趣', '非常感兴趣']
data_arrays = [
    [-10, -20, -43.8],
    [80, 65, 39.6],
    [10, 15, 16.6],
    [30, 24, 11],
]
ticks_text = ['艺术设计', '绘画', '文学艺术']
fig_title = '艺术类上不同兴趣水平学生的人数占比'
output_name = 'stackbar_align2zero_example'
hstackbarchart_align2zero1(data_arrays, data_names, ticks_text, fig_title, output_name)

#### 堆叠柱形图 - 多组数据的分布比较

In [30]:
def hstackbarchart_align2zero2(data_arrays, data_names, ticks_text, fig_title, output_name):  
    """Overlay several score series as horizontal bars, one row per group.

    Every series in data_arrays is drawn as a horizontal bar trace over the
    same categories (barmode='overlay'). The last series additionally carries a
    stepped colorbar whose tick labels are data_names in reverse order, and a
    gray bar reaching the upper x-limit is drawn on the last category. The
    values of the first, third and last series are written as annotations.

    Args:
        data_arrays: list of series, each with one score per entry of
            ticks_text. At least three series are needed because the third
            one is annotated.
        data_names: name of each series.
        ticks_text: y-axis category labels.
        fig_title: title text handed to fig_counter.add_title.
        output_name: base name (without extension) of the SVG written to
            img_dir when PRODUCTION_PHASE is true.

    Example:
        data_arrays = [
            [490, 495, 490],
            [480, 485, 480],
            [470, 475, 470],
            [460, 455, 465],
            [450, 430, 440],
        ]
        data_names = ['top 10%', 'top 25%', 'mean', 'bottom 10%', 'bottom 25%']
        ticks_text = ['school A ', 'school B ', 'this school']
        fig_title = 'Comparison of learning ability between schools'
        output_name = 'hstackbarchart_align2zero2_example'
        hstackbarchart_align2zero2(data_arrays, data_names, ticks_text, fig_title, output_name)
    """

    # x-axis limits: data range widened by 10% of its span on both sides
    scores = np.array(data_arrays)
    top, bottom = np.max(scores), np.min(scores)
    pad = 0.1 * (top - bottom)
    max_ticks = top + pad
    min_ticks = bottom - pad

    # color palette, darkest first; the last entry is white
    palette = [
        'rgba(130,109,186,1)',
        'rgba(159,130,206,1)',
        'rgba(199,173,228,1)',
        'rgba(227,207,241,1)',
        'rgba(255,255,255,1)',
    ]

    # every series except the last one: plain bar with a thin white outline
    bars = [
        go.Bar(
            x = score,
            y = ticks_text,
            name = name,
            orientation = 'h',
            marker = dict(
                color = shade,
                line = dict(color='rgba(255,255,255,1)', width=1),
            ),
        )
        for shade, name, score in zip(palette[:-1], data_names[:-1], data_arrays[:-1])
    ]

    # stepped colorscale: the four non-white palette colors, lightest first,
    # each filling one quarter of the scale
    band_colors = palette[-2::-1]
    n_bands = len(band_colors)
    colorscale = []
    for k, shade in enumerate(band_colors):
        colorscale.append([k / n_bands, shade])
        colorscale.append([(k + 1) / n_bands, shade])

    # the last series, which also owns the colorbar
    bars.append(
        go.Bar(
            x = data_arrays[-1],
            y = ticks_text,
            orientation = 'h',
            marker = dict(
                color = palette[-1],
                line = dict(color='rgb(255,255,255)', width=2),
                colorbar = dict(
                    title = "百分位数",        # colorbar title
                    titleside = 'top',        # title position
                    thickness = 15,
                    tickmode = "array",
                    outlinecolor = 'rgb(0,0,0)',  # outline color of the colorbar
                    outlinewidth =  0,
                    tickvals = list(range(1, len(data_names)+1)),   # tick positions
                    ticktext = data_names[::-1],
                    ticks = "outside",
                    tick0 = 0,
                    dtick = 1,
                ),
                colorscale = colorscale,
            )
        )
    )

    # gray background bar on the last category, reaching the upper x-limit
    bars.append(
        go.Bar(
            x = [max_ticks],
            y = [ticks_text[-1]],
            orientation = 'h',
            marker = dict(
                color = 'rgba(127,127,127,0.4)',
                line = dict(color='rgb(255,255,255)', width=0),
            ),
        )
    )

    # score labels for the first, third and last series
    annotations = [
        dict(
            xref = 'x',
            yref = 'y',   # position the label in data coordinates
            x = data_arrays[row][col],
            y = tick,
            text = int(round(data_arrays[row][col], 0)),
            font = dict(
                family = 'SimHei',
                size = 12,
                color = 'rgb(0,64,64)',
            ),
            xanchor = 'center',
            yanchor = 'top',
            ax = 0,
            ay = -50,
            showarrow = True,
            arrowcolor = 'rgb(255,255,255)',
            arrowwidth = 0.1,
        )
        for row in (0, 2, -1)
        for col, tick in enumerate(ticks_text)
    ]

    fig = go.Figure(data=bars)

    # layout config
    fig.update_layout(
        barmode = 'overlay',
        xaxis = dict(
            showgrid = False,
            showline = True,
            linecolor = 'rgb(127,127,127)',
            showticklabels = True,
            zeroline = True,
            domain = [0.15, 1],
            range = (min_ticks, max_ticks),
            dtick = 50,
            gridcolor = 'rgb(217,217,217)',
            mirror = True,
        ),
        yaxis = dict(
            showgrid = False,
            showline = True,
            showticklabels = True,
            zeroline = False,
            linecolor = 'rgb(127,127,127)',
            mirror = True,
        ),
        paper_bgcolor = 'rgba(0,0,0,0)',             # paper color
        plot_bgcolor = 'rgba(255, 255, 255, 0.5)',   # plotting-area color
        margin = dict(l=5, r=20, t=40, b=80),
        showlegend = False,
        width = 600,
        height = 500,
        bargroupgap = 0.4,               # gap between bars at the same position
        font = dict(family = 'SimHei'),  # global font
        annotations = annotations,
    )

    # add title
    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.1)

    if not PRODUCTION_PHASE:
        fig.show()
        return

    # production: save as SVG for the final report and embed the saved file
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)

In [31]:
# Example input for hstackbarchart_align2zero2: five series, each holding one
# score per entry of ticks_text.
data_arrays = [
    [490, 495, 490],
    [480, 485, 480],
    [470, 475, 470],
    [460, 455, 465],
    [450, 430, 440],
]
data_names = ['前10%', '前25%', '均值', '后10%','后25%' ]
ticks_text = ['学校A ', '学校B ' ,'本校']
fig_title = '本校与本区其他学校学习能力的比较'
output_name = 'hstackbarchart_align2zero2_example'
hstackbarchart_align2zero2(data_arrays, data_names, ticks_text, fig_title, output_name)

#### 饼图

In [32]:
def piechart(names, values, fig_title, output_name):
    """
    Draw a pie chart from a list of labels and a matching list of values.

    Args:
        names: label of each slice.
        values: value of each slice, same length as names.
        fig_title: title text handed to fig_counter.add_title.
        output_name: base name (without extension) of the SVG written to
            img_dir when PRODUCTION_PHASE is true.

    Example:
        names = ['2 hours', '4 hours', '8 hours']
        values = [0.2, 0.3, 0.5]
        fig_title = 'Distribution of exercise time'
        output_name = 'pie_example'
        piechart(names, values, fig_title, output_name)
    """
    # every slice needs both a label and a value
    assert len(names) == len(values)

    slices = go.Pie(labels=names, values=values)

    fig = go.Figure(
        data=[slices],
        layout=go.Layout(
            # background color config
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(255, 255, 255, 0.5)',
            showlegend=True,
            legend=dict(font_family='SimHei'),
            width=600,
            height=450,
            # left / right / top / bottom margins
            margin=dict(l=50, r=50, t=50, b=110),
        ),
    )
    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.19)

    if not PRODUCTION_PHASE:
        fig.show()
        return

    # production: save as SVG for the final report and embed the saved file
    img_file = os.path.join(img_dir, output_name+'.svg')
    fig.write_image(img_file, scale=1)
    display_image(img_file)

In [33]:
# Example input for piechart: one label and one value per slice.
names= ['2小时','4小时','8小时']
values= [0.2, 0.3, 0.5]
fig_title = '本校学生锻炼时间分布情况'
output_name= 'pie_example'
piechart(names, values, fig_title, output_name)

#### 用方块面积表示百分比

In [34]:
def percentage_square_chart(section_names, section_data, output_name):
    """
    Draw a treemap in which the area of each rectangle reflects its value.

    All sections are children of a single root node labelled 'total' whose
    value is the sum of section_data (branchvalues='total').

    Args:
        section_names: label of each section; any sequence (list, tuple,
            array) is accepted.
        section_data: numeric value of each section, same length as
            section_names.
        output_name: base name (without extension) of the SVG written to
            img_dir when PRODUCTION_PHASE is true.

    Example:
        percentage_square_chart(['A', 'B', 'C'], [54, 61, 33], 'treemap_example')
    """
    # Copy into plain lists: `[x] + seq` fails for tuples and performs
    # elementwise addition for numpy arrays instead of concatenation.
    names = list(section_names)
    values = list(section_data)

    # check input
    assert len(names) == len(values)

    _labels = ['total'] + names
    _parents = [''] + ['total']*len(names)
    _vals = [sum(values)] + values

    layout = go.Layout(
        # background color config
        paper_bgcolor = 'rgba(0,0,0,0)',
        plot_bgcolor = 'rgba(255, 255, 255, 0.5)',
        showlegend = False,
        width = 600,
        height = 350,
        margin = dict(
            l = 20,    # left margin
            r = 20,    # right margin
            t = 20,    # top margin
            b = 20,    # bottom margin
        ),
    )

    fig = go.Figure(
        go.Treemap(
            branchvalues = 'total',
            labels = _labels,
            parents = _parents,
            values = _vals,
            textinfo = 'label + value',
            outsidetextfont = {'size': 1, 'color': 'white'},
            insidetextfont = {'size': 15, 'family': 'PingFang SC'},
            marker = {"line": {"width": 2}},
            pathbar = {"visible": False},
        ),
        layout = layout,
    )

    if PRODUCTION_PHASE:
        # save as SVG for the final report and embed the saved file
        img_file = os.path.join(img_dir, output_name+'.svg')
        fig.write_image(img_file, scale=1)
        display_image(img_file)
    else:
        fig.show()


In [35]:
# Example input for percentage_square_chart: nine sections with their values.
section_names = ["类别1", "类别2", "类别3", "类别4", "类别5", "类别6", "类别7", "类别8", "类别9"]
section_data = [54, 61, 33, 7, 90, 12, 34, 23, 18]
percentage_square_chart(section_names, section_data, 'treemap_example')

#### 分布位点图

In [36]:
def mileston(fig, x, y, name, legend):
    """
    Draw one milestone marker (a dot on top of a short vertical stem) on fig.

    Adds a marker trace at (x, y+0.2), a stem from y-0.01 to y+0.16 and a text
    annotation. Nothing is returned; fig is modified in place.

    Args:
        fig: plotly Figure to draw on.
        x: x position (the score) of the milestone.
        y: y position of the group's base line.
        name: 'mean' for the mean marker; any other value is treated as the
            name of a percentile and is used in the annotation text.
        legend: the marker gets a legend entry only when this equals 1.
    """
    # compare by value; identity comparison against a string literal is not reliable
    if name == 'mean':
        color = 'rgba(0, 204, 150, .8)'
        group = "group"
        group_name = '均值'
        y_annot = y + 0.35
        text_annot = '均值：{}'.format(x)
    else:
        color = 'royalblue'
        group = "group2"
        group_name = '百分位数'
        y_annot = y - 0.15
        text_annot = '{}:{}'.format(name,x)

    showlegend = (legend == 1)

    # marker dot
    fig.add_trace(
        go.Scatter(
            x = [x],
            y = [y+0.2],
            mode = 'markers',
            marker = dict(
                size = 12,
                color = color,
                line = dict(
                    width = 2,
                    color = 'DarkSlateGrey',
                )
            ),
            showlegend = showlegend,
            legendgroup = group,
            name = group_name,
        )
    )

    # vertical stem between the base line and the dot
    fig.add_trace(
        go.Scatter(
            x = [x]*2,
            y = [y-0.01, y+0.16],
            line = dict(
                color = 'royalblue',
                width = 4
            ),
            mode = 'lines',
            showlegend = False,
        )
    )

    fig.add_annotation(
        x = x,
        y = y_annot,
        text = text_annot,
    )

    # applies to every annotation currently on the figure: data coordinates,
    # zero arrow offset
    fig.update_annotations(dict(
        xref = "x",
        yref = "y",
        ax = 0,
        ay = 0,
    ))
    

def range_line(fig, max_value, y):
    """
    Draw a horizontal guide line from x=0 to x=max_value at height y on fig.

    Args:
        fig: plotly Figure to draw on; modified in place.
        max_value: right end of the line; may be an int or a float.
        y: y position of the line.
    """
    # Two end points are enough for a straight line and, unlike range(),
    # work for non-integer maxima. mode='lines' is set explicitly so that no
    # markers are drawn on the end points.
    fig.add_trace(
        go.Scatter(
            x = [0, max_value],
            y = [y, y],
            mode = 'lines',
            line = dict(color='rgba(190, 192, 213, 1)', width=4),
            showlegend = False,
        )
    )
              
def milestone_chart(grp_names, grp_avgs, fig_title, output_name,
                    percentiles=None, percentile_names=None):
    """
    Draw, for each group, a guide line with a milestone for the mean and
    optional milestones for percentiles.

    Args:
        grp_names: name of each group, used as y-axis tick labels.
        grp_avgs: mean of each group, same length as grp_names.
        fig_title: title text handed to fig_counter.add_title.
        output_name: base name (without extension) of the SVG written to
            img_dir when PRODUCTION_PHASE is true.
        percentiles: optional list/tuple with one list of percentile values
            per group; use an empty list for a group without percentiles.
        percentile_names: names matching the values inside each percentile
            list; required when any percentile list is non-empty.

    Example:
        grp_names = ['this school', 'district']
        grp_avgs = [520, 460]
        percentiles = [
            [],
            [400, 570],
        ]
        percentile_names = ['bottom 25%', 'top 25%']
        fig_title = 'Score compared with the district'
        output_name = 'milestone_example'
        milestone_chart(grp_names, grp_avgs, fig_title, output_name,
                        percentiles, percentile_names)
    """

    # check data
    assert len(grp_names) == len(grp_avgs)
    if isinstance(percentiles, (list, tuple)):
        assert len(grp_names) == len(percentiles)
        per_group = list(percentiles)
    else:
        # no percentiles given: every group only gets its mean
        per_group = [[] for _ in grp_names]

    y_data = list(range(len(grp_names)))

    fig = go.Figure()

    # right end of each group's guide line: the largest value drawn on it,
    # i.e. the biggest percentile or the mean, whichever is larger
    max_values = [
        max(max(pcts), avg) if pcts else avg
        for pcts, avg in zip(per_group, grp_avgs)
    ]

    # plot the guide line and the mean milestone of each group;
    # only the first mean marker gets a legend entry
    for idx, (x, y, max_value) in enumerate(zip(grp_avgs, y_data, max_values), start=1):
        range_line(fig, max_value, y)
        mileston(fig, x, y, 'mean', idx)

    # plot the percentile milestones;
    # only the first percentile marker gets a legend entry
    marker_no = 1
    for pcts, y in zip(per_group, y_data):
        if pcts:
            for x, name in zip(pcts, percentile_names):
                mileston(fig, x, y, name, marker_no)
                marker_no += 1

    # layout config
    fig.update_layout(go.Layout(
        # background color config
        paper_bgcolor = 'rgba(0, 0, 0, 0)',
        plot_bgcolor = 'rgba(255, 255, 255, 0.5)',
        #showlegend = True,

        # legend font
        legend = dict(font_family='SimHei'),
        # legend entries follow the trace order
        legend_traceorder = "normal",

        xaxis = dict(
            tickfont_family = 'SimHei',
            showticklabels = True,
            showline = True,
            linecolor = 'rgb(127,127,127)',
            mirror = True,
        ),
        yaxis = dict(
            tickmode = 'array',
            tickvals = y_data,
            ticktext = grp_names,
            tickfont_size= 15,
            tickfont_family='SimHei',
            showticklabels = True,
            showline = True,
            mirror = True,
            linecolor = 'rgb(127,127,127)',
            # leave room below the first and above the last group for markers and labels
            range = (y_data[0]-0.3, y_data[-1] + 0.5)
        ),
        margin = dict(
            l = 50,    # left margin
            r = 50,    # right margin
            t = 80,    # top margin
            b = 110,   # bottom margin
        ),
      ))

    fig = fig_counter.add_title(fig, fig_title, y_pos=-0.19)

    if PRODUCTION_PHASE:
        # save as SVG for the final report and embed the saved file
        img_file = os.path.join(img_dir, output_name+'.svg')
        fig.write_image(img_file, scale=1)
        display_image(img_file)
    else:
        fig.show()

In [37]:
# Example input for milestone_chart: two groups; only the second group has
# percentile values (the first one passes an empty list).
grp_names = ['本校', '全区']
grp_avgs = [520, 460]
percentiles = [
    [],
    [400, 570],
]
percentile_names = ['后25%', '前25%']
fig_title = '本校学生体验美得分与全区比较'
output_name = 'milestone_example'
milestone_chart(grp_names, grp_avgs, fig_title, output_name,
                percentiles, percentile_names)

#### 玫瑰图

In [76]:
def rose_chart1(dim_names, dim_vals, dim_colors, max_val, center_word, output_name):
    """
    Draw a polar bar ("rose") chart with one sector per dimension and a text
    label in the central hole.

    For every dimension two barpolar traces are added: one holding the bar of
    that dimension (all other positions are 0) and one holding a thin band of
    height 0.5 that starts at radius max_val+0.5.

    Args:
        dim_names: category labels along the angular axis.
        dim_vals: bar value of each dimension, same length as dim_names.
        dim_colors: marker colors, same length as dim_names.
        max_val: largest radial tick. When a value in dim_vals exceeds it, a
            warning is printed and max(dim_vals) is used instead.
            NOTE(review): range(max_val+1) below requires an integer.
        center_word: text of the annotation placed at paper position (0.5, 0.5).
        output_name: base name (without extension) of the SVG written to
            img_dir when PRODUCTION_PHASE is true.
    """
    # check data
    if max(dim_vals)>max_val:
        print('Warning! Input values is greater than input max value.')
        max_val = max(dim_vals)

    assert len(dim_names)==len(dim_vals)
    assert len(dim_names)==len(dim_colors)
        
    traces = []
    for ind in range(len(dim_vals)):
        # value vectors that are non-zero only at position `ind`
        bar_val = [0]*ind + [dim_vals[ind]] + [0]*(len(dim_vals)-1-ind)
        label_val = [0]*ind + [0.5] + [0]*(len(dim_vals)-1-ind)
        # plot bar
        trace = {
            "name": dim_names[ind],
            "r": bar_val, 
            "type": "barpolar", 
            "theta": dim_names, 
            "hoverinfo": "r+theta",
            "opacity": 0.9,
            "marker_color": dim_colors,
            "showlegend": False,
        }
        traces.append(trace)
        # plot label: a thin band starting at radius max_val+0.5
        trace = {
            "name": dim_names[ind],
            "r": label_val, 
            "type": "barpolar", 
            "theta": dim_names, 
            #"hoverinfo": "r+theta",
            "opacity": 1,
            "base": max_val+0.5,
            "marker_color": dim_colors,
            "showlegend": False,
        }
        traces.append(trace)

    layout = {
        "font": {
            "size": 16, 
            "family": "SimHei"
        }, 
        "polar": {
            "hole": 0.3,  # size of the central hole
            "bargap": 0.02,  # gap between adjacent sectors
            "radialaxis": {
                "visible": True,
                "type": "linear", 
                "title": {"text": "<br>"}, 
                "tickmode": "array",
                # one tick per integer from 0 to max_val
                "tickvals": list(range(max_val+1)),
                'ticktext': [str(e) for e in range(max_val+1)],
                "tickfont": {"size": 14},
                #"autorange": True, 
                "gridwidth": 2,  # width of the radial-axis grid lines
                "linewidth": 0,
            }, 
            "angularaxis": {
                'visible': True,
                "type": "category", 
                "ticklen": 12, 
                "tickmode": "auto",
                "tickfont": {"color": "rgb(22, 22, 22)"}, 
                "direction": "counterclockwise", 
                "showline": False,
                "showgrid": False,
                "gridwidth": 1, 
                "tickwidth": 1, 
                "tickprefix": "",
                "showticklabels": True,
            },
        }, 
        # "title": {"text": "<figure title>"},
        "xaxis": {
            "range": [-1, 6], 
            "autorange": True
        }, 
        "yaxis": {
            "range": [-1, 4], 
            "autorange": True
        }, 
#         "legend": {
#             "x": 1, 
#             "y": 1, 
#             "font": {
#                 "size": 14, 
#                 "family": "Roboto"
#             }, 
#             "title": {
#                 "font": {
#                     "size": 16, 
#                     "color": "rgb(67, 36, 167)", 
#                     "family": "Overpass"
#                 }, 
#                 "text": "Variables"
#             }, 
#             "xanchor": "auto", 
#             "itemsizing": "trace", 
#             "traceorder": "normal", 
#             "borderwidth": 0, 
#             "orientation": "v"
#         }, 
        "modebar": {
            "color": "rgba(68, 68, 68, 0.3)", 
            "bgcolor": "rgba(118, 6, 6, 0.5)", 
            "orientation": "h"
        }, 
        "autosize": True, 
        "template": {
            "data": {
                "bar": [{
                    "type": "bar", 
                    "marker": {"colorbar": {
                        "len": 0.2, 
                        "ticks": "inside", 
                        "ticklen": 6, 
                        "tickcolor": "rgb(237,237,237)", 
                        "outlinewidth": 0,
                    }}
                }], 
                "carpet": [{
                    "type": "carpet", 
                    "aaxis": {
                        "gridcolor": "white", 
                        "linecolor": "white", 
                        "endlinecolor": "rgb(51,51,51)", 
                        "minorgridcolor": "white", 
                        "startlinecolor": "rgb(51,51,51)",
                    }, 
                    "baxis": {
                        "gridcolor": "white", 
                        "linecolor": "white", 
                        "endlinecolor": "rgb(51,51,51)", 
                        "minorgridcolor": "white", 
                        "startlinecolor": "rgb(51,51,51)"
                    }
                }], 
            }, 
            "layout": {
                "geo": {
                    "bgcolor": "white", 
                    "showland": True, 
                    "lakecolor": "white", 
                    "landcolor": "rgb(237,237,237)", 
                    "showlakes": True, 
                    "subunitcolor": "white"
                },
                "font": {"color": "rgb(51,51,51)", "family": "SimHei"}, 
                "polar": {
                    "bgcolor": "rgb(237,237,237)", 
                    "radialaxis": {
                        "ticks": "outside", 
                        "showgrid": True, 
                        "gridcolor": "white", 
                        "linecolor": "white", 
                        "tickcolor": "rgb(51,51,51)"
                    },
                },   
                "hovermode": "closest", 
                "plot_bgcolor": "rgb(237,237,237)", 
                "paper_bgcolor": "white", 
                "shapedefaults": {
                    "line": {"width": 0}, 
                    "opacity": 0.3, 
                    "fillcolor": "black",
                }, 
                "annotationdefaults": {
                    "arrowhead": 0, 
                    "arrowwidth": 1,
                }
            }
        }, 
        # NOTE(review): top-level "radialaxis" / "orientation" keys sit outside
        # "polar" — confirm the installed plotly version accepts them
        "radialaxis": {"ticksuffix": "%"}, 
        "separators": ", ", 
        "orientation": 0
    }    

    fig = go.Figure(traces, layout)
    
    # text in the middle of the figure (paper coordinates 0.5, 0.5)
    fig.add_annotation(
        text=center_word,
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font = dict(
            family = 'SimHei',
            size = 15,
            color = 'rgb(8, 208, 206)',
        ),
    )

    if PRODUCTION_PHASE:
        # save as SVG for the final report and embed the saved file
        img_file = os.path.join(img_dir, output_name+'.svg')
        fig.write_image(img_file, scale=1)
        display_image(img_file)
    else:
        fig.show()

In [77]:
# Example input for rose_chart1: six dimensions, each with a color and a value.
dim_names = ['维度1', '维度2', '维度3', '维度4', '维度5', '维度6']
dim_colors = ["#E4FF87", '#709BFF', '#709BFF', '#FFAA70', '#FFAA70', '#FFDF70']
dim_vals = [1, 2, 3, 4, 2, 1]
max_val = 4
#fig_title = '图片标题'
# text shown in the center of the chart; <br> starts a new line
center_word = '商业<br>素养'
output_name = 'rose_chart_example'

rose_chart1(dim_names, dim_vals, dim_colors, max_val, center_word, output_name)

### 图表示例 - 较复杂的复合图

有时一般的数据图表无法满足直观和美观等要求，因此需要通过美术设计来制作比较复杂的图表。这里我们可以使用*svgutils*工具包对多个图片进行组合，以及添加图形、文字等元素，进一步丰富图片的内容。

可参考下面这个例子制作图片。

#### 综合能力得分

In [38]:
# general report
general_score = 356
general_percentage = 59
grade_stage = [0, 10, 25, 75, 90]
# number of thresholds in grade_stage that lie strictly below the percentage;
# used further down as the index into heart_loc
general_grade = np.sum(np.array(grade_stage)<general_percentage)  

# plot pie
rot = 0
color_maps = ['rgb(255,187,26)', 'rgb(181,229,255)']
label = ['超过','未超过']
pie_value = [general_percentage, 100-general_percentage]
# NOTE(review): sub_per is not read again in this cell
sub_per = str(int(general_percentage))
# rotate the pie only when the percentage is below 50
if general_percentage< 50:
    rot = 360*general_percentage/100
else:
    rot = 0

layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)'
)
# donut chart (hole=.8) without slice labels
fig = go.Figure(data=[go.Pie(labels=label, 
                             values=pie_value,
                             hole=.8,
                             marker_colors=color_maps,
                             textinfo='none',
                             rotation = rot
                             )],
                layout=layout)
fig.update_layout(showlegend=False,
                  width=350,
                  height=350,
                 )
# the pie is always written to disk (independent of PRODUCTION_PHASE) because
# the composed figure below loads it from this file
img_file = os.path.join(img_dir, 'general_pie_tmp.svg')
fig.write_image(img_file, scale=1)
#display_image(img_file)

# x offset of the heart icon for each value of general_grade
heart_loc = [0, 93, 138, 183, 227, 274]
# general plot: background, heart icon, pie and the two numbers combined
# into one 490x150 SVG
compose_fig = sc.Figure(490, 150,
                  sc.SVG(os.path.join(material_dir, 'score_background.svg')),
                  sc.SVG(os.path.join(material_dir, 'xin3.svg')).scale(1.0).move(heart_loc[general_grade], 70),
                  sc.SVG(os.path.join(img_dir, 'general_pie_tmp.svg')).scale(0.8).move(275, -70),
                  sc.Text(str(int(general_percentage))+'%', 392, 120, size=20, font='PingFang SC', weight='bold', color='rgb(255, 187, 26)'),
                  sc.Text(str(int(general_score)), 153, 57, size=25, font='PingFang SC', weight='bold', color='rgb(245, 73, 70)'),
                  #sc.Grid(20, 20),
                  )

compose_fig = fig_counter.add_title(compose_fig, '一张合成的复杂图')

# you can append this part into your code
if PRODUCTION_PHASE:
    compose_fig.save(os.path.join(img_dir, 'general_report_final.svg'))
    display_image(os.path.join(img_dir, 'general_report_final.svg'))
else:
    display(compose_fig)


#### 利用$\pi$图展示得分

In [39]:
def ability_fig(ability_name, sub_s, sub_l, sub_per):
    """
    Compose a small score card for one ability: a donut pie for the percentage
    plus the percentage, level and score written as text on a background image.

    Args:
        ability_name: name of the ability, e.g. 'reading'; used as the prefix
            of the generated file names.
        sub_s: score, e.g. 598. When it is not an int, all three texts are
            replaced by dash placeholders.
        sub_l: level, e.g. 13. Shown as a dash placeholder when not an int.
        sub_per: percentage, e.g. 89. When it is not an int, an empty pie is
            drawn and the percentage text is a dash placeholder.

    The pie is always written to '<ability_name>_img.svg' in img_dir because
    the composed figure loads it from that file. The composed figure is saved
    as '<ability_name>_img_final.svg' only when PRODUCTION_PHASE is true.
    """

    #-- pie
    # color config
    color_maps = ['rgb(6, 208, 206)', 'rgb(166, 253, 253)']
    # label
    label = ['超过','未超过']

    # Validate the percentage before doing arithmetic with it, so that a
    # missing value (e.g. None) produces the placeholder instead of an error.
    if isinstance(sub_per, int):
        pie_value = [sub_per, 100-sub_per]
        # rotate the pie only when the percentage is below 50
        rot = 360*sub_per/100 if sub_per < 50 else 0
        per_text = str(sub_per) + '%'
    else:
        pie_value = [0, 100]
        rot = 0
        per_text = '—%'

    layout = go.Layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )

    # plot
    trace_pie = go.Pie(
        labels= label,
        values= pie_value,
        hole = .8,
        marker_colors = color_maps,
        textinfo = 'none',
        rotation = rot
    )

    fig = go.Figure(data=[trace_pie],
                    layout=layout)
    fig.update_layout(showlegend=False,
                      width=350,
                      height=350,
                     )

    pie_file = os.path.join(img_dir, ability_name+'_img.svg')
    fig.write_image(pie_file, scale=1)

    #-- ability plot
    if isinstance(sub_s, int):
        score_text = str(sub_s)
        level_text = str(sub_l) if isinstance(sub_l, int) else '——'
    else:
        # without a score the whole card shows placeholders
        per_text, level_text, score_text = '—%', '——', '——'

    text_color = 'rgb(6, 208, 206)'
    compose_fig = sc.Figure(150, 190,
                  sc.SVG(os.path.join(material_dir, 'small_score_background.svg')),
                  sc.SVG(pie_file).scale(0.6).move(-35, 0),
                  sc.Text(per_text, 50, 118, size=16, weight='bold', color=text_color),
                  sc.Text(level_text, 55, 45, size=14, weight='bold', color=text_color),
                  sc.Text(score_text, 55, 12, size=14, weight='bold', color=text_color),
                  #sc.Grid(20, 20),
                  )

    if PRODUCTION_PHASE:
        final_file = os.path.join(img_dir, ability_name+'_img_final.svg')
        compose_fig.save(final_file)
        display_image(final_file)
    else:
        display(compose_fig)

In [40]:
# Example call: ability name, score, level, percentage
ability_fig('reading', 598, 13, 89)

## 生成PDF格式的报告文档

在使用Jupyter Notebook完成报告内容后，可以使用**SmartyDoc**提供的处理流程，将ipynb格式的报告文本转换成PDF格式。

### PDF文档转换流程

在**SmartyDoc**的框架下，Jupyter Notebook的文档首先转换为html网页，之后配合css文件（即样式设计文件），转换为最终的PDF文件。

### 需要准备哪些文件

1. 编写好的Jupyter Notebook文件（ipynb文件）。

2. ipynb文件转换为html文件时所需的转换模版，具体地，该文件保存于**SmartyDoc**目录下的`templates`目录内，您可以将该目录直接拷贝到与ipynb文件同级的目录内。用户不需要对`templates`目录下的文件做任何修改。

3. css文件，用于描述文字、图片等内容的显示样式

4. 文档内要插入的图片等文件

### 使用printview2插件实现PDF文档生成

为了让用户能够在Jupyter的操作界面下方便地完成PDF文件的生成，我们专门开发了一个Jupyter的扩展插件**printview2**，为用户提供图形化的操作界面。用户可以在Jupyter的Nbextensions扩展管理页内，通过勾选框启用该扩展，并在勾选框下方的页面设置PDF文件生成所需的参数。具体界面如下图。

In [41]:
# show the screenshot of the printview2 configuration page
display_image(os.path.join(material_dir, 'jupyter_printview2_config.png'))

在准备好格式转换所需的文件后，根据要生成的PDF文件的样式，对**printview2**的参数进行设置，具体包括：

1. `add index before headings`：若勾选此项，则系统会自动在章节名前面加上`1.`、`2.`、`2.1`这样的序列号；

2. `the document has a foreword section before TOC`：若勾选此项，则正文第一个`h2`所包含的内容会以*前言*的形式出现在目录之前，且该部分内容不计入页码；

3. `maximum level of nested sections to display on the TOC`：在目录中要呈现的目录级别，如果设置为`1`，则只显示`h2`所表示的章名；

4. `display an article before each article, or not`：对各章的起始页样式进行设置，具体包括三种形式，分别为`none`、`toc`和`intro`，`none`表示章节首页不做特殊处理，`toc`表示章节首页包含章名和该章内的二级标题列表，`intro`表示章节首页包含章名和该章的内容简介。内容简介部分须在正文中以标签`<article_summary></article_summary>`括起来；

5. `nbconvert options`：ipynb文件转换为html文件时的参数设置，一般不需要用户做修改；

6. `open a new tab in the brower to display nbconvert output`：若勾选此项，格式转换完成后会自动在浏览器中打开一个新的标签页显示转换结果。使用Chrome浏览器查看PDF文件会出现问题，请直接下载文件查看转换结果；

7. `export to pdf format`：若勾选此项，生成PDF文件，否则只生成html网页文件。

完成以上设置后，在浏览器中切换到要进行格式转换的notebook的页面（注意：如果该页面在上述设置前已经打开，请对该页面进行刷新，以更新参数），点击工具栏中的打印图标，自动开始文件格式转换。

### 使用命令行操作实现PDF文档生成

待补充，敬请期待 ...