In [106]:
# 所有库的import请在此cell进行！
import pandas
import pickle
import numpy
import os
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.charts import Radar
from pyecharts.charts import Pie
from pyecharts.charts import Line
from pyecharts.commons.utils import JsCode
from scipy.stats import pearsonr
from scipy.stats import chi2_contingency

# Progress message printed once every import above has succeeded.
print('Loading dataset...')

Loading dataset...


In [107]:
# Load the survey data: reuse the cleaned cache (cooked.csv) when it exists,
# otherwise clean the raw dataset.csv from scratch.
if os.path.exists('./cooked.csv'):
    data = pandas.read_csv('./cooked.csv')
    # Presumably the index column written by data.to_csv in the export cell.
    data = data.drop('Unnamed: 0', axis=1)
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # load a COLS_NAME.dat that this notebook wrote itself.
    with open('./COLS_NAME.dat', 'rb') as f:
        COLS_NAME = pickle.load(f)
    # Re-label the columns with the integers 0..len(COLS_NAME)-1, matching the
    # labels assigned in the cleaning branch below.
    data.columns = [idx for idx in range(len(COLS_NAME))]
    print('Data loading from cooked.csv.')
else:
    dataset = pandas.read_csv('dataset.csv')
    print(f'Total data: {dataset.shape[0]}')
    # Drop the columns that are not needed
    data = dataset.drop(
        [
            'Unnamed: 0',
            '提交答卷时间',
            '所用时间',
            '来源',
            '来源详情',
            '来自IP',
            '1、请问您的学校所在地区和类别：'
        ],
        axis=1
    )

    # Keep the original column names (their meaning); from here on columns are
    # accessed by index.
    # The index labels are integers, not str!
    COLS_NAME = data.columns.values.tolist()
    data.columns = [idx for idx in range(len(COLS_NAME))]

    # The region column has two abnormal values of -3; remove those rows
    data = data[data[91] != -3]

    # Question 14 data is coded 1 or 2, so subtract 1 everywhere for uniform handling
    data.iloc[:,53:58] = data.iloc[:,53:58] - 1

    # Online study time has wrong and empty values; keep only 0~15
    data = data[data[22].isin([x for x in range(16)])].astype(int)
    print(f'Data cleaning completed')

NUMS = data.shape[0] # total sample count; for single-choice questions only, never for multi-select statistics!
print(f'Total: {NUMS}.')

Data loading from cooked.csv.
Total: 750841.


In [108]:
# Export the cleaned data to CSV, together with the pickled original column
# names, but only when no cache file exists yet.
if not os.path.exists('./cooked.csv'):
    data.to_csv('./cooked.csv')
    with open('./COLS_NAME.dat', 'wb') as f:
        pickle.dump(COLS_NAME, f)
    print('cooked.csv saved.')
else:
    # Only report "already existed" when nothing was written; previously this
    # message was printed unconditionally, even right after saving.
    print('cooked.csv has already existed.')

cooked.csv has already existed.


In [109]:
# Wrapper that builds a radar chart
def create_radar(schema, data, minv, maxv, title=''):
    """Build a single-series pyecharts Radar chart on a circular grid.

    Args:
        schema: indicator definitions forwarded to ``Radar.add_schema``.
        data: series data forwarded to ``Radar.add``.
        minv: lower bound of the radius axis.
        maxv: upper bound of the radius axis.
        title: chart title.

    Returns:
        The configured ``Radar`` instance.
    """
    # Angle axis: full circle, counter-clockwise, with every visual element hidden.
    angle_axis = opts.AngleAxisOpts(
        min_=0,
        max_=360,
        is_clockwise=False,
        interval=5,
        axistick_opts=opts.AxisTickOpts(is_show=False),
        axislabel_opts=opts.LabelOpts(is_show=False),
        axisline_opts=opts.AxisLineOpts(is_show=False),
        splitline_opts=opts.SplitLineOpts(is_show=False),
    )

    # Radius axis: spans [minv, maxv] with opaque split areas.
    filled_rings = opts.SplitAreaOpts(
        is_show=True, areastyle_opts=opts.AreaStyleOpts(opacity=1)
    )
    radius_axis = opts.RadiusAxisOpts(
        min_=minv,
        max_=maxv,
        interval=0.1,
        splitarea_opts=filled_rings,
    )

    chart = Radar()
    chart.set_colors(["#4587E7"])
    chart.add_schema(
        schema=schema,
        shape="circle",
        center=["50%", "50%"],
        radius="80%",
        angleaxis_opts=angle_axis,
        radiusaxis_opts=radius_axis,
        polar_opts=opts.PolarOpts(),
        splitarea_opt=opts.SplitAreaOpts(is_show=False),
        splitline_opt=opts.SplitLineOpts(is_show=False),
    )
    chart.add(
        series_name="",
        data=data,
        areastyle_opts=opts.AreaStyleOpts(opacity=0.1),
        linestyle_opts=opts.LineStyleOpts(width=1),
    )
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title),
        legend_opts=opts.LegendOpts()
    )
    return chart

# Wrapper that builds a bar chart
def create_bar(keys, values, title='', label='', isRotated=False, isPercent=False):
    """Build a single-series pyecharts Bar chart.

    Args:
        keys: category labels for the x axis.
        values: one value per key. A ``list`` is used as-is; any other
            array-like (numpy array, pandas Series, tuple, ...) is converted
            to a list first.
        title: chart title.
        label: series name shown in the legend.
        isRotated: when true, x-axis labels are rotated by 30 degrees.
        isPercent: when true, each bar is labelled at the top with its value
            multiplied by 100, formatted with ``toFixed()`` and a ``%`` suffix.

    Returns:
        The configured ``Bar`` instance.
    """
    # Normalise instead of asserting: an ``assert`` disappears under ``python -O``
    # and forced every caller to wrap arrays in ``list(...)``. Lists pass through
    # unchanged, so existing callers behave exactly as before.
    if not isinstance(values, list):
        values = values.tolist() if hasattr(values, 'tolist') else list(values)

    # ``None`` leaves the corresponding pyecharts option at its default.
    ROTATE_OPT=None
    PERCENT_OPT=None
    if isRotated:
        ROTATE_OPT=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(rotate=30)
        )
    if isPercent:
        PERCENT_OPT=opts.LabelOpts(
            position="top",
            formatter=JsCode(
                "function(x){return Number(x.data*100).toFixed() + '%';}"
            ),
        )
    bar = (
        Bar()
        .add_xaxis(keys)
        .add_yaxis(label, values)
        .set_global_opts(
            xaxis_opts=ROTATE_OPT,
            title_opts=opts.TitleOpts(title=title),
            legend_opts=opts.LegendOpts(is_show=True)
        )
        .set_series_opts(
            label_opts=PERCENT_OPT
        )
    )
    return bar

# Wrapper that builds a pie (ring) chart
def create_pie(values, title=''):
    """Build a pyecharts ring-shaped Pie chart with boxed outside labels.

    Args:
        values: data pairs forwarded to ``Pie.add`` (the notebook passes
            ``[name, value]`` lists).
        title: chart title.

    Returns:
        The configured ``Pie`` instance.
    """
    # Rich-text styles referenced by the label formatter ("b" and "per");
    # "a", "abg" and "hr" are passed through to the chart options as well.
    rich_styles = {
        "a": {"color": "#999", "lineHeight": 22, "align": "center"},
        "abg": {
            "backgroundColor": "#e3e3e3",
            "width": "100%",
            "align": "right",
            "height": 22,
            "borderRadius": [4, 4, 0, 0],
        },
        "hr": {
            "borderColor": "#aaa",
            "width": "100%",
            "borderWidth": 0.5,
            "height": 0,
        },
        "b": {"fontSize": 16, "lineHeight": 33},
        "per": {
            "color": "#eee",
            "backgroundColor": "#334455",
            "padding": [2, 4],
            "borderRadius": 2,
        },
    }
    slice_labels = opts.LabelOpts(
        position="outside",
        formatter="{b|{b}: }{per|{d}%}  ",
        background_color="#eee",
        border_color="#aaa",
        border_width=1,
        border_radius=4,
        rich=rich_styles,
    )

    chart = Pie()
    chart.add(
        "",
        values,
        radius=["40%", "55%"],
        label_opts=slice_labels,
    )
    chart.set_global_opts(title_opts=opts.TitleOpts(title=title))
    return chart

# Wrapper that builds a line (area) chart
def create_line(keys, values, title='', label='', isSmooth=False):
    """Build a single-series pyecharts Line chart with a half-opaque area fill.

    Args:
        keys: category labels for the x axis.
        values: one y value per key.
        title: chart title.
        label: series name.
        isSmooth: forwarded to ``add_yaxis`` as ``is_smooth``.

    Returns:
        The configured ``Line`` instance.
    """
    x_axis = opts.AxisOpts(
        axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
        is_scale=False,
        boundary_gap=False,
    )

    chart = Line()
    chart.add_xaxis(keys)
    chart.add_yaxis(label, values, is_smooth=isSmooth)
    chart.set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=True),
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title),
        xaxis_opts=x_axis,
    )
    return chart

# Confirmation message printed after the chart helper definitions above have run.
print('create_xxx func defined!')

create_xxx func defined!


In [110]:
# Grade distribution of the surveyed students (rendered as a bar chart)
keys = ["一年级", "二年级", "三年级", "四年级", "五年级", "六年级", "初一", "初二", "初三", "高一", "高二", "高三"]

# Count respondents per grade code in a single vectorised pass instead of a
# Python loop over every row. reindex() puts the counts in axis order for the
# codes 1..12 and yields 0 for a grade nobody selected. Codes outside 1..12 are
# left out; the previous index arithmetic counted code 0 as the last grade and
# raised IndexError for codes above 12.
grade_counts = data[1].value_counts().reindex(range(1, 13), fill_value=0)
# Native ints: create_bar expects a plain list and pyecharts serialises to JSON.
values = [int(count) for count in grade_counts]

bar = create_bar(keys, values, "学生的年级分布", label="问卷人数")
bar.render_notebook()

In [111]:
# Device usage statistics: columns 2..7 hold one flag column per device
keys = ['电视', '台式电脑', '平板', '手机', '音频', '纸质学习资料']

# Number of rows with value 1 in each device column
device_counts = numpy.array([data[col].value_counts()[1] for col in range(2, 8)])
# Normalise so the shares sum to 1
device_shares = device_counts / device_counts.sum()
res = [[device, share] for device, share in zip(keys, device_shares)]

pie = create_pie(res, "学生上课设备使用情况统计")
pie.render_notebook()

In [112]:
# Usage statistics for the platform features
keys = ['回看课程视频', '作业提交', '随堂测试', '视频会议', '作业批改反馈', '课堂发言', '班级通知', '班级圈', '优秀作业查看', '学科竞赛游戏', '屏幕共享', '弹幕', '讨论']

# One flag column per feature, starting at column 8. Derive the column range
# from len(keys): the previous range(8,20) read only 12 columns for 13 labels,
# so the last category ('讨论') had no bar.
# NOTE(review): assumes column 20 is the flag for the 13th option (column 21
# is used by the next question) - confirm against COLS_NAME.
FIRST_FEATURE_COL = 8
values = [
    int(data[idx].value_counts()[1])
    for idx in range(FIRST_FEATURE_COL, FIRST_FEATURE_COL + len(keys))
]

bar = create_bar(keys, values, label='使用人数', title='平台功能使用情况', isRotated=True)
bar.render_notebook()

In [113]:

# Class duration statistics: column 21 holds a code 1..4, one per entry of keys
keys = ['20分钟', '20～30分钟', '30～45分钟', '45分钟以上']

duration_counts = data[21].value_counts()
res = numpy.array([duration_counts[code] for code in range(1, 5)])
# Normalise so the shares sum to 1
res = res / res.sum()
res = [[duration, share] for duration, share in zip(keys, res)]

pie = create_pie(res, '学生上课时长情况统计')
pie.render_notebook()

In [114]:
# Daily online study time analysis: column 22 holds the number of hours (0..15)
keys = [f'{hour}小时' for hour in range(0, 16)]

hour_counts = data[22].value_counts()
# Native ints, one per hour value, in axis order
res = [int(hour_counts[hour]) for hour in range(0, 16)]

bar = create_bar(
    keys,
    res,
    title='学生每天在线学习时间统计',
    label='人数',
    isRotated=True,
)
bar.render_notebook()

In [115]:
# Learning state statistics: column 23 holds a code 1..5, one per entry of keys
keys = ["能","监督下能","有时能,有时不能","基本不能","不适应"]

state_counts = data[23].value_counts()
res = numpy.array([state_counts[code] for code in range(1, 6)])
# Normalise so the shares sum to 1
res = res / res.sum()
res = [[state, share] for state, share in zip(keys, res)]

pie = create_pie(res, '学生状态统计')
pie.render_notebook()

In [116]:
# Need for family accompaniment: column 24 holds a code 1..3, one per entry of keys
keys = ["完全不需要","有时需要","完全需要"]

accompany_counts = data[24].value_counts()
res = numpy.array([accompany_counts[code] for code in range(1, 4)])
# Normalise so the shares sum to 1
res = res / res.sum()
res = [[answer, share] for answer, share in zip(keys, res)]

pie = create_pie(res, '学习需要家人陪伴统计')
pie.render_notebook()

In [117]:
# Preferred class organisation forms: columns 25..33 hold one flag column per form
keys = ["直播","录播","资源包","电视课堂","直播+录播","直播+资源包","录播+资源包","直播+录播+资源包","录播+资源包+线上辅导答疑"]

form_counts = [int(data[col].value_counts()[1]) for col in range(25, 34)]
res = numpy.array(form_counts)
# Normalise so the shares sum to 1
res = res / res.sum()

bar = create_bar(
    keys,
    list(res),
    title='统计学生喜欢的课堂组织形式',
    isRotated=True,
    isPercent=True,
)
bar.render_notebook()

In [118]:
# Preferred online course content: columns 34..37 hold one flag column per content type
keys = ["学科课程新课","学科课程复习","音美体劳教育","专题教育"]

content_counts = [data[col].value_counts()[1] for col in range(34, 38)]
res = numpy.array(content_counts)
# Normalise so the shares sum to 1
res = res / res.sum()

bar = create_bar(
    keys,
    list(res),
    title='统计学生对线上课程内容的喜爱情况',
    isRotated=True,
    isPercent=True,
)
bar.render_notebook()

In [119]:
# Ways students deal with knowledge they have not mastered: columns 38..43,
# one flag column per method
keys = ["查阅线上资源","视频回放","教师线上答疑","社交软件咨询教师","同学交流","暂时放下"]

method_counts = [data[col].value_counts()[1] for col in range(38, 44)]
res = numpy.array(method_counts)
# Normalise so the shares sum to 1
res = res / res.sum()

bar = create_bar(
    keys=keys,
    values=list(res),
    title='统计学生通过哪些方法解决未掌握知识点',
    isRotated=True,
    isPercent=True,
)
bar.render_notebook()

In [120]:
# Online interaction frequency: columns 44..48 hold one flag column per answer
keys = ["不回答","偶尔参与回答","大多数情况下能回答","积极发言","没有问答环节"]

answer_counts = numpy.array([data[col].value_counts()[1] for col in range(44, 49)])
# Normalise so the shares sum to 1
answer_shares = answer_counts / answer_counts.sum()
res = [[answer, share] for answer, share in zip(keys, answer_shares)]

pie = create_pie(res, '统计学生线上互动频率')
pie.render_notebook()

In [121]:
# Problems encountered in online learning: columns 49..56 hold one flag column per problem
keys = ["网络卡顿","线上软件缺陷","与老师沟通不便", "作业不合理","课程质量欠佳","眼睛疲劳","软件太多容易混淆","环境干扰"]

problem_counts = [data[col].value_counts()[1] for col in range(49, 57)]
res = numpy.array(problem_counts)
# Normalise so the shares sum to 1
res = res / res.sum()

bar = create_bar(
    keys=keys,
    values=list(res),
    title='线上学习问题统计',
    isRotated=True,
    isPercent=True,
)
bar.render_notebook()

In [122]:
# Abilities developed through online learning: columns 57..62 hold one flag column per ability
ability_counts = numpy.array([data[col].value_counts()[1] for col in range(57, 63)])
# Normalise so the shares sum to 1
ability_shares = ability_counts / ability_counts.sum()
res = [{"value": ability_shares.tolist(), "name": "培养能力"}]

# Every radar axis shares the same upper bound
MAX_VALUE = 0.35
ability_names = ["自主学习能力", "自控能力", "数字化资源的利用能力", "表达沟通", "生活实践", "其他"]
radar_schema = [{"name": ability, "max": MAX_VALUE} for ability in ability_names]

radar = create_radar(radar_schema, res, 0, MAX_VALUE, title="统计学生线上学习培养能力")

radar.render_notebook()

In [123]:
# Satisfaction with online learning: columns 64..70 hold one rating column per
# aspect, with rating codes 1..4 matching the series names below
keys = ["直播方式","录播方式","教师教学态度","教师教学水平","资源内容","线上学习平台","总体满意度"]

rating_counts = [data[idx].value_counts() for idx in range(64, 71)]

# For each rating code, the share of all respondents (NUMS) giving that rating
# to each aspect. "percent" duplicates "value" so the label formatter can read it.
levels = []
for rating in range(1, 5):
    series = []
    for counts in rating_counts:
        share = counts[rating] / NUMS
        series.append({"value": share, "percent": share})
    levels.append(series)
level1, level2, level3, level4 = levels

percent_labels = opts.LabelOpts(
    position="right",
    formatter=JsCode(
        "function(x){return Number(x.data.percent * 100).toFixed() + '%';}"
    ),
)

bar = Bar()
bar.add_xaxis(keys)
# All four series share one stack so each aspect renders as a single stacked bar
for series_name, series in zip(["非常满意", "满意", "一般", "不满意"], levels):
    bar.add_yaxis(series_name, series, stack="stack1", category_gap="25%")
bar.set_series_opts(label_opts=percent_labels)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="线上学习满意度统计"),
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
)

bar.render_notebook()

In [124]:
# Preferred learning mode: column 90 holds a code 1..4, one per entry of keys
keys = ["完全线下","线下为主，线上为辅","线上线下混合","完全线上"]

mode_counts = data[90].value_counts()
# Share of all respondents (NUMS) per mode, paired with its label
res = [[mode, mode_counts[code] / NUMS] for code, mode in enumerate(keys, start=1)]

pie = create_pie(res, '统计学生更喜欢哪种学习模式')
pie.render_notebook()

In [125]:
# Radar chart built from the count of value 1 in each of columns 71..77,
# normalised so the values sum to 1.
# NOTE(review): range(71,78) reads seven columns, but radar_schema below defines
# only six indicators. The seventh value has no matching indicator yet still
# contributes to res.sum(), which changes the six plotted shares. Confirm the
# real number of options against COLS_NAME and align the range and the schema.
res = [data[idx].value_counts()[1] for idx in range(71,78)]
res = numpy.array(res)
res = res / res.sum()
res = [{"value": res.tolist(), "name": "线上学习优势"}]

# Every radar axis shares the same upper bound
MAX_VALUE = 0.35
radar_schema = [
    {"name": "更多名师优质课", "max": MAX_VALUE},
    {"name": "学习效果更好", "max": MAX_VALUE},
    {"name": "减轻学习负担", "max": MAX_VALUE},
    {"name": "增强学习自主性", "max": MAX_VALUE},
    {"name": "随时随地学习", "max": MAX_VALUE},
    {"name": "其他", "max": MAX_VALUE},
]

radar = create_radar(
    schema=radar_schema,
    data=res,
    minv=0,
    maxv=MAX_VALUE,
    title="线上学习优势分析"
)

radar.render_notebook()

In [126]:
# Average daily online study time per grade (rendered as a line chart)
keys = ["一年级", "二年级", "三年级", "四年级", "五年级", "六年级", "初一", "初二", "初三", "高一", "高二", "高三"]

# Column 1 is the grade code, column 22 the daily online study hours
df = data.iloc[:,[1,22]]
df.columns = ['年级', '学习时长']

# Mean per grade as sum / size, computed with a single groupby instead of one
# boolean-mask scan per grade. reindex() orders the result by grade code 1..12
# so it lines up with keys; a grade without respondents yields NaN.
hours_by_grade = df.groupby('年级')['学习时长']
mean_hours = (hours_by_grade.sum() / hours_by_grade.size()).reindex(range(1, 13))

# Round to one decimal place and convert to native floats for pyecharts
values = numpy.round(mean_hours.to_numpy(), 1).tolist()

line = create_line(
    keys=keys,
    values=values,
    title='学习时长和年龄关系分析',
    label='学习时长',
)
line.render_notebook()