In [None]:
import numpy as np
import pandas as pd
import warnings
import datetime as dt
import os

import matplotlib.pyplot as plt
# Use the SimHei font for sans-serif text; presumably chosen so that the Chinese
# axis labels and titles used throughout this notebook render instead of showing
# empty boxes (the font must be installed on the machine — TODO confirm).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative numbers with an ASCII hyphen rather than the Unicode minus sign,
# which fonts such as the one selected above may not contain.
plt.rcParams['axes.unicode_minus'] = False
%matplotlib inline

# from pandas.core.common import SettingWithCopyWarning
# warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# global dict and functions
# line_style_dict = {
#     '国网电动': '-',
#     '南网电动': '--',
#     '特来电': '-',
#     '星星充电': '-',
#     '阿维塔': '-',
# }

# def get_line_style(k):
#     return line_style_dict.get(k, '--')

# Hex RGB colour used for each operator's series in the charts.
color_style_dict = dict([
    ('国网电动', '#0D8C8F'),
    ('南网电动', '#00367A'),
    ('特来电', '#00A6CA'),
    ('星星充电', '#F0830C'),
    ('阿维塔', '#000000'),
])


def get_color_style(key):
    """Return the colour registered for ``key`` in ``color_style_dict``.

    Keys that are not registered fall back to black (``'#000000'``).
    """
    if key in color_style_dict:
        return color_style_dict[key]
    return '#000000'

def get_weekend_index(pd_date_range):
    """Return the 0-based positions of the weekend entries in ``pd_date_range``.

    Every element must provide ``weekday()``; an element counts as a weekend
    day when that method returns 5 or 6. Positions are returned in ascending
    order as a plain list.
    """
    return [
        position
        for position, day in enumerate(pd_date_range)
        if day.weekday() in {5, 6}
    ]

# files and date range
toc_order_file = r'D:\自动生成报表\充电桩_战略_公共充电订单(C端).xlsx'

# "Today" is taken exactly once so that the analysis window and the dated
# output folder below can never disagree (e.g. when the cell runs at midnight).
TODAY_DATE = dt.date.today()
FROM_DATE = dt.date(2023, 3, 1)
# The analysis window is [FROM_DATE, TO_DATE]; it ends yesterday.
TO_DATE = TODAY_DATE - dt.timedelta(days=1)
# Manual override; keep it a date (not a datetime) like FROM_DATE:
# TO_DATE = dt.datetime.strptime('2023-03-13', '%Y-%m-%d').date()

# One entry per calendar day of the window, plus the positions of its weekend days.
pd_date_range = pd.date_range(start=FROM_DATE, end=TO_DATE, freq='D')
weekend_index = get_weekend_index(pd_date_range)

# Figures are written to a folder named after the run date (YYYY-MM-DD).
output_dir = r'D:\自动生成报表\%s' % TODAY_DATE
# exist_ok avoids the check-then-create race and makes re-running the cell safe.
os.makedirs(output_dir, exist_ok=True)

# toc order define
# Column names applied to the order export, in sheet order.
toc_order_column_names = [
    "充电订单号",
    "充电完成状态",
    "用户编码",
    "用户昵称",
    "手机号【加密】",
    "运营商名称",
    "充电站名称",
    "充电枪编号",
    "订单电量(度)",
    "订单总金额(元)",
    "订单电费(元)",
    "订单服务费(元)",
    "实收金额",
    "点数卡券id",
    "点数卡对外展示名称",
    "点数卡名称",
    "点数卡抵扣金额（元）",
    "点数卡抵扣点数",
    "异常代码/原因",
    "订单创建日期",
    "充电完成日期",
    "支付完成日期",
    "订单创建时间",
    "充电完成时间",
    "支付完成时间",
    "运营商id",
    "充电站id",
]

# Columns that hold quantities / amounts.
_toc_order_float_columns = (
    "订单电量(度)",
    "订单总金额(元)",
    "订单服务费(元)",
    "订单电费(元)",
    "实收金额",
    "点数卡抵扣金额（元）",
    "点数卡抵扣点数",
)

# Columns that hold dates / timestamps.
_toc_order_datetime_columns = (
    "订单创建日期",
    "充电完成日期",
    "支付完成日期",
    "订单创建时间",
    "充电完成时间",
    "支付完成时间",
)

# Per-column dtype map handed to the reader. Every column defaults to text.
# The builtin `str` replaces `np.str0`, an alias that NumPy 2.0 removed.
toc_order_data_types = dict.fromkeys(toc_order_column_names, str)
toc_order_data_types.update(dict.fromkeys(_toc_order_float_columns, np.float32))
# An explicit unit is required: recent pandas versions refuse to cast to the
# unit-less `np.datetime64` dtype.
toc_order_data_types.update(dict.fromkeys(_toc_order_datetime_columns, "datetime64[ns]"))

In [None]:
# Load the raw order export and pre-process it.
df_toc_order = pd.read_excel(
    toc_order_file,
    names=toc_order_column_names,
    dtype=toc_order_data_types,
    engine='openpyxl',
)

# nan check
# df_toc_order.isna().any()

# Zero-fill the missing values of these three columns.
# A single DataFrame-level fillna replaces the per-column
# `df[col].fillna(0, inplace=True)` calls: those operate on a temporary object,
# emit warnings on pandas 2.x and have no effect under Copy-on-Write.
df_toc_order = df_toc_order.fillna({
    "实收金额": 0,
    "点数卡抵扣金额（元）": 0,
    "点数卡抵扣点数": 0,
})
# df_toc_order.describe()

# Drop orders whose energy is exactly zero. Rows with a missing energy value are
# kept, as `NaN != 0` is True.
has_energy = df_toc_order["订单电量(度)"] != 0

# Keep only orders completed inside the analysis window [FROM_DATE, TO_DATE]
# (both ends inclusive).
in_window = df_toc_order["充电完成日期"].between(
    pd.to_datetime(FROM_DATE), pd.to_datetime(TO_DATE)
)

# .copy() keeps df_target independent of the raw frame, which stays untouched.
df_target = df_toc_order[has_energy & in_window].copy()
# df_target.describe()

In [None]:
def _plot_hist_with_mean(ax, values, xlabel, title_format):
    """Draw a 50-bin histogram of ``values`` on ``ax`` and mark its mean.

    The mean is drawn as a dashed green vertical line and is substituted into
    ``title_format`` (a %-style format string) to build the panel title.
    """
    mean = values.mean()
    ax.hist(values, bins=50)
    ax.axvline(x=mean, color='green', linestyle='--')
    ax.set_xlabel(xlabel)
    ax.set_ylabel("累计数(个)")
    ax.set_title(title_format % mean)


# One row of four panels: energy, amount, unit price and completion hour.
fig, axes = plt.subplots(1, 4, figsize=(7 * 4, 6))

# Frequency distribution of order energy, order amount and price per kWh.
df_copy = df_target[["订单电量(度)", "订单总金额(元)"]].copy()
df_copy["度电单价(元)"] = df_copy["订单总金额(元)"] / df_copy["订单电量(度)"]

# The energy mean is a quantity in 度, so its title uses that unit (it used to
# be labelled 元 by copy-paste from the amount panel).
_plot_hist_with_mean(axes[0], df_copy["订单电量(度)"], "订单电量(度)", "订单电量分布\nmean = %.2f度")
_plot_hist_with_mean(axes[1], df_copy["订单总金额(元)"], "订单总金额(元)", "订单总金额分布\nmean = %.2f元")
_plot_hist_with_mean(axes[2], df_copy["度电单价(元)"], "度电单价(元)", "度电单价分布\nmean = %.3f元")

# Charging habit by hour of day: only orders that finished after they were created.
df_copy = df_target[df_target["充电完成时间"] > df_target["订单创建时间"]][["充电完成时间"]].copy()
df_copy["充电完成时段"] = df_copy["充电完成时间"].dt.hour

ax = axes[3]
# Explicit edges at -0.5, 0.5, ..., 23.5 give exactly one bar per hour, centred
# on its integer tick, regardless of which hours actually occur in the data.
ax.hist(df_copy["充电完成时段"], bins=np.arange(25) - 0.5, edgecolor='w')
ax.set_xticks(np.arange(24))
ax.set_xlabel("充电完成时段(点)")
ax.set_ylabel("累计数(个)")
ax.set_title("用户充电习惯(时段)分布")

fig.savefig(os.path.join(output_dir, '11.数据分布.png'), dpi=200, bbox_inches='tight')
plt.show()

In [None]:
def _plot_daily_totals(daily_groups, series_labels, ylabel, title_format, filename):
    """Plot per-day sums of the given columns as lines and save the figure.

    Parameters
    ----------
    daily_groups : groupby object keyed on "充电完成日期" (``as_index=False``).
    series_labels : dict mapping column name -> legend label, in plotting order.
    ylabel : y-axis label.
    title_format : %-style format string receiving ``(FROM_DATE, TO_DATE)``.
    filename : file name of the PNG written into ``output_dir``.
    """
    # Sum each column per day, then insert the days that have no orders at all
    # (filled with 0) so that every day of the window is plotted.
    all_days = pd.Index(pd_date_range, name="充电完成日期")
    daily = (
        daily_groups[list(series_labels)]
        .sum()
        .set_index("充电完成日期")
        .reindex(all_days, fill_value=0)
        .reset_index()
    )

    fig, ax = plt.subplots(figsize=(20, 4))
    for column, label in series_labels.items():
        ax.plot(daily["充电完成日期"], daily[column], label=label)
    ax.set_xlabel("充电完成日期")
    ax.set_ylabel(ylabel)
    ax.set_xticks(pd_date_range)
    # Grey out the tick labels of weekend days; the labels are fetched once
    # instead of on every loop iteration.
    tick_labels = ax.get_xticklabels()
    for position in weekend_index:
        tick_labels[position].set_color("grey")
    ax.set_title(title_format % (FROM_DATE, TO_DATE))
    ax.legend()
    fig.savefig(os.path.join(output_dir, filename), dpi=200, bbox_inches='tight')
    plt.show()


# Aggregate by order completion date.
groups = df_target.groupby("充电完成日期", as_index=False)

# Electricity fee / service fee
_plot_daily_totals(
    groups,
    {
        "订单总金额(元)": '订单总金额(元)',
        "订单电费(元)": '订单电费(元)',
        "订单服务费(元)": '订单服务费(元)',
    },
    ylabel="金额(元)",
    title_format='电费/服务费分析(%s - %s)',
    filename='21.总金额-电费-服务费.png',
)

# Amount actually paid / amount covered by point cards
_plot_daily_totals(
    groups,
    {
        "订单总金额(元)": '订单总金额(元)',
        "实收金额": '实收金额(元)',
        "点数卡抵扣金额（元）": '点数卡抵扣金额(元)',
    },
    ylabel="金额(元)",
    title_format='实收/权益抵扣分析(%s - %s)',
    filename='22.总金额-实收-抵扣.png',
)

# Energy / points deducted from point cards
_plot_daily_totals(
    groups,
    {
        "订单电量(度)": '订单电量(度)',
        "点数卡抵扣点数": '点数卡抵扣点数(度)',
    },
    ylabel="电量(度)",
    title_format='总度数/权益抵扣分析(%s - %s)',
    filename='23.总度数-权益抵扣.png',
)

In [None]:
# Aggregate by operator and order completion date.
groups = df_target.groupby(["运营商名称", "充电完成日期"], as_index=False)

# Energy per operator per day.
df_groups = groups[[
    "订单电量(度)",
    # "点数卡抵扣点数",
    ]].sum()
operators = df_groups["运营商名称"].unique()

# Insert the missing (operator, day) combinations with 0 so that every operator
# has exactly one row per day of the window, in pd_date_range order.
mux = pd.MultiIndex.from_product([operators, pd_date_range], names=["运营商名称", "充电完成日期"])
df = df_groups.set_index(["运营商名称", "充电完成日期"]).reindex(mux, fill_value=0).reset_index()

_, ax = plt.subplots(figsize=(20, 4))
# Running top of the stack, one slot per plotted day. Its length comes from the
# plotted axis itself rather than from `(TO_DATE - FROM_DATE).days + 1`, so it
# always matches the bars and does not fail when TO_DATE is a datetime.
stack_base = np.zeros(len(pd_date_range))
for k in operators:
    d = df[df["运营商名称"] == k]
    ax.bar(d["充电完成日期"], d["订单电量(度)"], label=k, bottom=stack_base, color=[get_color_style(k)])
    # Rebind instead of updating in place, so the array already handed to bar()
    # is never modified afterwards.
    stack_base = stack_base + d["订单电量(度)"].to_numpy()
ax.set_xlabel('充电完成日期')
ax.set_ylabel('订单电量(度)')
ax.set_xticks(pd_date_range)

# Grey out the tick labels of weekend days.
tick_labels = ax.get_xticklabels()
for position in weekend_index:
    tick_labels[position].set_color("grey")

ax.set_title('用户充电偏好分析（运营商）(%s - %s)' % (FROM_DATE, TO_DATE))
ax.legend(loc='best')
plt.savefig(os.path.join(output_dir, '31.用户充电偏好（运营商）.png'), dpi=200, bbox_inches='tight')
plt.show()

In [None]:
from pptx import Presentation
from pptx.chart.data import CategoryChartData
from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
from pptx.enum.shapes import MSO_CONNECTOR_TYPE
from pptx.enum.chart import XL_CHART_TYPE
from pptx.util import Inches,Cm,Pt
from pptx.enum.text import PP_PARAGRAPH_ALIGNMENT

from pptx.dml.color import RGBColor

# Demo deck: exercises charts, connectors, pictures, auto shapes and text
# formatting of python-pptx on a single slide, then saves it.
prs = Presentation()
slide_1 = prs.slides.add_slide(prs.slide_layouts[0])

# add_chart: insert a clustered column chart.
# Named chart_type rather than `type`, so the builtin type() is not shadowed
# for the rest of the notebook session.
chart_type = XL_CHART_TYPE.COLUMN_CLUSTERED
x, y, cx, cy = Inches(2), Inches(2), Inches(6), Inches(4.5)
chart_data = CategoryChartData()
chart_data.categories = ['East', 'West', 'Midwest']
chart_data.add_series('Series 1', (19.2, 21.4, 16.7))

slide_1.shapes.add_chart(chart_type, x, y, cx, cy, chart_data)

# Elbow connector from (2cm, 2cm) to (10cm, 10cm).
connector_type = MSO_CONNECTOR_TYPE.ELBOW
connector = slide_1.shapes.add_connector(
    connector_type, Cm(2), Cm(2), Cm(10), Cm(10)
)

# add_picture: insert an image at the top-left corner with an explicit size.
image_file = 'picture_1.jpg'
left, top = Cm(0), Cm(0)
width, height = Cm(25.4), Cm(19.0)
picture = slide_1.shapes.add_picture(image_file, left, top, width, height)

# Add an auto shape. The returned shape is kept so it can be filled below.
rounded_rect = slide_1.shapes.add_shape(
    MSO_AUTO_SHAPE_TYPE.ROUNDED_RECTANGLE,
    Cm(2), Cm(2), Cm(2), Cm(2)
)

# Length unit conversion.
length = Inches(1)
print(length)
print(length.inches, length.cm, length.pt)

# Fill the rounded rectangle with solid red. It is addressed through its own
# reference: slide_1.shapes[2] is the chart's graphic frame, which has no
# `fill` attribute, so indexing by position raised AttributeError here.
fill = rounded_rect.fill
fill.solid()  # solid fill
fill.fore_color.rgb = RGBColor(255, 0, 0)

# Inspect which shapes are placeholders, and the placeholder metadata.
for shape in slide_1.shapes:
    print(shape.is_placeholder)
print(slide_1.placeholders[0].name)
print(slide_1.shapes.title)
subtitle = slide_1.placeholders[1].placeholder_format.type

# Add text.
text_frame = slide_1.shapes[0].text_frame
text_frame.clear()
# After clear() the text frame still holds one (empty) paragraph, which is why
# index 0 can be assigned directly.
text_frame.paragraphs[0].text = 'hello_world'
# add_paragraph() appends a new paragraph; it becomes paragraphs[1].
text_frame.add_paragraph().text = 'hello_hangzhou'
text_frame.paragraphs[0].add_run().text = 'fuck the world'
print(text_frame.paragraphs[1].text)
print(text_frame.paragraphs[0].text)

# Replace the demo text above with the final title and subtitle.
ppt_title = slide_1.shapes[0].text_frame
sub_title = slide_1.shapes[1].text_frame
ppt_title.clear()
sub_title.clear()
ppt_title.paragraphs[0].text = '办公自动化'
sub_title.paragraphs[0].text = 'python-pptx使用详解'

# Paragraph formatting.
ppt_title.paragraphs[0].alignment = PP_PARAGRAPH_ALIGNMENT.LEFT
sub_title.paragraphs[0].alignment = PP_PARAGRAPH_ALIGNMENT.CENTER

# Font formatting.
ppt_title.paragraphs[0].font.name = '微软雅黑'
ppt_title.paragraphs[0].font.size = Pt(70)
ppt_title.paragraphs[0].font.bold = True
ppt_title.paragraphs[0].font.italic = True
ppt_title.paragraphs[0].font.color.rgb = RGBColor(255, 0, 0)

sub_title.paragraphs[0].font.name = '微软雅黑'
sub_title.paragraphs[0].font.size = Pt(40)
sub_title.paragraphs[0].font.bold = False
sub_title.paragraphs[0].font.italic = False
sub_title.paragraphs[0].font.color.rgb = RGBColor(255, 255, 0)

# Save the deck.
prs.save(r'd:\hello_ppt.pptx')


In [None]:
from pptx import Presentation

# Smallest possible deck: a single slide built from the first layout of the
# default template, written straight to disk.
prs = Presentation()
first_layout = prs.slide_layouts[0]
slide_1 = prs.slides.add_slide(first_layout)

prs.save(r'd:\hello.pptx')