In [None]:
import numpy as np
import pandas as pd
import warnings
import datetime as dt
import os

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
%matplotlib inline

# from pandas.core.common import SettingWithCopyWarning
# warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Source workbook: export of public charging orders (consumer side).
# Raw string literals keep the Windows backslashes literal instead of relying
# on unrecognised escape sequences (e.g. "\%"), which newer Python versions
# report as a SyntaxWarning.
toc_order_file = r'D:\充电桩_战略_公共充电订单(C端)_20230313.xlsx'

# Inclusive analysis window.
FROM_DATE = dt.datetime.strptime("2023-03-01", "%Y-%m-%d")
TO_DATE = dt.datetime.strptime("2023-03-12", "%Y-%m-%d")

# Generated charts go into a folder named after today's date.
output_dir = r'D:\自动生成报表\%s' % dt.date.today()
# exist_ok=True removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(output_dir, exist_ok=True)

# Column names applied to the workbook when it is loaded. They are passed as
# `names=` to pd.read_excel, so their order must match the worksheet's columns.
toc_order_column_names = [
    "充电订单号",
    "充电完成状态",
    "用户编码",
    "用户昵称",
    "手机号【加密】",
    "运营商名称",
    "充电站名称",
    "充电枪编号",
    "订单电量(度)",
    "订单总金额(元)",
    "订单电费(元)",
    "订单服务费(元)",
    "实收金额",
    "点数卡券id",
    "点数卡对外展示名称",
    "点数卡名称",
    "点数卡抵扣金额（元）",
    "点数卡抵扣点数",
    "异常代码/原因",
    "订单创建日期",
    "充电完成日期",
    "支付完成日期",
    "订单创建时间",
    "充电完成时间",
    "支付完成时间",
    "运营商id",
    "充电站id",
]

# Per-column dtypes for pd.read_excel.
# Every column defaults to text. The builtin `str` is used because the
# `np.str0` alias was removed in NumPy 2.0.
toc_order_data_types = dict.fromkeys(toc_order_column_names, str)

# Energy, amounts and point deductions are numeric.
# NOTE(review): float32 carries roughly 7 significant digits; consider float64
# if summed totals must be exact to the cent.
toc_order_data_types.update(dict.fromkeys(
    [
        "订单电量(度)",
        "订单总金额(元)",
        "订单服务费(元)",
        "订单电费(元)",
        "实收金额",
        "点数卡抵扣金额（元）",
        "点数卡抵扣点数",
    ],
    np.float32,
))

# Date and timestamp columns. The unit is spelled out because pandas 2.x
# rejects the unit-less `np.datetime64` as a cast target.
toc_order_data_types.update(dict.fromkeys(
    [
        "订单创建日期",
        "充电完成日期",
        "支付完成日期",
        "订单创建时间",
        "充电完成时间",
        "支付完成时间",
    ],
    "datetime64[ns]",
))

In [None]:
# Load the source workbook and pre-process it.
# `names=` supplies the column names and `dtype=` the per-column types.
df_toc_order = pd.read_excel(
    toc_order_file,
    names=toc_order_column_names,
    dtype=toc_order_data_types,
    engine='openpyxl',
)

# Treat missing paid amounts / point deductions as zero.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form emits a warning on pandas 2.x
# and does not update the parent frame once Copy-on-Write is enabled.
df_toc_order = df_toc_order.fillna({
    "实收金额": 0,
    "点数卡抵扣金额（元）": 0,
    "点数卡抵扣点数": 0,
})

# Discard orders with zero energy. Rows whose energy is missing are kept,
# as they were with the previous equality-based drop.
df_toc_order = df_toc_order[df_toc_order["订单电量(度)"] != 0]

# Restrict to the analysis window [FROM_DATE, TO_DATE], both ends inclusive.
# The copy gives downstream cells a frame independent of df_toc_order.
in_window = df_toc_order["充电完成日期"].between(FROM_DATE, TO_DATE)
df_target = df_toc_order[in_window].copy()

In [None]:
# Frequency distribution of per-order energy and per-order total amount.
hist_columns = ["订单电量(度)", "订单总金额(元)"]
hist_path = os.path.join(output_dir, '11.订单电量、总金额分布.png')

df_target[hist_columns].hist(bins=50, figsize=(10, 4))
plt.savefig(hist_path, dpi=200, bbox_inches='tight')
plt.show()

In [None]:
# Daily totals: group the in-window orders by charging-completion date.
groups = df_target.groupby("充电完成日期", as_index=True)


def _plot_daily_totals(grouped, columns, ylabel, title, filename):
    """Sum `columns` per group, draw the sums as a line chart and save it.

    Parameters
    ----------
    grouped : pandas GroupBy
        Orders grouped by date.
    columns : list of str
        Columns to total and plot, one line per column.
    ylabel : str
        Y-axis label.
    title : str
        Chart title; the analysis window is appended as "(FROM - TO)".
    filename : str
        PNG file name, written inside `output_dir`.

    Returns
    -------
    pandas.DataFrame
        The per-group sums that were plotted.
    """
    totals = grouped[columns].sum()
    totals.plot(kind='line', figsize=(20, 4))
    plt.xlabel("订单完成日期")
    plt.ylabel(ylabel)
    plt.title('%s(%s - %s)' % (title, FROM_DATE.date(), TO_DATE.date()))
    plt.savefig(os.path.join(output_dir, filename), dpi=200, bbox_inches='tight')
    plt.show()
    return totals


# One entry per chart: (columns, y-axis label, title, output file name).
_daily_charts = [
    # Electricity fee vs. service fee
    (
        ["订单总金额(元)", "订单电费(元)", "订单服务费(元)"],
        "金额(元)",
        '电费/服务费分析',
        '21.总金额-电费-服务费.png',
    ),
    # Amount actually paid vs. points-card deduction
    (
        ["订单总金额(元)", "实收金额", "点数卡抵扣金额（元）"],
        "金额(元)",
        '实收/权益抵扣分析',
        '22.总金额-实收-抵扣.png',
    ),
    # Energy delivered vs. points deducted
    (
        ["订单电量(度)", "点数卡抵扣点数"],
        "度电",
        '总度数/权益抵扣分析',
        '23.总度数-权益抵扣.png',
    ),
]

# df_groups is left bound to the last chart's totals, as before.
for _columns, _ylabel, _title, _filename in _daily_charts:
    df_groups = _plot_daily_totals(groups, _columns, _ylabel, _title, _filename)

In [None]:
# Energy totals per operator and per charging-completion date
# (which operators users prefer).
groups = df_target.groupby(["运营商名称", "充电完成日期"], as_index=False)
df_groups = groups[["订单电量(度)"]].sum()

# Build the full operator x day grid so that days without any order for an
# operator appear with a total of 0.
operator_names = df_groups["运营商名称"].unique()
window_days = pd.date_range(start=FROM_DATE, end=TO_DATE, freq='D')
mux = pd.MultiIndex.from_product(
    [operator_names, window_days],
    names=["运营商名称", "充电完成日期"],
)
data = (
    df_groups
    .set_index(["运营商名称", "充电完成日期"])
    .reindex(mux, fill_value=0)
    .reset_index()
)

# Stacked bar chart: each operator's bars sit on top of the running total of
# the operators drawn before it.
plt.figure(figsize=(20, 4))
stack_base = np.zeros(shape=((TO_DATE - FROM_DATE).days + 1))
for operator in operator_names:
    operator_rows = data[data["运营商名称"] == operator]
    energy = operator_rows["订单电量(度)"]
    plt.bar(operator_rows["充电完成日期"], energy, label=operator, bottom=stack_base)
    stack_base += energy.to_numpy()

plt.xlabel('充电完成日期')
plt.ylabel('订单电量(度)')
plt.title('用户充电偏好分析（运营商）(%s - %s)' % (FROM_DATE.date(), TO_DATE.date()))
plt.legend(loc='best')

chart_path = os.path.join(output_dir, '31.用户充电偏好（运营商）.png')
plt.savefig(chart_path, dpi=200, bbox_inches='tight')
plt.show()