In [3]:
import pandas as pd
from pandas.api.types import CategoricalDtype  # 用于DataFrame排序用。
import numpy as np

import time
from dateutil.relativedelta import relativedelta
import datetime
import calendar
from dateutil import rrule


class PandasAnalysis(object):
    # __doc__内容
    """
    作者：1979令狐冲
    E-mail：klmtldh@163.com

    pandas数据分析类
    类名称：pandasAnalysis
    函数：
        __init__(self,df)
        get_df(self) 返回类型pd.DataFrame
        cleaning(self,df,blank=None) 类返回型pd.DataFrame
        sort_list(self,obj,list_sort,column=None) 返回类型pd.DataFrame或类型pd.Series
        split_record(self,df: pd.DataFrame, sp_cloc: str, sign: str)返回类型pd.DataFrame
        plus_record(self,dfp: pd.DataFrame, pr_cloc: str, sign: str)返回类型pd.DataFrame
        split_period(self,df, start, end, interval)返回类型pd.DataFrame
        plus_period(self,dfp: pd.DataFrame, s_date,e_date,pr_cloc: str, sign: str)返回类型pd.DataFrame

    使用方法：
        1.实例化
        import pandas as pd
        import pandasAnalysis as pa
        df=pd.DataFrame(
                        [
                            ['A','B','C',pd.to_datetime('2020-12-31'),pd.to_datetime('2021-1-8'),'完成情况1'],
                            ['D','E','F',pd.to_datetime('2021-2-26'),pd.to_datetime('2021-3-7'),'完成情况2'],
                            ['C','E','F',pd.to_datetime('2021-1-20'),pd.to_datetime('2022-3-8'),'完成情况3'],
                        ],columns=['编号','责任人','关键任务','开始日期','结束日期','完成情况'])
        df_pa=pa.pandasAnalysis(df)
        2.get_df(self)函数
        获取df值，封装内部数据

    """

    def __init__(self, df, df_sort_list=None):
        self.df = df
        self.df_sort_list = df_sort_list
        self.df_cleaning = None


    # 数据清洗，主要删除重复值，删除缺失值，删除空格
    def cleaning(self, df=None, blank=None):
        if df is None:
            df=self.df
        else:
            pass
        # 删除重复值
        df.drop_duplicates(inplace=True)
        # 删除缺失值
        df.dropna(axis=0, how='all', inplace=True)
        # 删除空格
        if blank == 'both':
            df = df.applymap((lambda x: str.strip(x) if isinstance(x, str) else x))
        elif blank == 'all':
            df = df.applymap((lambda x: "".join(x.split()) if isinstance(x, str) else x))
        else:
            pass
        self.df_cleaning=df
        return df

    # 按照指定列及list排序，可以实现Series和DataFrame,也可以实现排序list少数排序列的值。
    @staticmethod
    def sort_list(obj, list_sort, column=None):
        object_=''
        if isinstance(obj, pd.Series):
            df = pd.DataFrame(obj)
            df = df.reset_index()
            df.columns = ['index', 'values']
            df1 = df[df['index'].isin(list_sort)].copy()
            df2 = df[~df['index'].isin(list_sort)].copy()
            df1['index'] = df1['index'].astype('category')
            df1['index'].cat.set_categories(list_sort, inplace=True)
            df1.sort_values('index', ascending=True, inplace=True)
            df = pd.concat([df1, df2])
            object_ = pd.Series(df['values'].values, index=df['index'])
        elif isinstance(obj, pd.DataFrame):
            for i in range(len(column)):
                list_sort[i] = list_sort[i] + list(set(obj[column[i]]).difference(set(list_sort[i])))
            for i in range(len(column)):
                cat_order = CategoricalDtype(
                    list_sort[i],
                    ordered=True
                )
                obj[column[i]] = obj[column[i]].astype(cat_order)
            object_ = obj.sort_values(column, axis=0, ascending=[True] * (len(column)))
        return object_

    def split_record(self, sp_cloc: str, sign: str, df=None):
        if df is None:
            df = self.df
        else:
            pass
        # 记录原始dataframe的长度，并作为后面追加数据的基准
        df.reset_index(inplace=True, drop=True)
        y = len(df)
        # 记录原始dataframe的长度
        len_df = len(df)
        for i in range(len(df)):
            # 将需要分列的列数据按照sign分列出来
            # print(i,df.loc[i, sp_cloc])
            list_str = str.split(df.loc[i, sp_cloc], sign)
            # 依据分列出来数据增加行
            for j in range(len(list_str)):
                y = y + j
                # 复制i行数据
                df.loc[y] = df.loc[i]
                # 将y行数据sp_cloc列数据设置为list_str中数据
                df.loc[y, sp_cloc] = list_str[j]
            y = y + 1
        # 取出新增加数据
        df = df.tail(len(df) - len_df)
        # 重置index
        df = df.reset_index(drop=True)
        return df

    # 将某列以外都相同的行以','进行合并
    def plus_record(self, dfp: pd.DataFrame, pr_cloc: str, sign: str):
        # 获取dfp的列名
        list_cl = list(dfp)
        # 去除需要合并列的列名
        list_cl.remove(pr_cloc)
        # 按照去除合并列名进行排序
        dfp.sort_values(list_cl, inplace=True)
        # 按照排序后行，重新设置index
        dfp = dfp.reset_index(drop=True)
        # 复制dfp，用以处理
        dfpc = dfp.copy()
        # 去除合并列
        dfpc.drop([pr_cloc], axis=1, inplace=True)
        # 进行查重处理
        list_dp = dfpc.duplicated()
        # 查找重复分界点
        x = list_dp[list_dp.isin([False])].index
        # 因为没有找到index插入的方法，将分界点index转为list
        list_x = []
        for q in range(len(x)):
            list_x.append(x[q])
        # 主要用于加入最后一条记录index
        list_x.append(len(dfp))

        # print(list_x)
        # x.append(int64(len(dfp)))
        yn = []
        # 循环获取重复记录段数据
        for i in range(len(list_x) - 1):
            # 判断是否有需要合并项
            if (list_x[i + 1] - list_x[i]) > 1:
                # 若有序号间隔大于1，则进入循环
                for j in range(list_x[i + 1] - list_x[i]):
                    # 取出需要合并数据，形成list
                    yn.append(dfp.loc[list_x[i] + j, pr_cloc])
                # 将list合并成以sign为分隔字符串。
                y = sign.join(yn)
                # 将字符串赋给dfp第一列
                dfp.loc[list_x[i], pr_cloc] = y
                # 删除多余项目
                for k in range(list_x[i + 1] - list_x[i] - 1):
                    dfp.drop(list_x[i] + 1 + k, axis=0, inplace=True)
                # 清空记录list
                yn = []
        # 重置index
        dfp = dfp.reset_index(drop=True)

        return dfp

    def get_n_day(self, date_time, n=1, m=0):
        # this_month_start = datetime.datetime(self.date_time.year, self.date_time.month, 1)
        this_month_nday = datetime.datetime(date_time.year, date_time.month, n)  # +datetime.timedelta(days=n)
        this_month_end = datetime.datetime(date_time.year, date_time.month,
                                           calendar.monthrange(date_time.year, date_time.month)[1])
        # n_month_start=this_month_start +relativedelta(months=m)
        n_month_end = this_month_end + relativedelta(months=m)
        n_month_nday = this_month_nday + relativedelta(months=m)
        return n_month_nday, n_month_end

    def get_current_week(self, date_time, n=1, w=0):
        monday, sunday = date_time, date_time
        one_day = datetime.timedelta(days=1)
        while monday.weekday() != 0:
            monday -= one_day
        while sunday.weekday() != 6:
            sunday += one_day
        # 返回当前的星期一和星期天的日期
        week_n = monday + datetime.timedelta(days=n)
        n_week_end = sunday + relativedelta(weeks=w)
        n_week_nday = week_n + relativedelta(weeks=w)
        return n_week_nday, n_week_end

    def split_period(self, df, start, end, interval):
        # 重置df.index，保证后面编号不覆盖。
        df = df.reset_index(drop=True)
        # 确定有几行
        df_scope = len(df)
        # 遍历所有行
        for i in range(df_scope):
            # 判断时间行是时间类型
            if isinstance(df[start][i], datetime.datetime) and isinstance(df[end][i], datetime.datetime):
                # 将开始时间行赋值给变量d_start;结束时间赋值给d_end。
                d_start = df[start][i]
                d_end = df[end][i]
                # 判断间隔值是m——月；w——月；d——日
                if interval == 'm':
                    delta = rrule.rrule(rrule.MONTHLY, dtstart=d_start, until=d_end).count()
                    # 解决跨月问题只要月份不同就判定为跨月
                    if self.get_n_day(df[start][i], m=delta - 1)[1] < df[end][i]:
                        delta = delta + 1
                    loop_delta = 0
                    if delta > 1:
                        loop_delta = delta
                        for j in range(loop_delta):
                            df_scope = j + df_scope
                            df.loc[df_scope] = df.loc[i]
                            # this_month_start,this_month_end = get_n_day(df[start][df_scope])
                            if j == 0:
                                df.loc[df_scope, end] = self.get_n_day(df[start][df_scope])[1]
                            elif j == loop_delta - 1:
                                df.loc[df_scope, start] = self.get_n_day(df[start][df_scope], m=j)[0]
                            else:
                                df.loc[df_scope, start], df.loc[df_scope, end] = self.get_n_day(df[start][df_scope],
                                                                                                m=j)
                        df.drop(index=[i], inplace=True)
                    df_scope = df_scope + 1
                # 判断间隔值是m——月；w——周；d——日
                if interval == 'w':
                    delta = rrule.rrule(rrule.WEEKLY, dtstart=d_start, until=d_end).count()
                    # 解决跨周问题,关键点是开始日期所在周推delta个周后的周末是否小于end日期
                    if self.get_current_week(df[start][i], w=delta - 1)[1] < df[end][i]:
                        delta = delta + 1
                    else:
                        pass

                    loop_delta = 0
                    if delta > 1:
                        loop_delta = delta
                        for j in range(loop_delta):
                            df_scope = j + df_scope
                            df.loc[df_scope] = df.loc[i]
                            if j == 0:
                                df.loc[df_scope, end] = self.get_current_week(df[start][df_scope])[1]
                            elif j == loop_delta - 1:
                                df.loc[df_scope, start] = self.get_current_week(df[start][df_scope], w=j)[0]


                            else:
                                df.loc[df_scope, start], df.loc[df_scope, end] = self.get_current_week(
                                    df[start][df_scope], w=j)

                        df.drop(index=[i], inplace=True)
                    df_scope = df_scope + 1
                if interval == 'd':
                    delta = rrule.rrule(rrule.DAILY, dtstart=d_start, until=d_end).count()
                    loop_delta = 0
                    if delta > 1:
                        loop_delta = delta
                        for j in range(loop_delta):
                            df_scope = j + df_scope
                            df.loc[df_scope] = df.loc[i]
                            df.loc[df_scope, start] = d_start + relativedelta(days=+j)
                            df.loc[df_scope, end] = d_start + relativedelta(days=+j)
                        df.drop(index=[i], inplace=True)
                    df_scope = df_scope + 1
        df.reset_index(inplace=True, drop=True)
        return df

    # 将某列以外都相同的行,按照时间','进行合并
    def plus_period(self, dfp: pd.DataFrame, s_date, e_date, pr_cloc: str, sign: str):
        # 获取dfp的列名
        list_cl = list(dfp)
        # 去除需要合并列的列名
        list_cl.remove(s_date)
        list_cl.remove(e_date)
        list_cl.remove(pr_cloc)
        # 按照去除合并列名进行排序
        dfp.sort_values(list_cl, inplace=True)
        # 按照排序后行，重新设置index
        dfp = dfp.reset_index(drop=True)
        # 复制dfp，用以处理
        dfpc = dfp.copy()
        # 去除合并列
        dfpc.drop([s_date, e_date, pr_cloc], axis=1, inplace=True)
        # 进行查重处理
        list_dp = dfpc.duplicated()
        # 查找重复分界点
        x = list_dp[list_dp.isin([False])].index
        # 因为没有找到index插入的方法，将分界点index转为list
        list_x = []
        for q in range(len(x)):
            list_x.append(x[q])
        # 主要用于加入最后一条记录index
        list_x.append(len(dfp))

        # print(list_x)
        # x.append(int64(len(dfp)))
        yn = []
        # 循环获取重复记录段数据
        for i in range(len(list_x) - 1):
            # 判断是否有需要合并项
            if (list_x[i + 1] - list_x[i]) > 1:
                # 若有序号间隔大于1，则进入循环

                for j in range(list_x[i + 1] - list_x[i]):
                    # 取出需要合并数据，形成list
                    yn.append(dfp.loc[list_x[i] + j, pr_cloc])
                # 将list合并成以sign为分隔字符串。
                y = sign.join(yn)
                # 排序
                dfp_d = dfp.loc[list_x[i]:list_x[i + 1] - 1, :].copy()
                dfp_d.sort_values(s_date, inplace=True)
                dfp_d.reset_index(drop=True, inplace=True)
                # 将字符串赋给dfp第一列
                dfp.loc[list_x[i], pr_cloc] = y
                dfp.loc[list_x[i], s_date] = dfp_d.loc[0, s_date]
                dfp.loc[list_x[i], e_date] = dfp_d.loc[list_x[i + 1] - list_x[i] - 1, e_date]

                # 删除多余项目
                for k in range(list_x[i + 1] - list_x[i] - 1):
                    dfp.drop(list_x[i] + 1 + k, axis=0, inplace=True)
                # 清空记录list
                yn = []
        # 重置index
        dfp = dfp.reset_index(drop=True)

        return dfp

    # 将pandas数据装换为文本
    def pandas_text(self, obj, drop_list=None, index_name=None, unit='项', punctuation=[',', '。']):
        tx = ''
        if isinstance(obj, pd.Series):
            text = ''
            for i in range(len(obj)):
                if i != len(obj) - 1:
                    text += str(obj.index[i]) + ' ' + str(obj[i]) + unit + punctuation[0]
                else:
                    text += str(obj.index[i]) + ' ' + str(obj[i]) + unit + punctuation[1]
            text = str(obj.name) + '：' + text
            tx = text

        if isinstance(obj, pd.DataFrame):
            text_list = []
            if index_name is None:

                for column in df.iteritems():
                    if column[0] not in drop_list:
                        text = ''
                        for i in range(len(column[1])):
                            if i != len(column[1]) - 1:
                                text += str(column[1].index[i]) + ' ' + str(column[1][i]) + unit + punctuation[0]
                            else:
                                text += str(column[1].index[i]) + ' ' + str(column[1][i]) + unit + punctuation[1]
                            # print('列名'+column[0],'\n',column[1])
                        text = str(column[1].name) + '：' + text
                        text_list.append(text)
            else:
                df.set_index(index_name, drop=True, inplace=True)
                for column in df.iteritems():
                    if column[0] not in drop_list:
                        text = ''
                        for i in range(len(column[1])):
                            if i != len(column[1]) - 1:
                                text += str(column[1].index[i]) + ' ' + str(column[1][i]) + unit + punctuation[0]
                            else:
                                text += str(column[1].index[i]) + ' ' + str(column[1][i]) + unit + punctuation[1]
                            # print('列名'+column[0],'\n',column[1])
                        text = str(column[1].name) + '：' + text
                        text_list.append(text)

            tx = text_list
        return tx

    # 获取DataFrame中null空缺值的个数，返回列表和文字；df为DataFrame，all默认值为1列出全部项，为0时只列出有null值的项。
    def null_item(self,df, all=1):
        null_ = df.isnull().sum()
        null_.sort_values(ascending=False, inplace=True)
        null_.name = '空缺值'
        if all:
            null_text = self.pandas_text(null_, '项')
        else:
            null_ = null_[null_.values != 0].copy()
            null_text = self.pandas_text(null_, '项', punctuation=[',', ',']) + '其余数据完整。'
        return null_, null_text


if __name__ == '__main__':
    #print(PandasAnalysis.__doc__)
    df = pd.DataFrame(
        [
            ['A', '孔令、刘 媛媛 ', 'C', pd.to_datetime('2020-12-31'), pd.to_datetime('2021-1-8'), '完成情况1'],
            ['D', '李 黎、李进 、昆 明', 'F', pd.to_datetime('2021-2-26'), pd.to_datetime('2021-3-7'), '完成情况2'],
            ['D', '王 玺', '1', pd.to_datetime('2021-1-20'), pd.to_datetime('2022-3-8'), '完成情况3'],
        ], columns=['编号', '责任人', '关键任务', '开始日期', '结束日期', '完成情况'])

    pa = PandasAnalysis(df)
    pa.cleaning(blank='all')
    print(pa.df)
    print(pa.df_cleaning)
    colum=['编号','责任人','关键任务']
    list_sort=[['D','A'],['李黎','王玺'],['F']]
    print(PandasAnalysis.sort_list(pa.df_cleaning,list_sort,colum))

  编号          责任人 关键任务       开始日期       结束日期   完成情况
0  A     孔令、刘 媛媛     C 2020-12-31 2021-01-08  完成情况1
1  D  李 黎、李进 、昆 明    F 2021-02-26 2021-03-07  完成情况2
2  D          王 玺    1 2021-01-20 2022-03-08  完成情况3
  编号       责任人 关键任务       开始日期       结束日期   完成情况
0  A    孔令、刘媛媛    C 2020-12-31 2021-01-08  完成情况1
1  D  李黎、李进、昆明    F 2021-02-26 2021-03-07  完成情况2
2  D        王玺    1 2021-01-20 2022-03-08  完成情况3
  编号       责任人 关键任务       开始日期       结束日期   完成情况
2  D        王玺    1 2021-01-20 2022-03-08  完成情况3
1  D  李黎、李进、昆明    F 2021-02-26 2021-03-07  完成情况2
0  A    孔令、刘媛媛    C 2020-12-31 2021-01-08  完成情况1


In [12]:
import pandas as pd
from pandas.api.types import CategoricalDtype
def sort_df(df,column,list_sort):
    
    for i in range(len(column)):
        #df_set.append(set(df[colum[i]]))
        list_sort[i] =list_sort[i] + list(set(df[colum[i]]).difference(set(list_sort[i])))
    print(list_sort)
    for i in range(len(column)):
        cat_order = CategoricalDtype(
            list_sort[i],
            ordered=True
        )

        df[column[i]] = df[column[i]].astype(cat_order)

    df = df.sort_values(column, axis=0, ascending=[True]*(len(column)))

    df.reset_index(drop=True, inplace=True)
    return df
df = pd.DataFrame(
    [
        ['A', '孔令、刘 媛媛 ', 'C', pd.to_datetime('2020-12-31'), pd.to_datetime('2021-1-8'), '完成情况1'],
        ['D', '李黎、李进 、昆 明', 'F', pd.to_datetime('2021-2-26'), pd.to_datetime('2021-3-7'), '完成情况2'],
        ['C', '王玺', '1', pd.to_datetime('2021-1-20'), pd.to_datetime('2022-3-8'), '完成情况3'],
    ], columns=['编号', '责任人', '关键任务', '开始日期', '结束日期', '完成情况'])
colum=['编号','责任人','关键任务']
list_sort=[['D','A'],['李黎','王玺'],['F']]
sort_df(df,colum,list_sort)


[['D', 'A', 'C'], ['李黎', '王玺', '李黎、李进 、昆 明', '孔令、刘 媛媛 '], ['F', '1', 'C']]


Unnamed: 0,编号,责任人,关键任务,开始日期,结束日期,完成情况
0,D,李黎、李进 、昆 明,F,2021-02-26,2021-03-07,完成情况2
1,A,孔令、刘 媛媛,C,2020-12-31,2021-01-08,完成情况1
2,C,王玺,1,2021-01-20,2022-03-08,完成情况3


In [33]:
import pandas as pd
from pandas.api.types import CategoricalDtype
def sort_df(df,column,list_sort):
    
    for i in range(len(column)):
        #df_set.append(set(df[colum[i]]))
        list_sort[i] =list_sort[i] + list(set(df[colum[i]]).difference(set(list_sort[i])))
    print(list_sort)
    for i in range(len(column)):
        cat_order = CategoricalDtype(
            list_sort[i],
            ordered=True
        )

        df[column[i]] = df[column[i]].astype(cat_order)

    df = df.sort_values(column, axis=0, ascending=[True]*(len(column)))

    df.reset_index(drop=True, inplace=True)
    return df
if __name__ == '__main__':
    colum=['科室','工作岗位']
    list_sort=[['部门领导','安全监察科','应急与保供电管理科','安全督查大队'],
               ['经理','副经理','主管','安全监察专责(A)','安全监察专责(B)',
                '应急管理专责','风险体系管理专责(A)','风险体系管理专责(B)',
               '班长','安全监察员']]
    ef=pd.ExcelFile(r'D:\JGY\600-Data\002-in输入文件\02-work工作\03-document工作文档\001-组织\昆明供电局安监部人员信息.xlsx',engine='openpyxl')
    df=ef.parse('昆明供电局安监部人员信息')
    df1=sort_df(df,colum,list_sort)
    df1.to_excel(r'D:\JGY\600-Data\002-in输入文件\02-work工作\03-document工作文档\001-组织\昆明供电局安监部人员信息.xlsx',
                 engine='openpyxl',
                 index=False,
                 sheet_name='昆明供电局安监部人员信息')
    

[['部门领导', '安全监察科', '应急与保供电管理科', '安全督查大队'], ['经理', '副经理', '主管', '安全监察专责(A)', '安全监察专责(B)', '应急管理专责', '风险体系管理专责(A)', '风险体系管理专责(B)', '班长', '安全监察员']]


In [12]:
import pandas as pd
import pptx
#from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE
from pptx.util import Inches,Centipoints,Cm,Emu,Mm,Pt
from pptx.enum.dml import MSO_THEME_COLOR
from pptx.dml.color import ColorFormat,RGBColor
from pptx.enum.text import PP_ALIGN

from pptx.chart.data import ChartData,CategoryChartData
from pptx.enum.chart import XL_CHART_TYPE

from pptx.enum.chart import XL_LABEL_POSITION
from pptx.enum.chart import XL_LEGEND_POSITION


class OasPptx(object):
    '''
    p_slide_layout=1
    若指定了PPT文件，p_slide_layout的数字为指定PPT中第n页的布局。
    pptx的文档结构是pptx->slide->shape(text frame)->paragraph->run(一段话中的任意部分)，所以程序处理也差不多按着这个顺序来。
    Presentation pptx文档
    slide 幻灯片——layout 布局
        auto shape 图形(shapes)
            title 标题
            placeholders 占位符
                text_frame 框架
                    paragraph 段落
            textbox 文本框
        picture 图片
        graphic frame 图形框
        group shape 组合图形
        line/connector 线/连接器
        content part 内容部分


        @author: Kongling

    '''

    def __init__(self,
                 p_path=None,
                 p_slide_layout=6,
                 f_name='微软雅黑',
                 p_height=6858000, p_width=12192000
                 ):
        self.p_path = p_path
        self.p_slide_layout = p_slide_layout
        self.f_name = f_name
        # 实例化Presentation为prs
        self.prs = pptx.Presentation(p_path)
        # 设置slide幻灯片layout布局，并赋予slide_layout
        self.slide_layout = self.prs.slide_layouts[self.p_slide_layout]
        # 设置PPT为16：9的宽屏
        self.prs.slide_height = p_height  # 设置ppt的高度
        self.prs.slide_width = p_width  # 设置ppt的宽度
        # 添加页面
        self.slide = self.prs.slides.add_slide(self.slide_layout)
        # 添加形状
        self.shapes = self.slide.shapes

    def alter_csg_logo_first_page(self, text_list):
        text_frame = self.prs.slides[0].shapes[0].text_frame
        # text_frame.clear()
        p = text_frame.paragraphs[0]
        p.text = text_list[0]
        p.font.bold = True
        # 设置字体，一旦有中文就不正常，英文还好
        p.font.name = '微软雅黑'
        # 设置字体大小
        p.font.size = Pt(44)
        # 设置颜色
        p.font.color.rgb = RGBColor(0, 54, 122)

        text_frame = self.prs.slides[0].shapes[1].text_frame
        p = text_frame.paragraphs[0]
        p.text = text_list[1]
        p.font.bold = True
        # 设置字体，一旦有中文就不正常，英文还好
        p.font.name = '微软雅黑'
        # 设置字体大小
        p.font.size = Pt(24)
        # 设置颜色
        p.font.color.rgb = RGBColor(0, 0, 0)

        text_frame = self.prs.slides[0].shapes[2].text_frame
        p = text_frame.paragraphs[0]
        p.text = text_list[2]
        p.font.bold = True
        # 设置字体，一旦有中文就不正常，英文还好
        p.font.name = '微软雅黑'
        # 设置字体大小
        p.font.size = Pt(24)
        # 设置颜色
        p.font.color.rgb = RGBColor(0, 0, 0)

    def page(self):
        # 添加slide幻灯片，并赋予slide
        self.slide = self.prs.slides.add_slide(self.slide_layout)
        self.shapes = self.slide.shapes

    def line(self):
        slide = 0

    def text(self, text, font_name=None, font_bold=False, alig='left', font_size=22, font_color=(0, 0, 0),
             textbox_left=2, textbox_top=2, textbox_width=90, textbox_height=20):
        textbox_left = int(0.01 * textbox_left * self.prs.slide_width)
        textbox_top = int(0.01 * textbox_top * self.prs.slide_height)
        textbox_width = int(0.01 * textbox_width * self.prs.slide_width)
        textbox_height = int(0.01 * textbox_height * self.prs.slide_height)
        if font_name == None:
            font_name = self.f_name
        # 在指定位置插入一个文本框，我按比例填的。
        tBox = self.slide.shapes.add_textbox(left=textbox_left,
                                             top=textbox_top,
                                             width=textbox_width,
                                             height=textbox_height)
        # 格式化为文本格式
        tf = tBox.text_frame
        tf.word_wrap = True
        # 插入段落
        p = tf.add_paragraph()
        # 设置对齐方式
        if alig == 'left':
            p.alignment = PP_ALIGN.LEFT
        elif alig == 'center':
            p.alignment = PP_ALIGN.CENTER

        p.level = 1
        # 设置粗体
        p.font.bold = font_bold
        # 设置字体，一旦有中文就不正常，英文还好
        p.font.name = font_name
        # 设置字体大小
        p.font.size = Pt(font_size)
        # 设置颜色
        p.font.color.rgb = RGBColor(font_color[0], font_color[1], font_color[2])
        # 设置文本内容
        p.text = text

    def text_n(self, text, font_name=None, font_bold=False, alig='left', font_size=22, font_color=(0, 0, 0),
               textbox_left=2, textbox_top=2, textbox_width=90, textbox_height=20):
        textbox_left = int(0.01 * textbox_left * self.prs.slide_width)
        textbox_top = int(0.01 * textbox_top * self.prs.slide_height)
        textbox_width = int(0.01 * textbox_width * self.prs.slide_width)
        textbox_height = int(0.01 * textbox_height * self.prs.slide_height)
        if font_name == None:
            font_name = self.f_name
        # 在指定位置插入一个文本框，我按比例填的。
        tBox = self.slide.shapes.add_textbox(left=textbox_left,
                                             top=textbox_top,
                                             width=textbox_width,
                                             height=textbox_height)
        # 格式化为文本格式
        tf = tBox.text_frame
        tf.word_wrap = True
        if isinstance(text, str):
            # 插入段落
            p = tf.add_paragraph()
            # 设置粗体
            p.font.bold = font_bold
            # 设置字体，一旦有中文就不正常，英文还好
            p.font.name = font_name
            # 设置字体大小
            p.font.size = Pt(font_size)
            # 设置颜色
            p.font.color.rgb = RGBColor(font_color[0], font_color[1], font_color[2])
            # 设置文本内容
            p.text = '       ' + text
        elif isinstance(text, list):
            for i in range(len(text)):
                # 插入段落
                p = tf.add_paragraph()
                # 设置粗体
                p.font.bold = font_bold
                # 设置字体，一旦有中文就不正常，英文还好
                p.font.name = font_name
                # 设置字体大小
                p.font.size = Pt(font_size)
                # 设置颜色
                p.font.color.rgb = RGBColor(font_color[0], font_color[1], font_color[2])
                # 设置文本内容
                p.text = '       ' + text[i]

    def pic(self, img_path, left=0, top=0, width=None, height=None):
        # 全屏插入一张图片，图片最好提前处理长宽比，因为是不锁比例拉伸。
        pic = self.slide.shapes.add_picture(img_path, left=left, top=top, width=width, height=height)
        # 将刚插入的图片至于底层
        # self.slide.shapes._spTree.insert(1, pic._element)

    # 插入表格，单位都是页面的百分比，1表示
    def p_table(self, df, left=2, top=25, width=95, height=60):
        rows = df.shape[0]+1
        cols = df.shape[1]
        left = int(0.01 * left * self.prs.slide_width)
        top = int(0.01 * top * self.prs.slide_height)
        width = int(0.01 * width * self.prs.slide_width)
        height = int(0.01 * height * self.prs.slide_height)
        table = self.slide.shapes.add_table(rows, cols, left, top, width, height).table

        # 获取表头列
        head = list(df)
        # 计算表格有效列宽,页面宽度90%计算平均值
        columns_width = int(width / len(head))
        # 调整表格宽度
        for i in range(len(head)):
            table.columns[i].width = columns_width
        for i in range(rows):
            if i == 0:
                for j in range(cols):
                    table.cell(i, j).text = str(head[j])
            else:
                for j in range(cols):
                    table.cell(i, j).text = str(df.iloc[i-1, j])

        return table

    def chart(self, df, chart_class='bar', left=2, top=25, width=95, height=60):
        # 获取第一例的列名，作为categories
        categories = df.iloc[:, 0].tolist()
        # 获取第二列滞后的数据
        series_name = df.iloc[:, 1:]
        # 将第二列后的数据，按照列转换成系列
        series = series_name.to_dict(orient='series')
        # define chart data ---------------------
        chart_data = ChartData()
        chart_data.categories = categories
        for key in series:
            chart_data.add_series(key, series[key].tolist())

        left = int(0.01 * left * self.prs.slide_width)
        top = int(0.01 * top * self.prs.slide_height)
        width = int(0.01 * width * self.prs.slide_width)
        height = int(0.01 * height * self.prs.slide_height)
        if chart_class == 'bar':
            graphic_frame = self.slide.shapes.add_chart(
                XL_CHART_TYPE.COLUMN_CLUSTERED, left, top, width, height, chart_data
            )
        elif chart_class == 'pie':            
            graphic_frame = self.slide.shapes.add_chart(
                XL_CHART_TYPE.PIE, left, top, width, height, chart_data
            )
        elif chart_class == 'line':
            graphic_frame = self.slide.shapes.add_chart(
                XL_CHART_TYPE.LINE, left, top, width, height, chart_data
            )

        chart = graphic_frame.chart
        if chart_class == 'line':
            chart.series[0].smooth = False
        else:
            plot = chart.plots[0]
            plot.has_data_labels = True
            data_labels = plot.data_labels

            data_labels.font.size = Pt(13)
            data_labels.font.color.rgb = RGBColor(0x0A, 0x42, 0x80)
            data_labels.position = XL_LABEL_POSITION.OUTSIDE_END

        if chart_class == 'pie':
            data_labels.number_format = '0%'

        if chart_class in ['bar', 'line'] and df.shape[1] > 2:
            chart.has_legend = True
            chart.legend.position = XL_LEGEND_POSITION.RIGHT
            chart.legend.include_in_layout = False
        else:
            chart.has_legend = True
            chart.legend.position = XL_LEGEND_POSITION.BOTTOM
            chart.legend.include_in_layout = False

    # 保存为文件
    def save_pptx(self, pptx_name):
        self.prs.save(pptx_name)  # 保存文档

if __name__ == '__main__':
    df = pd.DataFrame(
    [
        ['安监科',1,2,3],
        ['应急科',2,3,5],
        ['安监部', 3,5,8],
    ], columns=['科室', '是', '否', '合计'])
    df_pie = df[['科室','合计']].head(2)
    op = OasPptx()
    op.chart(df_pie,'pie')
    op.save_pptx(r'D:\JGY\600-Data\006-temporary临时文件\text.pptx')
    
df

<pptx.chart.data.ChartData object at 0x00000251C8995250>


Unnamed: 0,科室,是,否,合计
0,安监科,1,2,3
1,应急科,2,3,5
2,安监部,3,5,8


In [59]:
import pandas as pd
df = pd.DataFrame(
    [
        ['安监科、安监部','安监科、应急科','1、、、、、','3'],
        ['应急科','安监科、应急科',3,5],
        ['安监部','安监部、应急科',5,8],
    ], columns=['科室', '是', '否', '合计'])
contains_text = '|'.join(['安监科'])
# # df_plan_sf=df_plan[(df_plan['检查负责人'].isin(df_name_list))|(df_plan['检查人'].str.contains(contains_text)
# df_plan_sf=df[df['是'].str.contains(contains_text)]
df_record=df.copy()
# df1=df['合计'].str.split('、', expand=True)
# print(df1)
# df=pd.merge(df, pd.DataFrame(df['是'].str.split('、',expand =True)),how='left',left_index=True,right_index=True)
# df=pd.merge(df, pd.DataFrame(df['科室'].str.split('、',expand =True)),how='left',left_index=True,right_index=True)
# df=pd.merge(df, pd.DataFrame(df['否'].str.split('、',expand =True)),how='left',left_index=True,right_index=True)
# df=pd.merge(df, pd.DataFrame(df['否'].str.split('、',expand =True)),how='left',left_index=True,right_index=True)

df_count_list=['应急科']
# df2
record_count_columns=['科室', '是']
df_list = []
for item in record_count_columns:
    rccs = df_record[item].str.split('、', expand=True)
    for j in range(rccs.shape[1]):
        df_record1 = df_record[rccs[j].isin(df_count_list)]
        df_list.append(df_record1)
df_record_count = pd.concat(df_list)
df_record_count.drop_duplicates(inplace=True)
df_record_count


Unnamed: 0,科室,是,否,合计
1,应急科,安监科、应急科,3,5
0,安监科、安监部,安监科、应急科,1、、、、、,3
2,安监部,安监部、应急科,5,8


In [None]:
data['Months'] = data['Months'].str.extract(r'(.+?),', expand=True)