# 分析不合规合同

基本思路: 读取每周的统计数据, 生成一个全量的数据集存入本地 Excel 中, 然后根据全量的数据集统计数据做数据分析. 同时根据最新的 KPI 考核规范改写相关统计逻辑.


In [93]:
from functools import partial
from pathlib import Path
import pandas as pd
# pd.options.display.max_rows = 5


将周统计 Excel 文件中的多个 sheet 映射成内部定义的名称, 实现对周统计 Excel 的解耦.


In [115]:
# config
sheet_names = ['新签续签合同',
               '应终止合同',
               '应结算合同',
               '不规范合同',
               '产园项目清单',
               '所有组织机构',
               ]
name_for_df = ['new',
               'termination',
               'settlement',
               'irregular',
               'projects',
               'org'
               ]

df_sheets = pd.DataFrame({
    'sheet_name': sheet_names,
    'df_name': name_for_df
})

df_sheets


Unnamed: 0,sheet_name,df_name
0,新签续签合同,new
1,应终止合同,termination
2,应结算合同,settlement
3,不规范合同,irregular
4,产园项目清单,projects
5,所有组织机构,org


获取最新的周统计 Excel 文件的句柄.


In [95]:
# get or create 'data' directory
data_dir_name = 'data'
data_dir = Path.cwd() / data_dir_name
if not data_dir.exists():
    data_dir.mkdir()
# get all irregular contracts xlsx files into a array
irregular_contracts_dir = data_dir / 'irregular_contracts'
if not irregular_contracts_dir.exists():
    irregular_contracts_dir.mkdir()
files = [f for f in sorted(irregular_contracts_dir.glob('20*.xlsx'))]
filename = files[-1]
filename


PosixPath('/Users/levin/workspace/git-repositories/anaconda/study-pandas-tutorials/Work/data/irregular_contracts/20220401.xlsx')

将最新的周统计即本周期的 Excel 文件中的 sheets 映射成 DataFrame.


In [96]:
def map_sheet_name(sheet_name_, df_, lookup, target):
    """Map sheet names in Excel file

    Parameters:
    ----------
    sheet_name_: str
      sheet name in Excel file
    df_: DataFrame
      mapping relations
    lookup: str
      filed name in DataFrame (df_) that is sheet name in excel file
    target: str
      map to name

    Returns:
    -------
    name: str
    """
    values = df_.loc[df_[lookup] == sheet_name_][target].values
    if len(values) > 0:
        return values[0]
    return sheet_name_


map_name = partial(map_sheet_name, df_=df_sheets,
                   lookup='sheet_name', target='df_name')


In [116]:
dfs = {}
xls = pd.ExcelFile(filename)
for sheet_name in xls.sheet_names:
    dfs[map_name(sheet_name)] = pd.read_excel(xls, sheet_name)


构造分公司, 项目部, 项目三级组织机构


In [98]:
def industry_org(df_):
    industry_id = 1001
    df = df_.rename(columns={
        '机构id': 'id',
        '机构名称': 'org_name',
        '上级id': 'pid',
        '上级机构名称': 'p_org_name'
    })
    # branch
    df_branch = df.loc[df['pid'] == industry_id]
    # project department
    df_dept = pd.merge(
        df, df_branch[['id']], left_on='pid', right_on='id', suffixes=('', '_y'))
    df_industry = pd.merge(df_branch, df_dept,
                           left_on='id', right_on='pid', suffixes=('_branch', '_dept'))
    df_industry = df_industry[['id_branch',
                              'org_name_branch',
                               'id_dept',
                               'org_name_dept']] \
        .rename(columns={
            'org_name_branch': 'branch_name',
            'org_name_dept': 'dept_name'
        })

    return df_industry


In [117]:
df_org = industry_org(dfs['org'])
df_org.head()


Unnamed: 0,id_branch,branch_name,id_dept,dept_name
0,1005,园区运营中心,1436204,北京产业创新中心
1,1005,园区运营中心,1436205,价值工厂
2,1005,园区运营中心,1436206,南海意库-商业
3,1005,园区运营中心,1436207,蛇口网谷-商业
4,1005,园区运营中心,1437198,创业壹号A座招商创库


In [100]:
def project_org(df_left, df_right):
    df_projects = df_right.rename(columns={
        'ORGAN_ID': 'org_id',
        '项目名称': 'project_name',
        '上级机构id': 'pid',
        '上级机构名称': 'p_org_name'
    })
    df_all = pd.merge(df_left, df_projects, left_on='id_dept', right_on='pid')
    df_all = df_all[['id_branch', 'branch_name',
                    'id_dept', 'dept_name', 'org_id']]
    df_all = df_all.rename(columns={'id_branch': 'branch_id',
                                    'id_dept': 'dept_id',
                                    'org_id': 'project_id'
                                    })
    return df_all


In [118]:
df_org_projects = project_org(df_org, dfs['projects'])
df_org_projects


Unnamed: 0,branch_id,branch_name,dept_id,dept_name,project_id
0,1005,园区运营中心,1436204,北京产业创新中心,1435203
1,1005,园区运营中心,1436205,价值工厂,1413262
2,1005,园区运营中心,1436205,价值工厂,1413263
3,1005,园区运营中心,1436206,南海意库-商业,1433221
4,1005,园区运营中心,1436207,蛇口网谷-商业,1412260
...,...,...,...,...,...
91,1435224,产园-武汉公司,1434222,东湖网谷,1427224
92,1435224,产园-武汉公司,1434222,东湖网谷,1436221
93,1435224,产园-武汉公司,1434223,高新网谷,1427236
94,1435225,产园-青岛公司,1435226,蓝湾网谷,1421248


将不合规合同的类型和分公司结合, 创建分公司不合规类型表, 该表用于后续的统计分析, 解决分公司对某种不合规类型没有数据的问题.


In [119]:
df_branch = df_org['branch_name'].drop_duplicates()
df_irregular_category = dfs['irregular']['情况'].drop_duplicates()
df_irregular_category_with_branch = pd.merge(
    df_branch, df_irregular_category, how='cross')
df_irregular_category_with_branch


Unnamed: 0,branch_name,情况
0,园区运营中心,倒签
1,园区运营中心,应结未结
2,园区运营中心,应算未算
3,文化产业公司,倒签
4,文化产业公司,应结未结
5,文化产业公司,应算未算
6,南油平方,倒签
7,南油平方,应结未结
8,南油平方,应算未算
9,番禺科技园,倒签


分公司和不合规类型的统计不合规合同


In [157]:
df_irregular_deduplication = dfs['irregular'].drop_duplicates(subset='合同编号')
df_irregular = pd.merge(
    df_org_projects,
    df_irregular_deduplication,
    left_on='project_id',
    right_on='organ_id')
df_irregular


Unnamed: 0,branch_id,branch_name,dept_id,dept_name,project_id,organ_id,community_id,项目名称,资源id,资源名称,...,合同来源,合同终止类型,终止申请类型,终止审批状态,结算状态,终止申请状态,申请id,申请人,old_contract_id,contract_id
0,1005,园区运营中心,1436204,北京产业创新中心,1435203,1435203,1435202,北京产业创新中心,551398,北京新时代国际中心A座14-BJCYCXZX-001,...,新签合同,,,,,,42718,李丹,0,42727
1,1005,园区运营中心,1436205,价值工厂,1413262,1413262,1413228,价值工厂,526121,集装箱商业1层-2-101,...,变更合同,提前终止,终止申请类型,审批通过,已结算,正常,41663,王昆,32873,40565
2,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536790,6栋1层-110,...,变更合同,正常终止,终止申请,审批通过,可结算,正常,42259,欧阳冰,34361,40660
3,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536728,2栋1层-122,...,新签合同,正常终止,终止申请类型,审批通过,可结算,正常,41846,欧阳冰,0,35400
4,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536741,5栋1层-117-118,...,新签合同,提前终止,终止申请类型,审批通过,可结算,正常,43193,欧阳冰,0,41510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,1435224,产园-武汉公司,1434222,东湖网谷,1424227,1424227,1424226,东湖网谷一期,515285,1号楼4层-401-3,...,新签合同,,,,,,40752,魏自牧,0,40987
291,1435224,产园-武汉公司,1434222,东湖网谷,1424227,1424227,1424226,东湖网谷一期,515287,1号楼4层-403,...,新签合同,提前终止,终止申请类型,审批通过,已结算,正常,41084,王冠东,0,19170
292,1435224,产园-武汉公司,1434223,高新网谷,1427236,1427236,1427235,高新网谷,535702,1号楼9层-904,...,新签合同,,,,,,41170,孔灏月,0,41183
293,1435224,产园-武汉公司,1434223,高新网谷,1427236,1427236,1427235,高新网谷,535727,1号楼10层-1013,...,新签合同,,,,,,42019,陈龙,0,41492


应用白名单

In [169]:
df_whitelist = pd.read_excel(irregular_contracts_dir / 'whitelist.xlsx')
df_irregular = df_irregular[~df_irregular['合同编号']
                            .isin(df_whitelist['合同编号'])] \
    .reset_index()
df_irregular


Unnamed: 0,index,branch_id,branch_name,dept_id,dept_name,project_id,organ_id,community_id,项目名称,资源id,...,合同来源,合同终止类型,终止申请类型,终止审批状态,结算状态,终止申请状态,申请id,申请人,old_contract_id,contract_id
0,0,1005,园区运营中心,1436204,北京产业创新中心,1435203,1435203,1435202,北京产业创新中心,551398,...,新签合同,,,,,,42718,李丹,0,42727
1,1,1005,园区运营中心,1436205,价值工厂,1413262,1413262,1413228,价值工厂,526121,...,变更合同,提前终止,终止申请类型,审批通过,已结算,正常,41663,王昆,32873,40565
2,2,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536790,...,变更合同,正常终止,终止申请,审批通过,可结算,正常,42259,欧阳冰,34361,40660
3,3,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536728,...,新签合同,正常终止,终止申请类型,审批通过,可结算,正常,41846,欧阳冰,0,35400
4,4,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536741,...,新签合同,提前终止,终止申请类型,审批通过,可结算,正常,43193,欧阳冰,0,41510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,290,1435224,产园-武汉公司,1434222,东湖网谷,1424227,1424227,1424226,东湖网谷一期,515285,...,新签合同,,,,,,40752,魏自牧,0,40987
284,291,1435224,产园-武汉公司,1434222,东湖网谷,1424227,1424227,1424226,东湖网谷一期,515287,...,新签合同,提前终止,终止申请类型,审批通过,已结算,正常,41084,王冠东,0,19170
285,292,1435224,产园-武汉公司,1434223,高新网谷,1427236,1427236,1427235,高新网谷,535702,...,新签合同,,,,,,41170,孔灏月,0,41183
286,293,1435224,产园-武汉公司,1434223,高新网谷,1427236,1427236,1427235,高新网谷,535727,...,新签合同,,,,,,42019,陈龙,0,41492


In [160]:
df_irregular_count = df_irregular.groupby(
    ['branch_name', '情况'])['organ_id'] \
    .count() \
    .rename('count') \
    .reset_index()
df_irregular_count


Unnamed: 0,branch_name,情况,count
0,产园-南京公司,倒签,27
1,产园-南京公司,应结未结,22
2,产园-杭州公司,倒签,48
3,产园-杭州公司,应结未结,49
4,产园-武汉公司,倒签,5
5,产园-武汉公司,应结未结,2
6,产园-深圳公司,倒签,67
7,产园-深圳公司,应算未算,4
8,产园-深圳公司,应结未结,38
9,产园-重庆公司,倒签,6


In [161]:
df_irregular_count2 = pd.merge(
    df_irregular_category_with_branch,
    df_irregular_count,
    how='left')[['branch_name',
                 '情况',
                 'count']] \
    .fillna(0) \
    .astype({'count': 'int32'})

df_irregular_count2


Unnamed: 0,branch_name,情况,count
0,园区运营中心,倒签,2
1,园区运营中心,应结未结,3
2,园区运营中心,应算未算,1
3,文化产业公司,倒签,0
4,文化产业公司,应结未结,0
5,文化产业公司,应算未算,1
6,南油平方,倒签,0
7,南油平方,应结未结,0
8,南油平方,应算未算,0
9,番禺科技园,倒签,2


In [197]:
def get_irregular(df_irregular_all_, key, irregular_category):
    return df_irregular_all_[df_irregular_all_[key] == irregular_category] \
        .reset_index(drop=True)


def total_irregular(df_irregular_, df_org_projects_):
    df = pd.merge(df_org_projects_,
                  df_irregular_,
                  left_on='project_id',
                  right_on='organ_id'
                  )
    df_total = df.groupby('branch_name')['organ_id'] \
        .count() \
        .rename('count') \
        .reset_index()
    return df_total


def report_irregular(df_total_, df_irregular_):
    df_report = pd.merge(df_irregular_,
                         df_total_,
                         how='left',
                         on='branch_name'
                         )
    df_report['percent'] = round(
        df_report['count_x'] /
        df_report['count_y'],
        4
    )
    df_report['total'] = df_report['count_x'].sum()
    df_report['grand_total'] = df_report['count_y'].sum()
    df_report['average_percent'] = round(
        df_report['total'] /
        df_report['grand_total'],
        4
    )
    df_report = df_report.sort_values('percent', ascending=False) \
        .reset_index(drop=True) \
        .fillna(0) \
        .astype({'grand_total': 'int32'})
    df_report = df_report.rename(columns={
        'branch_name': '分公司',
        'count_x': '小计',
        'count_y': '合计',
        'percent': '比率',
        'total': '总计',
        'grand_total': '总量',
        'average_percent': '平均比率'
    })
    return df_report


计算倒签

In [198]:
df_reverse = get_irregular(df_irregular_count2, '情况', '倒签')
df_total_reverse = total_irregular(dfs['new'], df_org_projects)
df_report_reverse = report_irregular(df_total_reverse, df_reverse)
df_report_reverse

Unnamed: 0,分公司,情况,小计,合计,比率,总计,总量,平均比率
0,产园-杭州公司,倒签,48,66,0.7273,157,421,0.3729
1,产园-深圳公司,倒签,67,139,0.482,157,421,0.3729
2,产园-南京公司,倒签,27,71,0.3803,157,421,0.3729
3,产园-武汉公司,倒签,5,23,0.2174,157,421,0.3729
4,产园-重庆公司,倒签,6,35,0.1714,157,421,0.3729
5,园区运营中心,倒签,2,32,0.0625,157,421,0.3729
6,番禺科技园,倒签,2,34,0.0588,157,421,0.3729
7,文化产业公司,倒签,0,16,0.0,157,421,0.3729
8,南油平方,倒签,0,3,0.0,157,421,0.3729
9,产园-青岛公司,倒签,0,2,0.0,157,421,0.3729


计算应结未结

In [199]:
df_untermination = get_irregular(df_irregular_count2, '情况', '应结未结')
df_total_untermination = total_irregular(dfs['termination'], df_org_projects)
df_report_untermination = report_irregular(df_total_untermination, df_untermination)
df_report_untermination

Unnamed: 0,分公司,情况,小计,合计,比率,总计,总量,平均比率
0,产园-南京公司,应结未结,22,26.0,0.8462,125,210,0.5952
1,产园-杭州公司,应结未结,49,61.0,0.8033,125,210,0.5952
2,产园-武汉公司,应结未结,2,3.0,0.6667,125,210,0.5952
3,产园-深圳公司,应结未结,38,67.0,0.5672,125,210,0.5952
4,产园-重庆公司,应结未结,9,22.0,0.4091,125,210,0.5952
5,园区运营中心,应结未结,3,10.0,0.3,125,210,0.5952
6,番禺科技园,应结未结,2,18.0,0.1111,125,210,0.5952
7,文化产业公司,应结未结,0,1.0,0.0,125,210,0.5952
8,南油平方,应结未结,0,2.0,0.0,125,210,0.5952
9,产园-青岛公司,应结未结,0,0.0,0.0,125,210,0.5952


计算应算未算

In [200]:
df_unsettlement = get_irregular(df_irregular_count2, '情况', '应算未算')
df_total_unsettlement = total_irregular(dfs['settlement'], df_org_projects)
df_report_unsettlement = report_irregular(df_total_unsettlement, df_unsettlement)
df_report_unsettlement

Unnamed: 0,分公司,情况,小计,合计,比率,总计,总量,平均比率
0,文化产业公司,应算未算,1,15.0,0.0667,6,351,0.0171
1,产园-深圳公司,应算未算,4,99.0,0.0404,6,351,0.0171
2,园区运营中心,应算未算,1,34.0,0.0294,6,351,0.0171
3,南油平方,应算未算,0,4.0,0.0,6,351,0.0171
4,番禺科技园,应算未算,0,41.0,0.0,6,351,0.0171
5,产园-重庆公司,应算未算,0,24.0,0.0,6,351,0.0171
6,产园-南京公司,应算未算,0,45.0,0.0,6,351,0.0171
7,产园-杭州公司,应算未算,0,81.0,0.0,6,351,0.0171
8,产园-武汉公司,应算未算,0,8.0,0.0,6,351,0.0171
9,产园-青岛公司,应算未算,0,0.0,0.0,6,351,0.0171


提取不合操作要求的合同清单

In [201]:
df_report_irregular = df_irregular[[
    'branch_name',
    'dept_name',
    '项目名称',
    '合同编号',
    '资源名称',
    '甲方名称',
    '乙方名称',
    '情况',
    '说明'
]].rename(columns={
    'branch_name': '分公司',
    'dept_name': '项目部'})

df_report_irregular


Unnamed: 0,分公司,项目部,项目名称,合同编号,资源名称,甲方名称,乙方名称,情况,说明
0,园区运营中心,北京产业创新中心,北京产业创新中心,bjcycxzx-2022-03-1019,北京新时代国际中心A座14-BJCYCXZX-001,深圳市招商创业有限公司,北京至曙经贸有限公司,倒签,已审批
1,园区运营中心,价值工厂,价值工厂,jzgc-2021-12-0087,集装箱商业1层-2-101,招商局蛇口工业区控股股份有限公司,恩佐（深圳） 汽车服务有限公司,应结未结,已终止
2,园区运营中心,南海意库-商业,南海意库-商业,nhyk-sy-2021-12-1131,6栋1层-110,招商局蛇口工业区控股股份有限公司,深圳剪刀侠美发管理有限公司,应算未算,未结算
3,园区运营中心,南海意库-商业,南海意库-商业,nhyk-2019-04-0359,2栋1层-122,招商局蛇口工业区控股股份有限公司,深圳市国宾大酒店有限公司,应结未结,已终止
4,园区运营中心,南海意库-商业,南海意库-商业,nhyk-sy-2022-02-1158,5栋1层-117-118,招商局蛇口工业区控股股份有限公司,深圳潮石先生艺术时尚品牌管理有限公司,应结未结,已终止
...,...,...,...,...,...,...,...,...,...
283,产园-武汉公司,东湖网谷,东湖网谷一期,dhwgyq-2022-01-1089,1号楼4层-401-3,武汉右岸网谷产业园有限公司,武汉埃申测控技术有限公司,倒签,已审批
284,产园-武汉公司,东湖网谷,东湖网谷一期,dhwgyq-2019-10-1008,1号楼4层-403,武汉右岸网谷产业园有限公司,湖北荣屹昊机器人科技有限公司,应结未结,已终止
285,产园-武汉公司,高新网谷,高新网谷,gxwg-2022-01-1101,1号楼9层-904,武汉船舶配套工业园有限公司,武汉仕代环境科技有限公司,倒签,已审批
286,产园-武汉公司,高新网谷,高新网谷,gxwg-2022-02-1123,1号楼10层-1013,武汉船舶配套工业园有限公司,湖北天合致远工程有限公司,倒签,已审批


提取增量数据

In [202]:
output_dir_name = 'output'
out_dir = Path.cwd() / output_dir_name
if not out_dir.exists():
    out_dir.mkdir()
filename_lp = out_dir / '2022-03-25-租赁平台-合同规范性检查（下发）.xlsx'
df_lp = pd.read_excel(filename_lp, sheet_name='全量数据')
df_report_increase = df_report_irregular[~df_report_irregular['合同编号']
                                         .isin(df_lp['合同编号'])] \
    .reset_index(drop=True)
df_report_increase


Unnamed: 0,分公司,项目部,项目名称,合同编号,资源名称,甲方名称,乙方名称,情况,说明
0,园区运营中心,北京产业创新中心,北京产业创新中心,bjcycxzx-2022-03-1019,北京新时代国际中心A座14-BJCYCXZX-001,深圳市招商创业有限公司,北京至曙经贸有限公司,倒签,已审批
1,番禺科技园,番禺科技园,番禺创新科技园二期,科技园YX-Z-[2022]006,创启一号楼3层-312,广州市番禺创新科技园有限公司,冯志发,倒签,已审批
2,产园-深圳公司,蛇口网谷,创业壹号BCD座,cyyhBCDz-2022-01-1029,宏达镜业2栋2层-D204,深圳市创业壹号管理有限公司,深圳市希圣贸易有限公司,应结未结,执行中
3,产园-深圳公司,蛇口网谷,万融大厦,wrds-2021-03-1046,万融大厦C座3层-310,深圳市万融大厦管理有限公司,华景山海控股（深圳）有限公司,应结未结,执行中
4,产园-深圳公司,蛇口网谷,万融大厦,wrds-2021-03-1047,万融大厦C座6层-601,深圳市万融大厦管理有限公司,粉红互娱科技（深圳）有限公司,应结未结,已终止
5,产园-深圳公司,蛇口网谷,万维大厦,wwds-2021-12-1054,万维大厦5层-505,深圳市万维大厦管理有限公司,中国水利水电第五工程局有限公司,应结未结,执行中
6,产园-深圳公司,蛇口网谷,万维大厦,wwds-2022-03-1090,万维大厦5层-502,深圳市万维大厦管理有限公司,深圳迪聚海思科技有限公司,倒签,已审批
7,产园-深圳公司,光明科技园,招商局光明科技园,招光加22C059,宿舍B7栋宿舍4层-B7-408,招商局光明科技园有限公司,中科广化检测技术服务(深圳)有限公司,倒签,已审批
8,产园-深圳公司,光明科技园,招商局光明科技园,招光加21Z099,智慧城众创空间A2栋-131,招商局光明科技园有限公司,汪在满,应结未结,已终止
9,产园-深圳公司,光明科技园,招商局光明科技园,招光加22L006,二期研发楼A1栋6层A1-0602,招商局光明科技园有限公司,深圳中科纳美科技有限公司,应结未结,执行中


## 导出下发数据


In [203]:
output_dir_name = 'output'
out_dir = Path.cwd() / output_dir_name
if not out_dir.exists():
    out_dir.mkdir()

out_filename = f'{filename.stem}-租赁平台-合同规范性检查（下发）.xlsx'

out_path = out_dir / out_filename

with pd.ExcelWriter(out_path) as writer:
    df_report_irregular.to_excel(writer, sheet_name='不合规范合同清单')
    df_report_increase.to_excel(writer, sheet_name='不合规范合同清单(增量)')
    df_report_reverse.to_excel(writer, sheet_name='倒签统计')
    df_report_untermination.to_excel(writer, sheet_name='应结未结统计')
    df_report_unsettlement.to_excel(writer, sheet_name='应算未算统计')


## 底层实现逻辑

以下代码是计算某种不合规要求的底层计算逻辑, 用于逻辑备查.

In [192]:
df_reverse = df_irregular_count2[df_irregular_count2['情况'] == '应结未结'] \
    .reset_index(drop=True)
df_reverse


Unnamed: 0,branch_name,情况,count
0,园区运营中心,应结未结,3
1,文化产业公司,应结未结,0
2,南油平方,应结未结,0
3,番禺科技园,应结未结,2
4,产园-深圳公司,应结未结,38
5,产园-重庆公司,应结未结,9
6,产园-南京公司,应结未结,22
7,产园-杭州公司,应结未结,49
8,产园-武汉公司,应结未结,2
9,产园-青岛公司,应结未结,0


In [193]:
df_new = pd.merge(df_org_projects,
                  dfs['termination'],
                  left_on='project_id',
                  right_on='organ_id'
                  )
df_new.head()


Unnamed: 0,branch_id,branch_name,dept_id,dept_name,project_id,organ_id,community_id,项目名称,资源id,资源名称,...,合同来源,合同终止类型,终止申请类型,终止审批状态,结算状态,终止申请状态,申请id,申请人,old_contract_id,contract_id
0,1005,园区运营中心,1436205,价值工厂,1413262,1413262,1413228,价值工厂,526121,集装箱商业1层-2-101,...,变更合同,提前终止,终止申请类型,审批通过,已结算,正常,41663.0,王昆,32873,40565
1,1005,园区运营中心,1436205,价值工厂,1413262,1413262,1413228,价值工厂,159461594715959,"价值工厂1层-机械大厅-104,价值工厂1层-机械大厅-105,价值工厂1层-机械大厅110",...,变更合同,提前终止,终止申请类型,审批通过,已结算,正常,42605.0,王昆,41067,41265
2,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536723,2栋1层-115,...,新签合同,提前终止,终止申请类型,审批通过,可结算,正常,41607.0,欧阳冰,0,35385
3,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536728,2栋1层-122,...,新签合同,正常终止,终止申请类型,审批通过,可结算,正常,41846.0,欧阳冰,0,35400
4,1005,园区运营中心,1436206,南海意库-商业,1433221,1433221,1433220,南海意库-商业,536741,5栋1层-117-118,...,续签合同,正常终止,终止申请类型,审批通过,已结算,正常,41604.0,欧阳冰,35419,38910


In [184]:
df_new_count = df_new.groupby('branch_name')['organ_id'] \
    .count() \
    .rename('count') \
    .reset_index()
df_new_count


Unnamed: 0,branch_name,count
0,产园-南京公司,26
1,产园-杭州公司,61
2,产园-武汉公司,3
3,产园-深圳公司,67
4,产园-重庆公司,22
5,南油平方,2
6,园区运营中心,10
7,文化产业公司,1
8,番禺科技园,18


In [191]:
df_reverse_result = pd.merge(df_reverse,
                             df_new_count,
                             how='left',
                             on='branch_name'
                             )
df_reverse_result['percent'] = round(
    df_reverse_result['count_x'] /
    df_reverse_result['count_y'],
    4
)
df_reverse_result['average_percent'] = round(
    df_reverse_result['count_x'].sum() /
    df_reverse_result['count_y'].sum(),
    4
)

df_reverse_result = df_reverse_result.sort_values('percent', ascending=False) \
    .reset_index(drop=True) \
    .fillna(0)
df_reverse_result


Unnamed: 0,branch_name,情况,count_x,count_y,percent,average_percent
0,产园-武汉公司,倒签,5,3.0,1.6667,0.7476
1,产园-南京公司,倒签,27,26.0,1.0385,0.7476
2,产园-深圳公司,倒签,67,67.0,1.0,0.7476
3,产园-杭州公司,倒签,48,61.0,0.7869,0.7476
4,产园-重庆公司,倒签,6,22.0,0.2727,0.7476
5,园区运营中心,倒签,2,10.0,0.2,0.7476
6,番禺科技园,倒签,2,18.0,0.1111,0.7476
7,文化产业公司,倒签,0,1.0,0.0,0.7476
8,南油平方,倒签,0,2.0,0.0,0.7476
9,产园-青岛公司,倒签,0,0.0,0.0,0.7476
