# 合同即将到期

这里使用的数据按合同结束日期筛选, 范围为 2021-09-11 至 2021-12-31。

In [2]:
import pandas as pd
import numpy as np

## 数据加载


In [14]:
# Project master table, including the three-level owning-organisation hierarchy
df_project_full = pd.read_excel(io='./data/05_project_full.xlsx')
df_project_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   ORGAN_ID  100 non-null    int64 
 1   项目名称      100 non-null    object
 2   财务账套名称    100 non-null    object
 3   level_1   100 non-null    object
 4   level_2   96 non-null     object
 5   level_3   61 non-null     object
 6   省         100 non-null    object
 7   市         100 non-null    object
 8   区         100 non-null    object
dtypes: int64(1), object(8)
memory usage: 7.2+ KB


In [15]:
# Expiring-contract table: contracts whose end date falls between
# 2021-09-11 and 2021-12-31
df_origin = pd.read_excel(
    io='./data/expiring_contracts.xlsx',
    parse_dates=['合同开始日期', '合同结束日期'],
)
df_origin.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 889 entries, 0 to 888
Data columns (total 15 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   项目      889 non-null    object        
 1   合同编码    889 non-null    object        
 2   乙方名称    889 non-null    object        
 3   乙方联系方式  454 non-null    object        
 4   合同开始日期  889 non-null    datetime64[ns]
 5   合同结束日期  889 non-null    datetime64[ns]
 6   合同租金总额  889 non-null    float64       
 7   保证金     889 non-null    float64       
 8   到期天数    889 non-null    int64         
 9   合同资源    889 non-null    object        
 10  合同租赁面积  889 non-null    float64       
 11  当月单价    889 non-null    float64       
 12  当月租金    889 non-null    float64       
 13  起始租金    889 non-null    float64       
 14  期末租金    889 non-null    float64       
dtypes: datetime64[ns](2), float64(7), int64(1), object(5)
memory usage: 104.3+ KB


In [16]:
def get_dti_m(s):
    """
    Build the month-end dates covered by a date Series.

    The range starts at the earliest date in ``s`` and always runs to the end
    of the calendar year containing the latest date, so it may extend past
    the month of the latest date itself.

    Parameters:
    s (Series dtype=datetime64): dates to cover

    Returns:
    DatetimeIndex: one month-end timestamp per month in the range
    """

    start = s.min()
    # Timestamp: last instant of the year that contains the latest date
    end = pd.Period(s.max(), freq='Y').end_time
    # Pass the MonthEnd offset object instead of the 'M' alias: the alias is
    # deprecated in recent pandas releases (renamed to 'ME'), whereas the
    # offset object yields the same month-end dates on old and new versions.
    return pd.date_range(start, end, freq=pd.offsets.MonthEnd())


def calc_diff_months(start, end):
    """
    Compute the gap between two dates in average-length months.

    Parameters:
    start (datetime64): start date
    end (datetime64 / Timestamp): end date

    Returns:
    float: gap in months rounded to 2 decimals; 0 when ``end`` is before ``start``
    """

    # One average Gregorian month (365.2425 / 12 days = 30.436875 days),
    # spelled out in seconds. Dividing by np.timedelta64(1, 'M') relied on
    # pandas converting the ambiguous calendar-month unit to this same
    # duration; newer pandas releases refuse that unit, so use the explicit,
    # unambiguous value instead.
    avg_month = np.timedelta64(2629746, 's')
    diff = round((end - start) / avg_month, 2)
    # A negative gap means the date lies after `end`; report it as no gap.
    return 0 if diff < 0 else diff


def create_gap_months(df, date_col_name, price_col_name):
    """
    Add month-gap columns and estimated monthly income columns.

    One pair of columns is added per month-end returned by ``get_dti_m`` for
    ``date_col_name``: ``<label>`` holds the gap in months between each row's
    date and that month-end, and ``<label>_price`` holds that gap multiplied
    by the row's ``price_col_name`` value, rounded to 2 decimals.

    ``<label>`` is the English month name (e.g. ``December``). When the
    covered range spans more than one calendar year the label is prefixed
    with the year (e.g. ``2022-January``) so that the same month of different
    years does not overwrite an earlier column.

    Parameters:
    df (DataFrame): source DataFrame (not modified; a copy is returned)
    date_col_name (string): name of the column holding the dates
    price_col_name (string): name of the column holding the unit price

    Returns:
    DataFrame: copy of ``df`` with the month-gap and ``_price`` columns added
    """

    df_ = df.copy()
    dr = get_dti_m(df_[date_col_name])
    # Month names repeat once the range crosses a year boundary; only then
    # qualify the labels with the year, so single-year output is unchanged.
    spans_years = dr.year.nunique() > 1

    for ts in dr:
        month_name = ts.month_name()
        if spans_years:
            month_name = f'{ts.year}-{month_name}'
        # Gap (in months) from each row's date to this month-end, floored at 0
        df_[month_name] = df_[date_col_name].apply(calc_diff_months, end=ts)
        df_[month_name + '_price'] = round(df_[price_col_name] * df_[month_name], 2)

    return df_

In [17]:
# Expiring-contract table extended with month-gap columns and estimated monthly rent.
# How it is computed:
#   - the months to fill in are derived from the earliest and latest contract end dates
#     - earliest date 11 September -> the first gap month is September
#     - latest date 25 December -> the last gap month is December
#   - for each of those months, measure the gap (in months) from the contract
#     end date to the last day of that month
#   - the estimated rent for that month is derived from that gap
df_gap = create_gap_months(
    df=df_origin,
    date_col_name='合同结束日期',
    price_col_name='当月单价',
)
df_gap

Unnamed: 0,项目,合同编码,乙方名称,乙方联系方式,合同开始日期,合同结束日期,合同租金总额,保证金,到期天数,合同资源,...,起始租金,期末租金,September,September_price,October,October_price,November,November_price,December,December_price
0,新时代广场,XSDGC-2018-08-0163,雷格斯商务服务(深圳)有限公司,13510853854,2011-12-08,2021-12-07,0.00,240000.0,82,"3A,3B,3C,3D,3E,3I,3J,3K,3L",...,0.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.79,118.78
1,金山意库,jsyk-2018-08-0046,龚羽晗 杨金元,15826121818,2019-01-01,2021-12-31,250839.18,14403.0,106,232,...,3600.85,7940.2,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00
2,南海意库,nhyk-2018-08-0131,深圳市达文设计有限公司,13724258682,2017-12-17,2021-10-31,2024791.00,93982.0,45,506,...,40590.00,46991.0,0.0,0.0,0.0,0.0,0.99,149.00,2.00,301.00
3,招商局广场,zsjgc-2018-08-0086,招商局仁和人寿保险股份有限公司,18813121789,2016-12-21,2021-11-30,25964568.04,1015009.0,75,"29A,29B,29C,29D,29E,29F,29G,29H",...,417525.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,1.02,307.48
4,招商局光明科技园,招光加17B002,深圳拉尔文生物工程技术有限公司,13902317463,2017-02-11,2021-11-10,2658895.00,105634.0,55,B5-06B,...,0.00,52817.0,0.0,0.0,0.0,0.0,0.66,25.67,1.68,65.34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,芯云谷一期人才公寓,YG-YY-21-QT-017,漳州柳叶花香文化传播有限公司,,2021-05-26,2021-11-25,7280.00,2000.0,70,201,...,880.00,857.0,0.0,0.0,0.0,0.0,0.16,2.78,1.18,20.50
885,芯云谷一期人才公寓,YG-YY-21-QT-018,漳州柳叶花香文化传播有限公司,,2021-05-26,2021-11-25,7280.00,2000.0,70,314,...,880.00,857.0,0.0,0.0,0.0,0.0,0.16,3.04,1.18,22.43
886,招商局光明科技园,招光加21C079,深圳慈云鸽医疗科技有限责任公司,,2021-09-09,2021-12-31,13613.00,5400.0,106,B8-623,...,2200.00,2200.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00
887,四区单身公寓,CMZD-YQ-2021-ZL-119,肖俊,18889676869,2021-09-08,2021-12-07,3990.00,1995.0,82,3-502,...,665.00,687.0,0.0,0.0,0.0,0.0,0.00,0.00,0.79,11.07


In [18]:
# Attach the project hierarchy (df_project_full) to every contract row of df_gap.
# NOTE(review): assumes 项目名称 is unique in df_project_full; a duplicated name
# would duplicate contract rows here — confirm, or pass validate='many_to_one'.
df_rel = df_gap.merge(
    df_project_full,
    how='left',
    left_on='项目',
    right_on='项目名称',
)
df_rel

Unnamed: 0,项目,合同编码,乙方名称,乙方联系方式,合同开始日期,合同结束日期,合同租金总额,保证金,到期天数,合同资源,...,December_price,ORGAN_ID,项目名称,财务账套名称,level_1,level_2,level_3,省,市,区
0,新时代广场,XSDGC-2018-08-0163,雷格斯商务服务(深圳)有限公司,13510853854,2011-12-08,2021-12-07,0.00,240000.0,82,"3A,3B,3C,3D,3E,3I,3J,3K,3L",...,118.78,,,,,,,,,
1,金山意库,jsyk-2018-08-0046,龚羽晗 杨金元,15826121818,2019-01-01,2021-12-31,250839.18,14403.0,106,232,...,0.00,1412212.0,金山意库,重庆招商金山意库商业管理有限公司,产业园区事业部,产园-重庆公司,金山意库,重庆市,重庆市,巴南区
2,南海意库,nhyk-2018-08-0131,深圳市达文设计有限公司,13724258682,2017-12-17,2021-10-31,2024791.00,93982.0,45,506,...,301.00,1412215.0,南海意库,招商局蛇口工业区控股股份有限公司,产业园区事业部,产园-深圳公司,南海意库,广东省,深圳市,南山区
3,招商局广场,zsjgc-2018-08-0086,招商局仁和人寿保险股份有限公司,18813121789,2016-12-21,2021-11-30,25964568.04,1015009.0,75,"29A,29B,29C,29D,29E,29F,29G,29H",...,307.48,,,,,,,,,
4,招商局光明科技园,招光加17B002,深圳拉尔文生物工程技术有限公司,13902317463,2017-02-11,2021-11-10,2658895.00,105634.0,55,B5-06B,...,65.34,1413273.0,招商局光明科技园,招商局光明科技园有限公司,产业园区事业部,产园-深圳公司,光明科技园,广东省,深圳市,光明新区
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,芯云谷一期人才公寓,YG-YY-21-QT-017,漳州柳叶花香文化传播有限公司,,2021-05-26,2021-11-25,7280.00,2000.0,70,201,...,20.50,1426213.0,芯云谷一期人才公寓,福建招商云谷开发有限公司,产业园区事业部,产园-深圳公司,漳州芯云谷,福建省,漳州市,龙海区
885,芯云谷一期人才公寓,YG-YY-21-QT-018,漳州柳叶花香文化传播有限公司,,2021-05-26,2021-11-25,7280.00,2000.0,70,314,...,22.43,1426213.0,芯云谷一期人才公寓,福建招商云谷开发有限公司,产业园区事业部,产园-深圳公司,漳州芯云谷,福建省,漳州市,龙海区
886,招商局光明科技园,招光加21C079,深圳慈云鸽医疗科技有限责任公司,,2021-09-09,2021-12-31,13613.00,5400.0,106,B8-623,...,0.00,1413273.0,招商局光明科技园,招商局光明科技园有限公司,产业园区事业部,产园-深圳公司,光明科技园,广东省,深圳市,光明新区
887,四区单身公寓,CMZD-YQ-2021-ZL-119,肖俊,18889676869,2021-09-08,2021-12-07,3990.00,1995.0,82,3-502,...,11.07,,,,,,,,,


In [23]:
# Contracts whose project is not part of the industrial-park division (产园),
# i.e. rows where level_1 is missing after the left join.
# This happens because the person who exported the data had permissions wider
# than the division's own data scope.
df_no_exists = df_rel.loc[df_rel['level_1'].isna()]
df_no_exists

Unnamed: 0,项目,合同编码,乙方名称,乙方联系方式,合同开始日期,合同结束日期,合同租金总额,保证金,到期天数,合同资源,...,December_price,ORGAN_ID,项目名称,财务账套名称,level_1,level_2,level_3,省,市,区
0,新时代广场,XSDGC-2018-08-0163,雷格斯商务服务(深圳)有限公司,13510853854,2011-12-08,2021-12-07,0.00,240000.0,82,"3A,3B,3C,3D,3E,3I,3J,3K,3L",...,118.78,,,,,,,,,
3,招商局广场,zsjgc-2018-08-0086,招商局仁和人寿保险股份有限公司,18813121789,2016-12-21,2021-11-30,25964568.04,1015009.0,75,"29A,29B,29C,29D,29E,29F,29G,29H",...,307.48,,,,,,,,,
7,招商局广场,zsjgc-2018-08-0109,招商局仁和人寿保险股份有限公司,18813121789,2016-12-21,2021-11-30,19461766.26,760801.0,75,"35C,35D,35E,35F,35G,35H1",...,369.46,,,,,,,,,
8,沈阳招商钻石山,syzszss-2018-08-0004,沈阳星宇方舟展览服务有限公司,,2018-09-20,2021-09-19,355917.00,35039.0,3,1508,...,212.94,,,,,,,,,
9,阳光带,(SZ)CN201311037R1,深圳市四季分享有机厨房有限公司,13713998651,2018-12-01,2021-11-30,2915364.00,169928.0,75,"锦缎之滨服务会所100,锦缎之滨服务会所201",...,114.82,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,综合楼,zhl-2021-07-1007,漳州招商局经济技术开发区文化与旅游发展局,05966856586,2021-07-01,2021-12-31,706200.00,0.0,106,综合楼（文旅）,...,0.00,,,,,,,,,
858,兴华工业大厦,xhgyds-2021-07-1135,深圳迷你仓仓储股份有限公司,,2021-07-01,2021-12-31,464427.66,149112.0,106,4C,...,0.00,,,,,,,,,
862,招商漳州土地,zszztd-2021-08-1083,福建联谊建筑工程有限公司,,2020-12-01,2021-11-30,20212.00,10000.0,75,联谊建筑工程,...,4.49,,,,,,,,,
882,招商大厦,zsdx-2021-09-0245,张鹤,,2021-09-10,2021-11-30,11280.00,4800.0,75,833,...,81.60,,,,,,,,,


In [24]:
# Contracts whose project belongs to the industrial-park division (产园),
# i.e. rows where level_1 is present after the left join.
df_exists = df_rel.loc[df_rel['level_1'].notna()]
df_exists

Unnamed: 0,项目,合同编码,乙方名称,乙方联系方式,合同开始日期,合同结束日期,合同租金总额,保证金,到期天数,合同资源,...,December_price,ORGAN_ID,项目名称,财务账套名称,level_1,level_2,level_3,省,市,区
1,金山意库,jsyk-2018-08-0046,龚羽晗 杨金元,15826121818,2019-01-01,2021-12-31,250839.18,14403.0,106,232,...,0.00,1412212.0,金山意库,重庆招商金山意库商业管理有限公司,产业园区事业部,产园-重庆公司,金山意库,重庆市,重庆市,巴南区
2,南海意库,nhyk-2018-08-0131,深圳市达文设计有限公司,13724258682,2017-12-17,2021-10-31,2024791.00,93982.0,45,506,...,301.00,1412215.0,南海意库,招商局蛇口工业区控股股份有限公司,产业园区事业部,产园-深圳公司,南海意库,广东省,深圳市,南山区
4,招商局光明科技园,招光加17B002,深圳拉尔文生物工程技术有限公司,13902317463,2017-02-11,2021-11-10,2658895.00,105634.0,55,B5-06B,...,65.34,1413273.0,招商局光明科技园,招商局光明科技园有限公司,产业园区事业部,产园-深圳公司,光明科技园,广东省,深圳市,光明新区
5,招商局光明科技园,招光加17B006,深圳市杰星通科技有限公司,,2017-04-01,2021-09-30,2722873.00,115054.0,14,B6-03C,...,146.83,1413273.0,招商局光明科技园,招商局光明科技园有限公司,产业园区事业部,产园-深圳公司,光明科技园,广东省,深圳市,光明新区
6,招商局光明科技园,招光加16A016,中国电信股份有限公司深圳分公司,13360096293,2016-09-16,2021-09-15,4630740.00,172618.0,0,"A3-09D,A3-10D",...,0.00,1413273.0,招商局光明科技园,招商局光明科技园有限公司,产业园区事业部,产园-深圳公司,光明科技园,广东省,深圳市,光明新区
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
883,创业壹号A座招商创库,cyyhAzzsck-2021-09-0472,深圳市海苔创新服务有限责任公司,13609615736,2021-09-01,2021-12-31,7800.00,2600.0,106,SZSKA5F-016,...,0.00,1013.0,创业壹号A座招商创库,深圳市招商创业有限公司,产业园区事业部,园区运营中心,,广东省,深圳市,南山区
884,芯云谷一期人才公寓,YG-YY-21-QT-017,漳州柳叶花香文化传播有限公司,,2021-05-26,2021-11-25,7280.00,2000.0,70,201,...,20.50,1426213.0,芯云谷一期人才公寓,福建招商云谷开发有限公司,产业园区事业部,产园-深圳公司,漳州芯云谷,福建省,漳州市,龙海区
885,芯云谷一期人才公寓,YG-YY-21-QT-018,漳州柳叶花香文化传播有限公司,,2021-05-26,2021-11-25,7280.00,2000.0,70,314,...,22.43,1426213.0,芯云谷一期人才公寓,福建招商云谷开发有限公司,产业园区事业部,产园-深圳公司,漳州芯云谷,福建省,漳州市,龙海区
886,招商局光明科技园,招光加21C079,深圳慈云鸽医疗科技有限责任公司,,2021-09-09,2021-12-31,13613.00,5400.0,106,B8-623,...,0.00,1413273.0,招商局光明科技园,招商局光明科技园有限公司,产业园区事业部,产园-深圳公司,光明科技园,广东省,深圳市,光明新区


In [25]:
# Shared display settings for the summary tables below
table_formatter = '{:,.2f}'
table_properties = {'text-align': 'right'}
# Columns to aggregate: every column whose name contains 'price',
# followed by the contract total rent column
need_cols = [col for col in df_exists.columns if 'price' in col]
need_cols.append('合同租金总额')

In [26]:
# Totals per division (level_1).
# Select the columns before aggregating: summing every column and selecting
# afterwards wastes work and, on pandas versions where groupby.sum no longer
# drops non-numeric columns by default, tries to sum text/date columns.
df_l1 = df_exists.groupby('level_1')[need_cols].sum()
df_l1.style.format(table_formatter).set_properties(**table_properties)

Unnamed: 0_level_0,September_price,October_price,November_price,December_price,合同租金总额
level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
产业园区事业部,770.12,114815.98,278825.66,490994.8,157601916.64


In [32]:
def highlight_max(s, props=''):
    """
    Styler helper: return ``props`` for the entries equal to the maximum of
    ``s`` (NaN values are ignored when finding the maximum) and an empty
    string for every other entry.
    """
    peak = np.nanmax(s.values)
    is_peak = s == peak
    return np.where(is_peak, props, '')

In [33]:
# Group by division (level_1) and branch company (level_2); missing keys are kept.
# Columns are selected before aggregating so text/date columns are never summed.
df_l2 = df_exists.groupby(['level_1', 'level_2'], dropna=False)[need_cols].sum()
# Add a 'total' column (row-wise sum of the selected columns)
df_l2['total'] = df_l2.sum(axis=1)
# Sort by the 'total' column, largest first
df_l2 = df_l2.sort_values(by='total', ascending=False)
# Append a grand-total row. DataFrame.append is deprecated and removed in
# newer pandas, so build the one-row frame explicitly and concatenate it.
total_row = df_l2.sum().to_frame().T
total_row.index = pd.MultiIndex.from_tuples([('Total', 'total')], names=df_l2.index.names)
df_l2 = pd.concat([df_l2, total_row])

# Highlight the per-column maximum among the group rows only; if the
# grand-total row took part it would dominate every column.
group_rows = df_l2.index.get_level_values(0) != 'Total'
df_l2.style.format(table_formatter)\
    .set_properties(**table_properties)\
    .apply(highlight_max, props='color:black;background-color:pink', axis=0,
           subset=pd.IndexSlice[group_rows, :])

Unnamed: 0_level_0,Unnamed: 1_level_0,September_price,October_price,November_price,December_price,合同租金总额,total
level_1,level_2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
产业园区事业部,产园-深圳公司,29.37,13987.52,44794.84,89705.09,108165146.07,108313662.89
产业园区事业部,产园-南京公司,6.75,60998.8,145999.42,257623.97,20170036.73,20634665.67
产业园区事业部,园区运营中心,0.0,8695.29,18280.62,28673.28,11263928.13,11319577.32
产业园区事业部,南油平方,0.0,23.74,65.3,108.54,10127274.06,10127471.64
产业园区事业部,产园-武汉公司,0.0,46.75,102.69,392.34,3654805.39,3655347.17
产业园区事业部,番禺科技园,0.0,153.98,311.77,500.13,2103960.41,2104926.29
产业园区事业部,文化产业公司,0.0,18170.86,35629.12,53975.64,978626.67,1086402.29
产业园区事业部,产园-杭州公司,734.0,7363.04,18857.9,35439.81,848900.0,911294.75
产业园区事业部,产园-重庆公司,0.0,0.0,0.0,0.0,250839.18,250839.18
产业园区事业部,产园-青岛公司,0.0,5376.0,14784.0,24576.0,38400.0,83136.0


In [35]:
# Group by division / branch company / project (level_1 / level_2 / level_3);
# missing keys are kept. Columns are selected before aggregating so text/date
# columns are never summed.
df_l3 = df_exists.sort_values(['level_1', 'level_2', 'level_3']).groupby(
    ['level_1', 'level_2', 'level_3'], dropna=False)[need_cols].sum()
# Add a 'total' column (row-wise sum of the selected columns)
df_l3['total'] = df_l3.sum(axis=1)
# Sorting by the 'total' column is left disabled, as in the original cell
# df_l3 = df_l3.sort_values(by='total', ascending=False)
# Append a grand-total row. DataFrame.append is deprecated and removed in
# newer pandas, so build the one-row frame explicitly and concatenate it.
total_row = df_l3.sum().to_frame().T
total_row.index = pd.MultiIndex.from_tuples([('Total', 'total', 't')], names=df_l3.index.names)
df_l3 = pd.concat([df_l3, total_row])

idx = pd.IndexSlice
# Rows whose third index level is 't' (the grand-total row)
slice_ = idx[idx[:, :, 't']]
# The 'total' column of the division's rows
slice2 = idx[idx['产业园区事业部', :, :], 'total']
# The value columns (September_price .. 合同租金总额) of the division's rows
slice3 = idx[idx['产业园区事业部', :, :], idx['September_price':'合同租金总额']]

df_l3.style.format(table_formatter)\
    .set_properties(**table_properties)\
    .apply(highlight_max, props='color:black;background-color:pink', axis=0, subset=slice3)\
    .set_properties(**{'background-color': '#fffff3',
                       'color': 'black'}, subset=slice_)\
    .set_properties(**{'background-color': '#ffffb8',
                       'color': 'black'}, subset=slice2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,September_price,October_price,November_price,December_price,合同租金总额,total
level_1,level_2,level_3,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
产业园区事业部,产园-南京公司,仙东网谷,6.75,237.16,517.92,847.67,756141.2,757750.7
产业园区事业部,产园-南京公司,紫金智谷,0.0,173.64,399.09,831.81,3523542.15,3524946.69
产业园区事业部,产园-南京公司,高铁网谷,0.0,60588.0,145082.41,255944.49,15890353.38,16351968.28
产业园区事业部,产园-杭州公司,上海森兰美奂创库,0.0,0.0,0.0,0.0,0.0,0.0
产业园区事业部,产园-杭州公司,信雅达创库,734.0,7363.04,18857.9,35439.81,848900.0,911294.75
产业园区事业部,产园-武汉公司,东湖网谷,0.0,0.0,0.0,197.58,2685272.86,2685470.44
产业园区事业部,产园-武汉公司,招商蕲春,0.0,46.75,102.69,194.76,969532.53,969876.73
产业园区事业部,产园-深圳公司,光明科技园,0.0,10919.42,34793.88,64503.32,35063312.43,35173529.05
产业园区事业部,产园-深圳公司,南海意库,0.0,1368.12,5225.77,10405.82,28008512.0,28025511.71
产业园区事业部,产园-深圳公司,漳州芯云谷,29.37,226.24,569.43,1314.62,17412518.7,17414658.36


In [6]:
# Small random demo frame with a two-level row index (A/B x r1/r2),
# used below to try out Styler subsets
df3 = pd.DataFrame(
    data=np.random.randn(4, 4),
    index=pd.MultiIndex.from_product([['A', 'B'], ['r1', 'r2']]),
    columns=['c1', 'c2', 'c3', 'c4'],
)
df3

Unnamed: 0,Unnamed: 1,c1,c2,c3,c4
A,r1,0.093169,-0.458769,-1.34523,-0.746773
A,r2,-1.254442,0.970957,-2.246461,-0.158132
B,r1,1.279628,0.4958,0.198105,-0.572969
B,r2,0.165545,-1.708852,-1.045231,0.490629


In [13]:
# Style only the rows whose second index level is 'r1', columns c2..c4:
# mark the column-wise maximum in red and give the selected cells a
# pale-yellow background
idx = pd.IndexSlice
slice_ = idx[idx[:, 'r1'], idx['c2':'c4']]
(
    df3.style
    .apply(highlight_max, props='color:red;', axis=0, subset=slice_)
    .set_properties(subset=slice_, **{'background-color': '#ffffb3'})
)

Unnamed: 0,Unnamed: 1,c1,c2,c3,c4
A,r1,0.093169,-0.458769,-1.34523,-0.746773
A,r2,-1.254442,0.970957,-2.246461,-0.158132
B,r1,1.279628,0.4958,0.198105,-0.572969
B,r2,0.165545,-1.708852,-1.045231,0.490629


In [3]:
# Show the installed pandas version (the output recorded below is '1.3.2')
pd.__version__

'1.3.2'