In [1]:
import pandas as pd
from io import StringIO

In [2]:
# Inline sample data: one whitespace-separated row per contract, with the
# columns contract number (合同编号), start date (起始日期) and end date (结束日期).
# The literal begins with a newline, so the header is on the second line.
data = '''
合同编号	起始日期	结束日期
BH001	2023-02-20	2024-02-19
BH002	2023-04-01	2026-03-31
BH003	2022-04-10	2023-04-09
BH004	2023-03-01	2024-09-30
BH005	2023-02-01	2026-01-31
'''

In [31]:
# Parse the inline sample into a DataFrame. Columns are separated by runs of
# whitespace; `sep=r'\s+'` is the supported spelling of the deprecated
# `delim_whitespace=True` flag and yields the same frame.
# The date columns are intentionally left as strings here; later cells convert
# them where needed.
df = pd.read_csv(StringIO(data), sep=r'\s+')
df

Unnamed: 0,合同编号,起始日期,结束日期
0,BH001,2023-02-20,2024-02-19
1,BH002,2023-04-01,2026-03-31
2,BH003,2022-04-10,2023-04-09
3,BH004,2023-03-01,2024-09-30
4,BH005,2023-02-01,2026-01-31


In [32]:
# Inspect the column labels of the parsed frame.
df.columns

Index(['合同编号', '起始日期', '结束日期'], dtype='object')

In [4]:
# Contract number paired with its start date, with the date column relabelled
# to the generic name '日期' so it can be stacked with the end-date frame.
df1 = df[['合同编号', '起始日期']].rename(columns={'起始日期': '日期'})
df1



Unnamed: 0,合同编号,日期
0,BH001,2023-02-20
1,BH002,2023-04-01
2,BH003,2022-04-10
3,BH004,2023-03-01
4,BH005,2023-02-01


In [5]:
# Contract number paired with its end date, with the date column relabelled
# to the generic name '日期' so it can be stacked with the start-date frame.
df2 = df[['合同编号', '结束日期']].rename(columns={'结束日期': '日期'})
df2

Unnamed: 0,合同编号,日期
0,BH001,2024-02-19
1,BH002,2026-03-31
2,BH003,2023-04-09
3,BH004,2024-09-30
4,BH005,2026-01-31


In [17]:
# Stack the start-date and end-date rows into one long frame, convert the
# date strings to real timestamps, and index/sort by date so each contract
# can be resampled on a DatetimeIndex afterwards.
# `pd.to_datetime` is used instead of `.astype('datetime64')`: casting to a
# unit-less 'datetime64' dtype raises TypeError on pandas >= 2.0.
df1_df2 = (
    pd.concat([df1, df2])
    .assign(日期=lambda stacked: pd.to_datetime(stacked['日期']))
    .set_index('日期')
    .sort_index()
)
df1_df2

Unnamed: 0_level_0,合同编号
日期,Unnamed: 1_level_1
2022-04-10,BH003
2023-02-01,BH005
2023-02-20,BH001
2023-03-01,BH004
2023-04-01,BH002
2023-04-09,BH003
2024-02-19,BH001
2024-09-30,BH004
2026-01-31,BH005
2026-03-31,BH002


In [24]:
# For every contract, expand its two boundary rows (start, end) into one row
# per calendar day by upsampling to daily frequency and forward-filling the
# contract number across the gap.
gp = df1_df2.groupby('合同编号')
# `.resample('D').ffill()` already fills every generated day, so the former
# follow-up `fillna(method='ffill', inplace=True)` (a deprecated call form)
# was a no-op and has been dropped.
all_values = [contract_rows.resample('D').ffill() for _, contract_rows in gp]
# Stack the per-contract daily frames and move the date index back to a column.
result = pd.concat(all_values, axis=0).reset_index()
# Store dates as text; the next step splits them on '-' to get year and month.
result['日期'] = result['日期'].astype('string')
result


Unnamed: 0,日期,合同编号
0,2023-02-20,BH001
1,2023-02-21,BH001
2,2023-02-22,BH001
3,2023-02-23,BH001
4,2023-02-24,BH001
...,...,...
3497,2026-01-27,BH005
3498,2026-01-28,BH005
3499,2026-01-29,BH005
3500,2026-01-30,BH005


In [25]:
# Reduce the daily rows to one row per (contract, year, month).
# The date strings are split on '-' once; element 0 is the year and element 1
# the month (both kept as text, e.g. '02').
date_parts = result['日期'].str.split('-')
result = (
    result
    .assign(年=date_parts.str[0], 月=date_parts.str[1])
    .drop_duplicates(subset=['合同编号', '年', '月'])
    # Drop the date column by name rather than by position (`iloc[:, 1:]`),
    # so a change in column order cannot silently remove the wrong column.
    .drop(columns='日期')
    .set_index('合同编号')
)
result

Unnamed: 0_level_0,年,月
合同编号,Unnamed: 1_level_1,Unnamed: 2_level_1
BH001,2023,02
BH001,2023,03
BH001,2023,04
BH001,2023,05
BH001,2023,06
...,...,...
BH005,2025,09
BH005,2025,10
BH005,2025,11
BH005,2025,12


In [27]:
# Spot-check: show only the rows whose contract number equals 'BH001'.
result.query("合同编号 ==  'BH001'")

Unnamed: 0_level_0,年,月
合同编号,Unnamed: 1_level_1,Unnamed: 2_level_1
BH001,2023,2
BH001,2023,3
BH001,2023,4
BH001,2023,5
BH001,2023,6
BH001,2023,7
BH001,2023,8
BH001,2023,9
BH001,2023,10
BH001,2023,11


In [43]:
# Single-expression version of the whole transformation: attach each
# contract's daily date range, explode to one row per day, derive the year
# and month of every day, then keep one row per (contract, year, month).
(
    df.assign(
        日期=df.apply(
            lambda row: pd.date_range(row['起始日期'], row['结束日期']),
            axis=1,
        )
    )
    .explode('日期')
    .assign(
        年=lambda days: days['日期'].dt.year,
        月=lambda days: days['日期'].dt.month,
    )
    .filter(items=['合同编号', '年', '月'])
    .drop_duplicates()
)


Unnamed: 0,合同编号,年,月
0,BH001,2023,2
0,BH001,2023,3
0,BH001,2023,4
0,BH001,2023,5
0,BH001,2023,6
...,...,...,...
4,BH005,2025,9
4,BH005,2025,10
4,BH005,2025,11
4,BH005,2025,12


In [44]:
# Step 1 of the pipeline, shown on its own: add a '日期' column holding, for
# each contract, the full daily date range from its start date to its end date.
df1 = df.copy()
df1['日期'] = df.apply(
    lambda row: pd.date_range(row['起始日期'], row['结束日期']),
    axis=1,
)
df1

Unnamed: 0,合同编号,起始日期,结束日期,日期
0,BH001,2023-02-20,2024-02-19,"DatetimeIndex(['2023-02-20', '2023-02-21', '20..."
1,BH002,2023-04-01,2026-03-31,"DatetimeIndex(['2023-04-01', '2023-04-02', '20..."
2,BH003,2022-04-10,2023-04-09,"DatetimeIndex(['2022-04-10', '2022-04-11', '20..."
3,BH004,2023-03-01,2024-09-30,"DatetimeIndex(['2023-03-01', '2023-03-02', '20..."
4,BH005,2023-02-01,2026-01-31,"DatetimeIndex(['2023-02-01', '2023-02-02', '20..."


In [47]:
# Step 2: unpack each per-contract date range into one row per day. The
# original row labels are kept, so they repeat once for every day.
df1 = df1.explode(column='日期', ignore_index=False)
df1

Unnamed: 0,合同编号,起始日期,结束日期,日期
0,BH001,2023-02-20,2024-02-19,2023-02-20
0,BH001,2023-02-20,2024-02-19,2023-02-21
0,BH001,2023-02-20,2024-02-19,2023-02-22
0,BH001,2023-02-20,2024-02-19,2023-02-23
0,BH001,2023-02-20,2024-02-19,2023-02-24
...,...,...,...,...
4,BH005,2023-02-01,2026-01-31,2026-01-27
4,BH005,2023-02-01,2026-01-31,2026-01-28
4,BH005,2023-02-01,2026-01-31,2026-01-29
4,BH005,2023-02-01,2026-01-31,2026-01-30


In [48]:
# Step 3: derive the calendar year and month of every daily row.
daily_dates = df1['日期']
df2 = df1.assign(年=daily_dates.dt.year, 月=daily_dates.dt.month)
df2

Unnamed: 0,合同编号,起始日期,结束日期,日期,年,月
0,BH001,2023-02-20,2024-02-19,2023-02-20,2023,2
0,BH001,2023-02-20,2024-02-19,2023-02-21,2023,2
0,BH001,2023-02-20,2024-02-19,2023-02-22,2023,2
0,BH001,2023-02-20,2024-02-19,2023-02-23,2023,2
0,BH001,2023-02-20,2024-02-19,2023-02-24,2023,2
...,...,...,...,...,...,...
4,BH005,2023-02-01,2026-01-31,2026-01-27,2026,1
4,BH005,2023-02-01,2026-01-31,2026-01-28,2026,1
4,BH005,2023-02-01,2026-01-31,2026-01-29,2026,1
4,BH005,2023-02-01,2026-01-31,2026-01-30,2026,1


In [49]:
# Step 4: keep only the contract number and the derived year/month columns.
df2 = df2.filter(items=['合同编号', '年', '月'], axis='columns')
df2

Unnamed: 0,合同编号,年,月
0,BH001,2023,2
0,BH001,2023,2
0,BH001,2023,2
0,BH001,2023,2
0,BH001,2023,2
...,...,...,...
4,BH005,2026,1
4,BH005,2026,1
4,BH005,2026,1
4,BH005,2026,1


In [50]:
# Step 5: collapse the daily rows to one row per (contract, year, month) by
# keeping only the first occurrence of each combination.
is_repeat = df2.duplicated(keep='first')
result = df2[~is_repeat]
result

Unnamed: 0,合同编号,年,月
0,BH001,2023,2
0,BH001,2023,3
0,BH001,2023,4
0,BH001,2023,5
0,BH001,2023,6
...,...,...,...
4,BH005,2025,9
4,BH005,2025,10
4,BH005,2025,11
4,BH005,2025,12
