In [1]:
import numpy as np
import pandas as pd

# 1. 使用时期（Period）进行重采样：

### 对以时期（PeriodIndex）为索引的数据进行重采样，与以时间戳为索引的情况类似：

In [2]:
# Demo frame: 24 monthly periods (Jan 2022 - Dec 2023) on the index and three
# integer columns filled row-wise with 0..71.
df = pd.DataFrame(
    data=np.arange(72).reshape(24, 3),
    index=pd.period_range(start="2022-01", end="2023-12", freq="M"),
    columns=list("ABC"),
)

df

Unnamed: 0,A,B,C
2022-01,0,1,2
2022-02,3,4,5
2022-03,6,7,8
2022-04,9,10,11
2022-05,12,13,14
2022-06,15,16,17
2022-07,18,19,20
2022-08,21,22,23
2022-09,24,25,26
2022-10,27,28,29


In [3]:
# Downsample the monthly periods to annual periods (year ending in December),
# keeping each column's maximum within the year.
# NOTE(review): recent pandas releases deprecate the "A" annual alias in favour
# of "Y" — confirm against the installed version before changing it.
df_annual = (
    df
    .resample("A-DEC")
    .max()
)
df_annual

Unnamed: 0,A,B,C
2022,33,34,35
2023,69,70,71


### 向上采样需要多考虑一步：在重采样之前，必须决定把原有数值放在新频率下对应时间段的哪一端（与 asfreq 方法类似）。convention 参数的默认值为 "start"，也可以设为 "end"。

In [4]:
# Q-DEC: quarterly periods whose fiscal year ends in December.
# With the default convention each annual value is placed on the first quarter
# of its year and then forward-filled through the remaining quarters.
(
    df_annual
    .resample("Q-DEC")
    .ffill()
)

Unnamed: 0,A,B,C
2022Q1,33,34,35
2022Q2,33,34,35
2022Q3,33,34,35
2022Q4,33,34,35
2023Q1,69,70,71
2023Q2,69,70,71
2023Q3,69,70,71
2023Q4,69,70,71


In [5]:
# Same upsampling, but anchor each annual value on the LAST quarter of its
# year; forward-fill then carries it into the following quarters.
(
    df_annual
    .resample("Q-DEC", convention="end")
    .ffill()
)

Unnamed: 0,A,B,C
2022Q4,33,34,35
2023Q1,33,34,35
2023Q2,33,34,35
2023Q3,33,34,35
2023Q4,69,70,71


In [6]:
# Upsample to quarters without filling: only the quarter that receives the
# annual value is populated, the others are left as missing values.
(
    df_annual
    .resample("Q-DEC")
    .asfreq()
)

Unnamed: 0,A,B,C
2022Q1,33.0,34.0,35.0
2022Q2,,,
2022Q3,,,
2022Q4,,,
2023Q1,69.0,70.0,71.0
2023Q2,,,
2023Q3,,,
2023Q4,,,


In [7]:
# Unfilled upsampling with the annual value anchored on the last quarter of
# each year; the quarters in between stay missing.
(
    df_annual
    .resample("Q-DEC", convention="end")
    .asfreq()
)

Unnamed: 0,A,B,C
2022Q4,33.0,34.0,35.0
2023Q1,,,
2023Q2,,,
2023Q3,,,
2023Q4,69.0,70.0,71.0


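### 采样规则：由于时期表示的是时间段，源频率与目标频率的边界必须能够对齐。Q-MAR（财年在 3 月结束的季度）的季度边界与 A-DEC 的年度边界对齐，因此下面的向上采样是允许的；注意在 Q-MAR 下，2022 年 1–3 月属于 2022Q4，所以结果从 2022Q4 开始：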

In [8]:
# Q-MAR: quarterly periods whose fiscal year ends in March, so the quarter
# labels are shifted relative to the calendar-year (A-DEC) source.
(
    df_annual
    .resample("Q-MAR")
    .ffill()
)

Unnamed: 0,A,B,C
2022Q4,33,34,35
2023Q1,33,34,35
2023Q2,33,34,35
2023Q3,33,34,35
2023Q4,69,70,71
2024Q1,69,70,71
2024Q2,69,70,71
2024Q3,69,70,71


# 2.分组的时间重新采样：

### 对于时间序列数据，重采样方法在语义上是一种基于时间分段的分组操作。

In [9]:
# Fifteen hourly timestamps starting at midnight on 2022-12-12.
times = pd.date_range(start="2022-12-12 00:00", periods=15, freq="1h")
times

DatetimeIndex(['2022-12-12 00:00:00', '2022-12-12 01:00:00',
               '2022-12-12 02:00:00', '2022-12-12 03:00:00',
               '2022-12-12 04:00:00', '2022-12-12 05:00:00',
               '2022-12-12 06:00:00', '2022-12-12 07:00:00',
               '2022-12-12 08:00:00', '2022-12-12 09:00:00',
               '2022-12-12 10:00:00', '2022-12-12 11:00:00',
               '2022-12-12 12:00:00', '2022-12-12 13:00:00',
               '2022-12-12 14:00:00'],
              dtype='datetime64[ns]', freq='H')

In [10]:
# Long-format frame: one row per hourly timestamp plus a running counter.
# The counter is sized from `times` rather than a hard-coded 15, so the two
# columns cannot fall out of step if the number of timestamps is changed.
# NOTE: this rebinds `df`, which earlier held the monthly period frame.
df = pd.DataFrame({"time": times, "value": np.arange(len(times))})
df

Unnamed: 0,time,value
0,2022-12-12 00:00:00,0
1,2022-12-12 01:00:00,1
2,2022-12-12 02:00:00,2
3,2022-12-12 03:00:00,3
4,2022-12-12 04:00:00,4
5,2022-12-12 05:00:00,5
6,2022-12-12 06:00:00,6
7,2022-12-12 07:00:00,7
8,2022-12-12 08:00:00,8
9,2022-12-12 09:00:00,9


In [11]:
# Move the "time" column onto the index, then count the rows that fall into
# each 5-hour bin.
(
    df
    .set_index("time")
    .resample("5h")
    .count()
)

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2022-12-12 00:00:00,5
2022-12-12 05:00:00,5
2022-12-12 10:00:00,5


### 如果一个 DataFrame 包含多个时间序列，并用一个附加的分组键列（本例为“key”列）加以标记，那么要对“key”列的每个值进行相同的重采样，就需要引入 pandas.Grouper 对象：

In [12]:
# Three interleaved series ("a", "b", "c") sharing the same hourly timestamps:
# every timestamp is repeated three times, the keys cycle a/b/c, and "value"
# is a float counter over all rows.
# Row counts are derived from `times` instead of a hard-coded 15, so the three
# columns stay the same length if the number of timestamps is changed.
df2 = pd.DataFrame({
    "time": times.repeat(3),
    "key": np.tile(["a", "b", "c"], len(times)),
    "value": np.arange(len(times) * 3.),
})
df2

Unnamed: 0,time,key,value
0,2022-12-12 00:00:00,a,0.0
1,2022-12-12 00:00:00,b,1.0
2,2022-12-12 00:00:00,c,2.0
3,2022-12-12 01:00:00,a,3.0
4,2022-12-12 01:00:00,b,4.0
5,2022-12-12 01:00:00,c,5.0
6,2022-12-12 02:00:00,a,6.0
7,2022-12-12 02:00:00,b,7.0
8,2022-12-12 02:00:00,c,8.0
9,2022-12-12 03:00:00,a,9.0


In [13]:
# A pandas.Grouper that bins a datetime index into 5-hour buckets; it can be
# combined with ordinary column keys in a single groupby call.
time_key = pd.Grouper(
    freq="5h",
)
time_key

TimeGrouper(freq=<5 * Hours>, axis=0, sort=True, closed='left', label='left', how='mean', convention='e', origin='start_day')

In [14]:
# Put "time" on the index, group by the "key" column together with the 5-hour
# time grouper, and take the maximum within each (key, time bin) group.
resampled = (
    df2
    .set_index("time")
    .groupby(["key", time_key])
    .max()
)
resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,value
key,time,Unnamed: 2_level_1
a,2022-12-12 00:00:00,12.0
a,2022-12-12 05:00:00,27.0
a,2022-12-12 10:00:00,42.0
b,2022-12-12 00:00:00,13.0
b,2022-12-12 05:00:00,28.0
b,2022-12-12 10:00:00,43.0
c,2022-12-12 00:00:00,14.0
c,2022-12-12 05:00:00,29.0
c,2022-12-12 10:00:00,44.0


In [15]:
# Flatten the (key, time) index back into ordinary columns for display.
resampled.reset_index(drop=False)

Unnamed: 0,key,time,value
0,a,2022-12-12 00:00:00,12.0
1,a,2022-12-12 05:00:00,27.0
2,a,2022-12-12 10:00:00,42.0
3,b,2022-12-12 00:00:00,13.0
4,b,2022-12-12 05:00:00,28.0
5,b,2022-12-12 10:00:00,43.0
6,c,2022-12-12 00:00:00,14.0
7,c,2022-12-12 05:00:00,29.0
8,c,2022-12-12 10:00:00,44.0
