# resample 降采样

In [None]:
"""
降采样的原理：假如原始数据是每小时一条，时间跨度一长，数据量就会很大；
把它按更粗的时间粒度（例如一天）聚合成一条统计值（如均值、求和），数据量就会少很多
resample 处理的 DataFrame 或 Series 必须以时间类型（如 DatetimeIndex）作为 index，或者通过 on 参数指定一个日期时间列

inplace 参数表示是否直接在原对象上做修改：inplace=True 时原地修改并返回 None；inplace=False（默认）时返回修改后的新对象，原对象保持不变
"""

In [14]:
import pandas as pd

# Hourly timestamps from 2025-01-01 00:00 through 2025-01-10 00:00
# (both endpoints are included).
date_rng = pd.date_range('2025-01-01', '2025-01-10', freq='h')
print(date_rng)

# Put the timestamps into a one-column frame and show it.
df = pd.DataFrame({'date': date_rng})
print(df)

# Attach a running integer counter (0, 1, 2, ...) as the value to aggregate.
df['data'] = range(df.shape[0])
print(df)

# resample() is called without `on=` below, so it buckets by the index:
# move the timestamps out of the 'date' column and into the index.
df = df.set_index('date')

# Downsample from hourly rows to one row per calendar day, averaging the
# values that fall inside each day.
df_resampled = df.resample('D').mean()
print(df_resampled)

DatetimeIndex(['2025-01-01 00:00:00', '2025-01-01 01:00:00',
               '2025-01-01 02:00:00', '2025-01-01 03:00:00',
               '2025-01-01 04:00:00', '2025-01-01 05:00:00',
               '2025-01-01 06:00:00', '2025-01-01 07:00:00',
               '2025-01-01 08:00:00', '2025-01-01 09:00:00',
               ...
               '2025-01-09 15:00:00', '2025-01-09 16:00:00',
               '2025-01-09 17:00:00', '2025-01-09 18:00:00',
               '2025-01-09 19:00:00', '2025-01-09 20:00:00',
               '2025-01-09 21:00:00', '2025-01-09 22:00:00',
               '2025-01-09 23:00:00', '2025-01-10 00:00:00'],
              dtype='datetime64[ns]', length=217, freq='h')
                   date
0   2025-01-01 00:00:00
1   2025-01-01 01:00:00
2   2025-01-01 02:00:00
3   2025-01-01 03:00:00
4   2025-01-01 04:00:00
..                  ...
212 2025-01-09 20:00:00
213 2025-01-09 21:00:00
214 2025-01-09 22:00:00
215 2025-01-09 23:00:00
216 2025-01-10 00:00:00

[217 rows x 1 columns]

# transform

In [26]:
import pandas as pd

# Build a small example frame: two categories with three values each.
data = {
    'Category': ['A', 'A', 'A', 'B', 'B', 'B'],
    'Value': [10, 20, 30, 40, 50, 60],
}
df = pd.DataFrame(data)

# Group the 'Value' column by 'Category' once and reuse the grouped object
# below instead of rebuilding the same groupby a second time.
grouped = df.groupby('Category')['Value']

# transform() hands each group's values to the lambda and returns a result
# aligned with the original rows, so it can be assigned straight back as a
# new column. Each value is standardized within its own category:
# (value - group mean) / group standard deviation.
# NOTE: Series.std() uses the sample standard deviation (ddof=1) by default,
# so a category with a single row would yield NaN here.
df['Standardized'] = grouped.transform(lambda x: (x - x.mean()) / x.std())

print(df)

  Category  Value  Standardized
0        A     10          -1.0
1        A     20           0.0
2        A     30           1.0
3        B     40          -1.0
4        B     50           0.0
5        B     60           1.0
