In [1]:
import numpy as np
import pandas as pd

## 时间序列

### 时间戳

- pd.date_range()

    注意点：start, end, periods, freq
    
    这四个参数中，必须指定其中三个，另一个参数会根据指定的参数自动推导。
    
    如果同时指定 start、end 和 periods 而省略 freq，生成的时间点会在 start 和 end 之间等间隔（线性）分布，且包含 start 和 end；如果只指定其中两个参数，freq 默认为 'D'（按天）。

- pd.period_range()

    注意点：start, end, periods
    
    这三个参数中，必须指定其中两个，另一个参数会根据指定的参数自动推导。
    

In [30]:
# Build single time values.
pd.Timestamp('2025-03-20')  # a point in time (Timestamp)
# freq sets the resolution the period is shown at: Y = year, M = month, D = day.
pd.Period('2025-03-20', freq='Y')  # a span of time (Period)

# Generate a batch of timestamps: periods=4 yields 4 values, one per day from the start date.
index = pd.date_range('2025-03-01', periods=4, freq='D')
# Generate a batch of daily periods running from the start date through the end date.
index2 = pd.period_range('2025-3-01', '2025-03-20', freq='D')
index2

# Use the timestamps as the index of a Series of random integers drawn from [0, 10).
pd.Series(np.random.randint(10, size=len(index)), index=index)

2025-03-01    2
2025-03-02    0
2025-03-03    6
2025-03-04    2
Freq: D, dtype: int32

In [41]:
# Conversion helpers
# When the inputs are not written in one uniform format, pass format='mixed'.
pd.to_datetime(
    ['2030.03.14', '2030-3-14', '14/03/2030', '2030/3/14'],
    format='mixed',
)
# Numeric timestamp in seconds -> datetime
pd.to_datetime([1898675423], unit='s')
# Numeric timestamp in milliseconds -> datetime
dt = pd.to_datetime([1898675423000], unit='ms')
display(dt)
# Time arithmetic with DateOffset(): by default it shifts by one day;
# keyword arguments choose the amount to shift by.
dt + pd.DateOffset(hours=8)   # 8 hours later
dt + pd.DateOffset(days=8)    # 8 days later
dt - pd.DateOffset(hours=8)   # 8 hours earlier
dt + pd.DateOffset(days=-8)   # 8 days earlier

DatetimeIndex(['2030-03-02 09:50:23'], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2030-02-22 09:50:23'], dtype='datetime64[ns]', freq=None)

In [42]:
# Indexing and slicing with timestamps: start from 100 consecutive daily dates.
index = pd.date_range(start='2030-03-14', periods=100, freq='D')
index

DatetimeIndex(['2030-03-14', '2030-03-15', '2030-03-16', '2030-03-17',
               '2030-03-18', '2030-03-19', '2030-03-20', '2030-03-21',
               '2030-03-22', '2030-03-23', '2030-03-24', '2030-03-25',
               '2030-03-26', '2030-03-27', '2030-03-28', '2030-03-29',
               '2030-03-30', '2030-03-31', '2030-04-01', '2030-04-02',
               '2030-04-03', '2030-04-04', '2030-04-05', '2030-04-06',
               '2030-04-07', '2030-04-08', '2030-04-09', '2030-04-10',
               '2030-04-11', '2030-04-12', '2030-04-13', '2030-04-14',
               '2030-04-15', '2030-04-16', '2030-04-17', '2030-04-18',
               '2030-04-19', '2030-04-20', '2030-04-21', '2030-04-22',
               '2030-04-23', '2030-04-24', '2030-04-25', '2030-04-26',
               '2030-04-27', '2030-04-28', '2030-04-29', '2030-04-30',
               '2030-05-01', '2030-05-02', '2030-05-03', '2030-05-04',
               '2030-05-05', '2030-05-06', '2030-05-07', '2030-05-08',
      

In [43]:
# One integer per date (0, 1, 2, ...) in the same order as the index.
ts = pd.Series(data=range(index.size), index=index)
ts

2030-03-14     0
2030-03-15     1
2030-03-16     2
2030-03-17     3
2030-03-18     4
              ..
2030-06-17    95
2030-06-18    96
2030-06-19    97
2030-06-20    98
2030-06-21    99
Freq: D, Length: 100, dtype: int64

In [47]:
# Label lookup with date strings
ts.loc['2030-03-15']  # value for one specific day
ts.loc['2030-03']     # every value in March 2030
ts.loc['2030']        # every value in the year 2030

2030-03-14     0
2030-03-15     1
2030-03-16     2
2030-03-17     3
2030-03-18     4
              ..
2030-06-17    95
2030-06-18    96
2030-06-19    97
2030-06-20    98
2030-06-21    99
Freq: D, Length: 100, dtype: int64

In [48]:
# Slice between two date strings; the displayed result contains both endpoints.
ts.loc['2030-03-15':'2030-03-22']

2030-03-15    1
2030-03-16    2
2030-03-17    3
2030-03-18    4
2030-03-19    5
2030-03-20    6
2030-03-21    7
2030-03-22    8
Freq: D, dtype: int64

In [51]:
# Look up a value using a Timestamp object as the label
stamp = pd.Timestamp('2030-03-22')
display(stamp)
ts.loc[stamp]
# Slice between two Timestamp objects
ts.loc[pd.Timestamp('2030-03-15'):pd.Timestamp('2030-03-22')]

Timestamp('2030-03-22 00:00:00')

2030-03-15    1
2030-03-16    2
2030-03-17    3
2030-03-18    4
2030-03-19    5
2030-03-20    6
2030-03-21    7
2030-03-22    8
Freq: D, dtype: int64

In [52]:
# Select several dates at once by passing a date_range() result as the key
ts.loc[pd.date_range(start='2030-03-24', periods=10, freq='D')]

2030-03-24    10
2030-03-25    11
2030-03-26    12
2030-03-27    13
2030-03-28    14
2030-03-29    15
2030-03-30    16
2030-03-31    17
2030-04-01    18
2030-04-02    19
Freq: D, dtype: int64

#### 常用属性

In [59]:
ts.index              # the DatetimeIndex itself
ts.index.year         # year of each entry
ts.index.month        # month of each entry
ts.index.day          # day of the month of each entry
ts.index.day_of_week  # day of the week as an integer (pandas documents Monday=0)

Index([3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5,
       6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1,
       2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4,
       5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0,
       1, 2, 3, 4],
      dtype='int32')

#### 时间序列常用方法

- 对时间做一些移动/滞后、频率转换、采样等相关操作

In [None]:
index = 