# datetime 相关知识总结

## 1. datetime

### 1. 创建日期

In [1]:
# Build a full datetime value
from datetime import datetime

# Precision goes down to microseconds (the last argument below)
d = datetime(2023, 7, 13, 17, 57, 13, 12)
# Print the value itself, then each of its components on its own line
for part in (d, d.year, d.month, d.day, d.hour, d.minute, d.second, d.microsecond):
    print(part)

2023-07-13 17:57:13.000012
2023
7
13
17
57
13
12


In [2]:
# date objects carry only year / month / day.
# Import the class directly: a bare `import datetime` would rebind the name
# `datetime` from the class (imported in an earlier cell) to the module.
from datetime import date

d = date(2023, 7, 13)
for part in (d, d.year, d.month, d.day):
    print(part)

2023-07-13
2023
7
13


In [3]:
# time objects carry only the clock part (hour / minute / second here).
# Import the class directly so the name `datetime` is not rebound to the module.
from datetime import time

d = time(17, 57, 25)
for part in (d, d.hour, d.minute, d.second):
    print(part)

17:57:25
17
57
25


### 2. datetime与字符串相关

In [4]:
# 1. datetime转变为字符串

In [5]:
from datetime import datetime

d = datetime(year=2023, month=7, day=13, hour=17, minute=57, second=13)
print(type(d))
# A plain str() call is enough to turn the datetime into text
d_str = str(d)
print(type(d_str))
d_str

<class 'datetime.datetime'>
<class 'str'>


'2023-07-13 17:57:13'

In [6]:
# 2. 使用 strftime 将 datetime 转换为 str

In [7]:
from datetime import datetime

d = datetime(year=2023, month=7, day=13, hour=17, minute=57, second=13)
# The % placeholders control the layout of the resulting string
fmt = '%Y-%m-%d %H:%M:%S'
d_str = d.strftime(fmt)
d_str

'2023-07-13 17:57:13'

In [8]:
# Equivalent spelling: call strftime on the class and pass the instance explicitly
from datetime import datetime

d = datetime(year=2023, month=7, day=13, hour=17, minute=57, second=13)
fmt = '%Y-%m-%d %H:%M:%S'
d_str_1 = datetime.strftime(d, fmt)
d_str_1

'2023-07-13 17:57:13'

In [9]:
# 3. 使用 strptime 将 str 转换为 datetime

In [10]:
from datetime import datetime

# Parse the text held in d_str (produced by an earlier cell) back into a datetime
fmt = '%Y-%m-%d %H:%M:%S'
d = datetime.strptime(d_str, fmt)
d

datetime.datetime(2023, 7, 13, 17, 57, 13)

## 2. pandas 处理时间数据

### 1. pd.date_range

In [11]:
import pandas as pd
# date_range builds a run of dates in one call.
# Only the start and end parameters are passed here.
d_range = pd.date_range(
    start='20211025',
    end='20220214',
)
d_range

DatetimeIndex(['2021-10-25', '2021-10-26', '2021-10-27', '2021-10-28',
               '2021-10-29', '2021-10-30', '2021-10-31', '2021-11-01',
               '2021-11-02', '2021-11-03',
               ...
               '2022-02-05', '2022-02-06', '2022-02-07', '2022-02-08',
               '2022-02-09', '2022-02-10', '2022-02-11', '2022-02-12',
               '2022-02-13', '2022-02-14'],
              dtype='datetime64[ns]', length=113, freq='D')

In [12]:
# Same span, sampled once every ten days
d_range_2 = pd.date_range(
    start='20211025',
    end='20220214',
    freq='10D',
)
d_range_2

DatetimeIndex(['2021-10-25', '2021-11-04', '2021-11-14', '2021-11-24',
               '2021-12-04', '2021-12-14', '2021-12-24', '2022-01-03',
               '2022-01-13', '2022-01-23', '2022-02-02', '2022-02-12'],
              dtype='datetime64[ns]', freq='10D')

In [13]:
# Monthly frequency: one entry per month end inside the span.
# An explicit MonthEnd offset object is passed instead of the 'M' string alias,
# which newer pandas releases deprecate; the offset object works on old and new versions.
d_range_3 = pd.date_range(start='20211025', end='20220214', freq=pd.offsets.MonthEnd())
d_range_3

DatetimeIndex(['2021-10-31', '2021-11-30', '2021-12-31', '2022-01-31'], dtype='datetime64[ns]', freq='M')

In [14]:
# Uses the three parameters start, periods and freq (no end is given):
# ten entries, two days apart, beginning at the start date
d_range_4 = pd.date_range(start='20211025', periods=10, freq='2D')
d_range_4

DatetimeIndex(['2021-10-25', '2021-10-27', '2021-10-29', '2021-10-31',
               '2021-11-02', '2021-11-04', '2021-11-06', '2021-11-08',
               '2021-11-10', '2021-11-12'],
              dtype='datetime64[ns]', freq='2D')

In [15]:
# Wrap the DatetimeIndex d_range in a pd.Series
pd.Series(d_range)

0     2021-10-25
1     2021-10-26
2     2021-10-27
3     2021-10-28
4     2021-10-29
         ...    
108   2022-02-10
109   2022-02-11
110   2022-02-12
111   2022-02-13
112   2022-02-14
Length: 113, dtype: datetime64[ns]

### 2. strftime：datetime 转 str

In [16]:
import pandas as pd
# Hourly timestamps across the span. The lowercase 'h' alias is used because
# newer pandas releases deprecate the uppercase 'H' spelling.
d_range_x = pd.date_range(start='20211025', end='20220214', freq='h')
# One-column frame holding those timestamps
df = pd.DataFrame(data=d_range_x, columns=['date'])
df.head()

Unnamed: 0,date
0,2021-10-25 00:00:00
1,2021-10-25 01:00:00
2,2021-10-25 02:00:00
3,2021-10-25 03:00:00
4,2021-10-25 04:00:00


In [17]:
# Format every value of the 'date' column as a '%Y-%m-%d' string via the .dt accessor
df['date_str'] = df['date'].dt.strftime('%Y-%m-%d')
# Show the first converted value
df['date_str'].iloc[0]

'2021-10-25'

### 3. strptime：str 转 datetime

In [18]:
# Parse each 'date_str' string back with datetime.strptime, one element at a time.
# Relies on `datetime` being the class imported in an earlier cell.
df['date_strp'] = df['date_str'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d'))
# The recorded output for this lookup is a pandas Timestamp
df['date_strp'].iloc[0]

Timestamp('2021-10-25 00:00:00')

### 4. pd.to_datetime

In [19]:
# pd.to_datetime converts the whole string column in a single call
df['date_1'] = pd.to_datetime(df['date_str'])
# Show the first converted value
df['date_1'].iloc[0]

Timestamp('2021-10-25 00:00:00')

In [20]:
# to_pydatetime() is a method of each Timestamp (not a top-level pd function);
# it is applied element-wise here. The recorded output of this cell is still a
# Timestamp once the values are stored back in the column.
df['date_2'] = df['date_1'].apply(lambda x: x.to_pydatetime())
df['date_2'].iloc[0]

Timestamp('2021-10-25 00:00:00')

In [21]:
# Calling to_pydatetime() on the single element yields a plain datetime.datetime
df['date_2'].iloc[0].to_pydatetime()

datetime.datetime(2021, 10, 25, 0, 0)

In [22]:
# 注意在dataframe里面，timestamp类型就用来表示datetime类型

In [23]:
# Illustration: a plain datetime assigned to a column is read back as a Timestamp
df['new'] = datetime(2023, 7, 13)
df['new'].iloc[0]

Timestamp('2023-07-13 00:00:00')

### 5. pd.Timestamp

In [24]:
# Build a Timestamp from a date-time string
time_stamp = pd.Timestamp('2021-10-25 19:33:55')
time_stamp

Timestamp('2021-10-25 19:33:55')

In [25]:
# A Timestamp can also be built directly from numeric components;
# the trailing 12 shows up as .000012 (microseconds) in the recorded output
pd.Timestamp(2021, 10, 25, 19, 33, 55, 12)

Timestamp('2021-10-25 19:33:55.000012')

### 6. pd.Period

In [26]:
# A year-only string gives a yearly Period; start_time / end_time are its first and last instants
year_period = pd.Period('2023')
year_period, year_period.start_time, year_period.end_time

(Period('2023', 'A-DEC'),
 Timestamp('2023-01-01 00:00:00'),
 Timestamp('2023-12-31 23:59:59.999999999'))

In [27]:
# A year-month string gives a monthly Period; start_time / end_time are its first and last instants
month_period = pd.Period('2023-07')
month_period, month_period.start_time, month_period.end_time

(Period('2023-07', 'M'),
 Timestamp('2023-07-01 00:00:00'),
 Timestamp('2023-07-31 23:59:59.999999999'))

In [28]:
# A full date string gives a daily Period; start_time / end_time are its first and last instants
day_period = pd.Period('2023-07-01')
day_period, day_period.start_time, day_period.end_time

(Period('2023-07-01', 'D'),
 Timestamp('2023-07-01 00:00:00'),
 Timestamp('2023-07-01 23:59:59.999999999'))

### 7. datetimeindex

In [29]:
# Promote the 'date' column to the index.
# NOTE(review): df is reassigned, so re-running this cell without rebuilding df
# will presumably fail because 'date' is no longer a column — confirm.
df = df.set_index('date')
df.head()

Unnamed: 0_level_0,date_str,date_strp,date_1,date_2,new
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-25 00:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,2023-07-13
2021-10-25 01:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,2023-07-13
2021-10-25 02:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,2023-07-13
2021-10-25 03:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,2023-07-13
2021-10-25 04:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,2023-07-13


In [30]:
# With a datetime index in place, at_time selects the rows at a given time of day
df.at_time(time='9:00')

Unnamed: 0_level_0,date_str,date_strp,date_1,date_2,new
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-25 09:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,2023-07-13
2021-10-26 09:00:00,2021-10-26,2021-10-26,2021-10-26,2021-10-26,2023-07-13
2021-10-27 09:00:00,2021-10-27,2021-10-27,2021-10-27,2021-10-27,2023-07-13
2021-10-28 09:00:00,2021-10-28,2021-10-28,2021-10-28,2021-10-28,2023-07-13
2021-10-29 09:00:00,2021-10-29,2021-10-29,2021-10-29,2021-10-29,2023-07-13
...,...,...,...,...,...
2022-02-09 09:00:00,2022-02-09,2022-02-09,2022-02-09,2022-02-09,2023-07-13
2022-02-10 09:00:00,2022-02-10,2022-02-10,2022-02-10,2022-02-10,2023-07-13
2022-02-11 09:00:00,2022-02-11,2022-02-11,2022-02-11,2022-02-11,2023-07-13
2022-02-12 09:00:00,2022-02-12,2022-02-12,2022-02-12,2022-02-12,2023-07-13


### 8. resample

In [31]:
# Overwrite 'new' with a constant 1 for every row
df['new'] = 1
# Calendar year of each row, taken from the 'date_1' column
df['year'] = df['date_1'].dt.year

In [32]:
# Downsample: group the rows into daily bins and sum the 'new' column
df.resample('D').agg({'new': 'sum'})

Unnamed: 0_level_0,new
date,Unnamed: 1_level_1
2021-10-25,24
2021-10-26,24
2021-10-27,24
2021-10-28,24
2021-10-29,24
...,...
2022-02-10,24
2022-02-11,24
2022-02-12,24
2022-02-13,24


In [33]:
# Upsample to 30-second bins and back-fill the newly created rows.
# Lowercase 's' is the seconds alias that newer pandas releases do not deprecate.
df.resample('30s').bfill()

Unnamed: 0_level_0,date_str,date_strp,date_1,date_2,new,year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-25 00:00:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,1,2021
2021-10-25 00:00:30,2021-10-25,2021-10-25,2021-10-25,2021-10-25,1,2021
2021-10-25 00:01:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,1,2021
2021-10-25 00:01:30,2021-10-25,2021-10-25,2021-10-25,2021-10-25,1,2021
2021-10-25 00:02:00,2021-10-25,2021-10-25,2021-10-25,2021-10-25,1,2021
...,...,...,...,...,...,...
2022-02-13 23:58:00,2022-02-14,2022-02-14,2022-02-14,2022-02-14,1,2022
2022-02-13 23:58:30,2022-02-14,2022-02-14,2022-02-14,2022-02-14,1,2022
2022-02-13 23:59:00,2022-02-14,2022-02-14,2022-02-14,2022-02-14,1,2022
2022-02-13 23:59:30,2022-02-14,2022-02-14,2022-02-14,2022-02-14,1,2022


In [34]:
# Daily bins again, this time taking the maximum of every column
df.resample('D').max()

Unnamed: 0_level_0,date_str,date_strp,date_1,date_2,new,year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-25,2021-10-25,2021-10-25,2021-10-25,2021-10-25,1,2021
2021-10-26,2021-10-26,2021-10-26,2021-10-26,2021-10-26,1,2021
2021-10-27,2021-10-27,2021-10-27,2021-10-27,2021-10-27,1,2021
2021-10-28,2021-10-28,2021-10-28,2021-10-28,2021-10-28,1,2021
2021-10-29,2021-10-29,2021-10-29,2021-10-29,2021-10-29,1,2021
...,...,...,...,...,...,...
2022-02-10,2022-02-10,2022-02-10,2022-02-10,2022-02-10,1,2022
2022-02-11,2022-02-11,2022-02-11,2022-02-11,2022-02-11,1,2022
2022-02-12,2022-02-12,2022-02-12,2022-02-12,2022-02-12,1,2022
2022-02-13,2022-02-13,2022-02-13,2022-02-13,2022-02-13,1,2022


In [35]:
# Nine timestamps one minute apart. 'min' is the minute alias that newer pandas
# releases do not deprecate (the single-letter 'T' spelling is deprecated).
index = pd.date_range('1/1/2000', periods=9, freq='min')
# Values 0..8 indexed by those timestamps
series = pd.Series(range(9), index=index)
series

2000-01-01 00:00:00    0
2000-01-01 00:01:00    1
2000-01-01 00:02:00    2
2000-01-01 00:03:00    3
2000-01-01 00:04:00    4
2000-01-01 00:05:00    5
2000-01-01 00:06:00    6
2000-01-01 00:07:00    7
2000-01-01 00:08:00    8
Freq: T, dtype: int64

In [36]:
# Sum the values in 3-minute bins ('3min' replaces the deprecated '3T' alias)
series.resample('3min').sum()

2000-01-01 00:00:00     3
2000-01-01 00:03:00    12
2000-01-01 00:06:00    21
Freq: 3T, dtype: int64

In [37]:
# Same 3-minute bins, labelled by their right edge ('3min' replaces the deprecated '3T' alias)
series.resample('3min', label='right').sum()

2000-01-01 00:03:00     3
2000-01-01 00:06:00    12
2000-01-01 00:09:00    21
Freq: 3T, dtype: int64

In [38]:
import numpy as np
def custom_resampler(arraylike):
    """Return the sum of ``arraylike`` increased by a constant 5.

    Used in this notebook as a custom aggregation passed to ``resample(...).apply``.
    """
    total = np.sum(arraylike)
    return total + 5

# Apply the custom aggregation to each 3-minute bin ('3min' replaces the deprecated '3T' alias)
series.resample('3min').apply(custom_resampler)

2000-01-01 00:00:00     8
2000-01-01 00:03:00    17
2000-01-01 00:06:00    26
Freq: 3T, dtype: int64

In [39]:
# Upsample to 30-second steps without filling; the recorded output shows NaN for the new rows.
# Lowercase 's' is the seconds alias that newer pandas releases do not deprecate.
series.resample('30s').asfreq()

2000-01-01 00:00:00    0.0
2000-01-01 00:00:30    NaN
2000-01-01 00:01:00    1.0
2000-01-01 00:01:30    NaN
2000-01-01 00:02:00    2.0
2000-01-01 00:02:30    NaN
2000-01-01 00:03:00    3.0
2000-01-01 00:03:30    NaN
2000-01-01 00:04:00    4.0
2000-01-01 00:04:30    NaN
2000-01-01 00:05:00    5.0
2000-01-01 00:05:30    NaN
2000-01-01 00:06:00    6.0
2000-01-01 00:06:30    NaN
2000-01-01 00:07:00    7.0
2000-01-01 00:07:30    NaN
2000-01-01 00:08:00    8.0
Freq: 30S, dtype: float64

In [41]:
# end of today