In [1]:
# 时间格式
## 数值型：e.g., 1612347227（时间戳）
## 字符型：e.g., '2020-10-12', '2020-10-12 14:30:40'
## 对象型：datetime，date, time, pd.Timestamp

# 相关的包有：
## time
## datetime
## numpy包中和时间相关的数据类型：datetime64， timedelta64
## pandas包中时间相关的类：Timestamp，Period，Timedelta，DatetimeIndex，PeriodIndex，TimedeltaIndex

# 读取时间列
## pd.read_csv(): 涉及参数parse_dates, keep_date_col, date_parser

In [2]:
# 时间戳，时间元组，字符串，datetime类之间转换关系（涉及time和datetime包）
#                                                |--------------------------------------------------------------->|
#                                                |<-----------------------------obj.timetuple()-------------------|

#      -- time.localtime(), time.gmtime() -->         -- time.strftime() -->        -- datetime.strptime()--> 
# 时间戳                                       时间元组                         字符串                           datetime类（date，time类）
#      <------------ time.mktime()-----------         <--  time.strptime()--        <-- obj.strftime()------- 
#   |                                            |                              |                                 |
#   |<--------------------------------------------------------------------------|                                 |  
#   |----------------- time.ctime() 字符串格式限制------------------------------->|                                 |
#   |                                            |                              |                                 |
#   |<-------------------------------------------------------------time.mktime(obj.timetuple())-------------------|
#   |--------------------------------------------------------------datetime.fromtimestamp()---------------------->|
       

In [3]:
# pd.Timestamp, np.datetime64, datetime.datetime之间相互转换

# 时间戳     --- pd.to_datetime(), pd.Timestamp() --->            --- time.mktime(obj.timetuple())--> 时间戳

# 字符串     --- pd.to_datetime(), pd.Timestamp() --->            --- obj.strftime() ---------------> 字符串
 
# datetime   --- pd.to_datetime(), pd.Timestamp() ---> Timestamp --- obj.to_pydatetime()-----------> datetime

# datetime64 --- pd.to_datetime(), pd.Timestamp() --->           --- obj.to_datetime64()-----------> datetime64

### time包

In [4]:
import time

#### 时间戳

In [5]:
time.time() # UNIX timestamp: a float counting the seconds elapsed since 1970/01/01

1612505673.502017

#### 时间元组 struct_time

In [6]:
m = time.localtime(1612347227.356289) # timestamp -> time tuple (struct_time) in local time
# time.gmtime(1612347227.35628) # same conversion, expressed in Greenwich time instead
m # tm_wday: day of the week, 0 = Monday ... 6 = Sunday; tm_isdst: daylight-saving flag, one of -1, 0, 1

time.struct_time(tm_year=2021, tm_mon=2, tm_mday=3, tm_hour=18, tm_min=13, tm_sec=47, tm_wday=2, tm_yday=34, tm_isdst=0)

In [7]:
m.tm_year

2021

#### 字符串，时间戳，时间元组之间的转换关系

In [8]:
# timestamp -> time tuple
# Only the last expression of a cell is echoed, so the output below is the
# gmtime() result; the localtime() value is computed and then discarded.
time.localtime(1612347227.356289)
time.gmtime(1612347227.356289)

time.struct_time(tm_year=2021, tm_mon=2, tm_mday=3, tm_hour=10, tm_min=13, tm_sec=47, tm_wday=2, tm_yday=34, tm_isdst=0)

In [9]:
# time tuple -> timestamp (the tuple carries whole seconds only, so the result ends in .0)
time.mktime(m)

1612347227.0

In [10]:
# time tuple -> string
print(time.asctime(m))  # fixed layout
print(time.strftime('%Y-%m-%d %H:%M', m))  # layout chosen through format codes

Wed Feb  3 18:13:47 2021
2021-02-03 18:13


In [11]:
# timestamp -> string (fixed ctime layout; the format cannot be customised)
print(time.ctime(1612347227.0))

Wed Feb  3 18:13:47 2021


In [12]:
# string -> time tuple; the second argument describes the layout of the input text
time.strptime('2021-02-03 18:13:47', '%Y-%m-%d %H:%M:%S')

time.struct_time(tm_year=2021, tm_mon=2, tm_mday=3, tm_hour=18, tm_min=13, tm_sec=47, tm_wday=2, tm_yday=34, tm_isdst=-1)

In [13]:
# 没有把字符串直接转换为时间戳的函数：可以先用strptime把字符串转换为时间元组，
# 再用mktime把时间元组转换为时间戳

### --------------------------------Session End------------------------------------------

### datetime包

In [14]:
import datetime

In [15]:
# datetime包里常用的类有：
#  date: 日期对象，常用属性有year,month,day
#  time: 时间对象,常用属性有hour,minute,second,microsecond
#  datetime: 日期时间对象,可以看成date类和time类的合体
#  timedelta: 计算两个datetime对象之间的差值
#  tzinfo: 时区信息
# datetime中有的常量：
#  MAXYEAR: 9999
#  MINYEAR: 1

#### date类

In [16]:
d = datetime.date.today()  # current date as a datetime.date (year, month, day)
d

datetime.date(2021, 2, 5)

In [17]:
print(d.year, d.month, d.day)

2021 2 5


In [18]:
d.timetuple() # compatible with the time module's struct_time, but the time-of-day fields are 0

time.struct_time(tm_year=2021, tm_mon=2, tm_mday=5, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=4, tm_yday=36, tm_isdst=-1)

In [19]:
# timestamp -> date
datetime.date.fromtimestamp(1612347227.0)

datetime.date(2021, 2, 3)

In [20]:
# date -> string
print(d.strftime('%Y/%m/%d'))
print(d.ctime()) # ctime layout; the time part prints as 00:00:00

2021/02/05
Fri Feb  5 00:00:00 2021


#### time类

In [21]:
t = datetime.time(12, 20, 49, 899)  # positional arguments: hour, minute, second, microsecond
t

datetime.time(12, 20, 49, 899)

In [22]:
print(t.hour, t.minute, t.second, t.microsecond)

12 20 49 899


In [23]:
# time object -> string
t.strftime('%H:%M:%S')

'12:20:49'

#### datetime类

In [24]:
dt = datetime.datetime.now()
dt

datetime.datetime(2021, 2, 5, 14, 14, 33, 721102)

In [25]:
d = dt.date() # extract the date part (rebinds d, which earlier held date.today())
d

datetime.date(2021, 2, 5)

In [26]:
t = dt.time() # extract the time-of-day part (rebinds t, which earlier held a hand-built time)
t

datetime.time(14, 14, 33, 721102)

In [27]:
print(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond)

2021 2 5 14 14 33 721102


In [28]:
dt.timetuple()

time.struct_time(tm_year=2021, tm_mon=2, tm_mday=5, tm_hour=14, tm_min=14, tm_sec=33, tm_wday=4, tm_yday=36, tm_isdst=-1)

In [29]:
datetime.datetime.combine(d, t) # date part + time part -> datetime

datetime.datetime(2021, 2, 5, 14, 14, 33, 721102)

In [30]:
# datetime -> string
dt.strftime('%Y/%m/%d %H:%M')

'2021/02/05 14:14'

In [31]:
# string -> datetime; the format string must describe the input text
datetime.datetime.strptime('2021/02/03 12:00:00', '%Y/%m/%d %H:%M:%S')

datetime.datetime(2021, 2, 3, 12, 0)

In [32]:
# timestamp -> datetime
datetime.datetime.fromtimestamp(time.time())

datetime.datetime(2021, 2, 5, 14, 14, 33, 821928)

#### timedelta类

In [33]:
dt1 = datetime.datetime.now()
dt1

datetime.datetime(2021, 2, 5, 14, 14, 33, 831661)

In [34]:
dt1 + datetime.timedelta(days=1)

datetime.datetime(2021, 2, 6, 14, 14, 33, 831661)

In [35]:
dt1 + datetime.timedelta(days=-4)

datetime.datetime(2021, 2, 1, 14, 14, 33, 831661)

In [36]:
dt1 + datetime.timedelta(hours=1)

datetime.datetime(2021, 2, 5, 15, 14, 33, 831661)

In [37]:
dt1 - datetime.timedelta(minutes=5)

datetime.datetime(2021, 2, 5, 14, 9, 33, 831661)

In [38]:
dt1 + datetime.timedelta(seconds=1)

datetime.datetime(2021, 2, 5, 14, 14, 34, 831661)

In [39]:
dt1 - datetime.timedelta(microseconds=1)

datetime.datetime(2021, 2, 5, 14, 14, 33, 831660)

### --------------------------------Session End------------------------------------------

### numpy包

In [40]:
import numpy as np

In [41]:
# year-precision datetime64 plus a day-unit timedelta64; the result is shown with day precision
np.datetime64('2021') + np.timedelta64(20, 'D')

numpy.datetime64('2021-01-21')

In [42]:
# subtract one week ('W' unit) from a day-precision date
np.datetime64('2021-01-31') - np.timedelta64(1, 'W')

numpy.datetime64('2021-01-24')

In [43]:
t1 = np.datetime64(10, 'ns')  # integer + unit: 10 nanoseconds after 1970-01-01T00:00:00
t1

numpy.datetime64('1970-01-01T00:00:00.000000010')

In [44]:
t2 = np.datetime64(10, 's')  # integer + unit: 10 seconds after 1970-01-01T00:00:00
t2

numpy.datetime64('1970-01-01T00:00:10')

In [45]:
t2 - t1  # 1 second = 10^6 microseconds = 10^9 nanoseconds; the difference is reported in ns

numpy.timedelta64(9999999990,'ns')

### pandas包

In [46]:
import pandas as pd

In [47]:
# pd.Timestamp类：包含year,month,day,hour,minute,second,week,quarter,weekofyear...
# pd.Period类：表示一个时间区间（如某年、某月、某天），粒度由freq决定
# 创建时间序列：pd.DatetimeIndex(), pd.PeriodIndex(), pd.Index(),pd.to_datetime(), pd.date_range()
# 时间差：pd.Timedelta
# pandas.Series.dt（类似pandas.Series.str可以用来调用对字符串访问的函数， pandas.Series.dt可以用来调用对datetime访问的函数）

In [48]:
# Timestamp
# string -> Timestamp
m = pd.Timestamp('2021-02-01 12:00:00')
m

Timestamp('2021-02-01 12:00:00')

In [49]:
# Calendar fields are plain attributes; week and weekofyear print the same value here.
print(m.year, m.month, m.day, m.hour, m.minute, m.second, m.week, m.quarter, m.weekofyear
     ,m.dayofyear, m.dayofweek, m.is_leap_year)
# weekday(), day_name(), date() and time() are methods rather than attributes.
print(m.weekday(), m.day_name(), m.date(), m.time())

2021 2 1 12 0 0 5 1 5 32 0 False
0 Monday 2021-02-01 12:00:00


In [50]:
# Timestamp -> datetime.datetime
m.to_pydatetime()

datetime.datetime(2021, 2, 1, 12, 0)

In [51]:
# datetime.datetime -> Timestamp (the two commented lines are alternative spellings)
pd.Timestamp(datetime.datetime(2021, 2, 3, 10, 30, 59))
#pd.to_datetime(datetime.datetime(2021, 2, 3, 10, 30, 59))
#pd.DatetimeIndex([datetime.datetime(2021, 2, 3, 10, 30, 59)])[0]

Timestamp('2021-02-03 10:30:59')

In [52]:
# number -> Timestamp
pd.Timestamp(1612425823.547255) # without a unit the number is read as nanoseconds, hence a 1970 result

Timestamp('1970-01-01 00:00:01.612425823')

In [53]:
# UNIX timestamp -> Timestamp
pd.Timestamp(1612425823.547255, unit='s') # UNIX timestamps count seconds, so say so with unit='s'

Timestamp('2021-02-04 08:03:43.547255039')

In [54]:
# Smallest and largest values pd.Timestamp exposes as .min / .max
print(pd.Timestamp.min)
print(pd.Timestamp.max)

1677-09-21 00:12:43.145225
2262-04-11 23:47:16.854775807


In [55]:
# Period
pd.Period('2021-02') # no freq given: this year-month string yields a monthly ('M') Period

Period('2021-02', 'M')

In [56]:
# The same string rendered at different resolutions, selected through freq.
# The lowercase aliases 's' / 'min' / 'h' are used because the older spellings
# 'S' / 'T' / 'H' are deprecated in pandas >= 2.2; the printed values are the same.
print(pd.Period('2021-02', freq='s'))   # s: second
print(pd.Period('2021-02', freq='min')) # min: minute
print(pd.Period('2021-02', freq='h'))   # h: hour
print(pd.Period('2021-02', freq='D'))   # D: day
print(pd.Period('2021-02', freq='M'))   # M: month
print(pd.Period('2021-02', freq='Y'))   # Y: year

2021-02-01 00:00:00
2021-02-01 00:00
2021-02-01 00:00
2021-02-01
2021-02
2021


In [57]:
# to_datetime: can produce either a single Timestamp or a DatetimeIndex
#    accepts int, float, string, datetime, list, tuple, 1-d array, Series, DataFrame, dict-like
pd.to_datetime(datetime.datetime(2021, 2, 2, 10, 30, 59))
# pd.to_datetime(1612425823.547255, unit='s') 

Timestamp('2021-02-02 10:30:59')

In [58]:
pd.to_datetime('2021-01-10')

Timestamp('2021-01-10 00:00:00')

In [59]:
pd.to_datetime(['2021-01-01', '2021-01-02', '2021-01-03'])

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None)

In [60]:
# DatetimeIndex -> array of datetime.datetime objects
pd.to_datetime(['2021-01-01', '2021-01-02', '2021-01-03']).to_pydatetime()

array([datetime.datetime(2021, 1, 1, 0, 0),
       datetime.datetime(2021, 1, 2, 0, 0),
       datetime.datetime(2021, 1, 3, 0, 0)], dtype=object)

In [61]:
# 生成DatetimeIndex方法：pd.to_datetime, pd.DatetimeIndex, pd.Index, pd.date_range

In [62]:
# the integers are offsets measured in `unit` (days here) counted from `origin`
pd.to_datetime([0,1,2], unit='D', origin=pd.Timestamp('2021-01-01'))

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None)

In [63]:
# Three consecutive days, 2021-02-01 through 2021-02-03, as plain datetime objects.
dtest = [datetime.datetime(2021, 2, day_of_month) for day_of_month in range(1, 4)]
# A list of datetime objects converts directly into a DatetimeIndex.
pd.DatetimeIndex(dtest)

DatetimeIndex(['2021-02-01', '2021-02-02', '2021-02-03'], dtype='datetime64[ns]', freq=None)

In [64]:
# A generic pd.Index built from datetime objects comes back as a DatetimeIndex.
pd.Index(dtest)
# pd.PeriodIndex(dtest, freq='D') # builds a PeriodIndex instead
# PeriodIndex(['2021-02-01', '2021-02-02', '2021-02-03'], dtype='period[D]', freq='D')

DatetimeIndex(['2021-02-01', '2021-02-02', '2021-02-03'], dtype='datetime64[ns]', freq=None)

In [65]:
# start and end are both included; with no freq given the step is one day (freq='D')
pd.date_range('2020-01-01', '2020-01-04')
#pd.date_range(datetime.datetime(2020,1,1), datetime.datetime(2020,1,4))

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04'], dtype='datetime64[ns]', freq='D')

In [66]:
# One timestamp per minute. 'min' replaces the alias 'T', which is deprecated in pandas >= 2.2.
pd.date_range('2020-01-01', periods=5, freq='min')

DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 00:01:00',
               '2020-01-01 00:02:00', '2020-01-01 00:03:00',
               '2020-01-01 00:04:00'],
              dtype='datetime64[ns]', freq='T')

In [67]:
# One timestamp per hour, both endpoints included. 'h' replaces the alias 'H',
# which is deprecated in pandas >= 2.2.
pd.date_range('2021-01-01', '2021-01-02', freq='h')

DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 01:00:00',
               '2021-01-01 02:00:00', '2021-01-01 03:00:00',
               '2021-01-01 04:00:00', '2021-01-01 05:00:00',
               '2021-01-01 06:00:00', '2021-01-01 07:00:00',
               '2021-01-01 08:00:00', '2021-01-01 09:00:00',
               '2021-01-01 10:00:00', '2021-01-01 11:00:00',
               '2021-01-01 12:00:00', '2021-01-01 13:00:00',
               '2021-01-01 14:00:00', '2021-01-01 15:00:00',
               '2021-01-01 16:00:00', '2021-01-01 17:00:00',
               '2021-01-01 18:00:00', '2021-01-01 19:00:00',
               '2021-01-01 20:00:00', '2021-01-01 21:00:00',
               '2021-01-01 22:00:00', '2021-01-01 23:00:00',
               '2021-01-02 00:00:00'],
              dtype='datetime64[ns]', freq='H')

In [68]:
pd.date_range('2021-01-01', '2021-03-01', freq='W') # every Sunday (the freq is reported as W-SUN)

DatetimeIndex(['2021-01-03', '2021-01-10', '2021-01-17', '2021-01-24',
               '2021-01-31', '2021-02-07', '2021-02-14', '2021-02-21',
               '2021-02-28'],
              dtype='datetime64[ns]', freq='W-SUN')

In [69]:
pd.date_range('2020-01-01', '2021-01-01', freq='M') # every month end

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')

In [70]:
pd.date_range('2020-02-01', '2021-01-01', freq='3M') # every third month end

DatetimeIndex(['2020-02-29', '2020-05-31', '2020-08-31', '2020-11-30'], dtype='datetime64[ns]', freq='3M')

In [71]:
t = pd.date_range('2020-01-01', '2020-01-04')
t

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04'], dtype='datetime64[ns]', freq='D')

In [72]:
t[0]

Timestamp('2020-01-01 00:00:00', freq='D')

In [73]:
t[0].to_period('M')

Period('2020-01', 'M')

In [74]:
t[0]==pd.Timestamp('2020-01-01')

True

In [75]:
# Timestamp -> np.datetime64
t[0].to_datetime64()

numpy.datetime64('2020-01-01T00:00:00.000000000')

In [76]:
k = np.datetime64('2020-01-10')
k

numpy.datetime64('2020-01-10')

In [77]:
# datetime64 -> Timestamp (pd.Timestamp(k) is an alternative spelling)
pd.to_datetime(k)
# pd.Timestamp(k)

Timestamp('2020-01-10 00:00:00')

In [78]:
# datetime64 -> Python object. k has day precision, and for it .item() gives a
# datetime.date (see the output below), not a datetime.datetime.
k.item()
#k.tolist()

datetime.date(2020, 1, 10)

In [79]:
# pd.Timedelta can be added to a plain datetime.datetime; the result shown is still a datetime.datetime
datetime.datetime(2021,2,2) + pd.Timedelta(days=1)

datetime.datetime(2021, 2, 3, 0, 0)

In [80]:
pd.Timestamp('2021-01-01 10:00:00') + pd.Timedelta(minutes=19)

Timestamp('2021-01-01 10:19:00')

In [81]:
m = pd.Timestamp('2021-01-03 10:00:30') - pd.Timestamp('2021-01-01 23:00:00')
m

Timedelta('1 days 11:00:30')

In [82]:
print(m.days)  # whole-day component
print(m.seconds) # remainder within the last day, in seconds: 11*60*60 + 30 = 39630
print(m.microseconds)

1
39630
0


### --------------------------------Session End------------------------------------------

In [83]:
from io import StringIO

In [84]:
# Small in-memory CSV: one header row followed by three records.
foo = ''.join((
    'date,A,B,C\n',
    '2009-01-01,a,1,2\n',
    '2009-01-02,b,3,4\n',
    '2009-01-03,c,4,5\n',
))
# Column 0 ('date') becomes the index and is parsed as datetimes.
# parse_dates=[0] would name the same column by position; parse_dates=True
# would instead ask pandas to try parsing the index. Naming the column
# explicitly keeps the intent visible.
m = pd.read_csv(
    StringIO(foo),
    index_col=0,
    parse_dates=['date'],
)

In [85]:
m

Unnamed: 0_level_0,A,B,C
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-01-01,a,1,2
2009-01-02,b,3,4
2009-01-03,c,4,5


In [86]:
m.index

DatetimeIndex(['2009-01-01', '2009-01-02', '2009-01-03'], dtype='datetime64[ns]', name='date', freq=None)

In [87]:
foo2 = (
    '20200110, 18:00:00, 1000\n'
    '20200110, 19:00:00, 2000\n'
    '20200110, 20:00:00, 1500\n'
    )
# Build one datetime column out of a separate date column and time column.
# The original spelling, parse_dates=[[0,1]] with keep_date_col=True, is
# deprecated in pandas >= 2.2, so the two columns are read as text and
# combined explicitly; this works on old and new pandas alike.
m2 = pd.read_csv(StringIO(foo2)
                 , header=None  # the data has no header row
                 , names=['Day', 'Hour', 'Amount']  # column names to assign
                 , dtype={'Day': str, 'Hour': str}  # keep the raw date/time text as-is
                 , skipinitialspace=True  # drop the blank that follows each comma
                )
# Insert the parsed column first so the layout stays Day_Hour, Day, Hour, Amount
# (the source columns are kept, as keep_date_col=True did).
m2.insert(0, 'Day_Hour',
          pd.to_datetime(m2['Day'] + ' ' + m2['Hour'], format='%Y%m%d %H:%M:%S'))

In [88]:
m2

Unnamed: 0,Day_Hour,Day,Hour,Amount
0,2020-01-10 18:00:00,20200110,18:00:00,1000
1,2020-01-10 19:00:00,20200110,19:00:00,2000
2,2020-01-10 20:00:00,20200110,20:00:00,1500


In [89]:
type(m2.Day_Hour)

pandas.core.series.Series

In [90]:
type(m2.Day_Hour[0])

pandas._libs.tslibs.timestamps.Timestamp

In [91]:
m2.Day_Hour.dt.date # the Series.dt accessor exposes date/time information of a datetime column

0    2020-01-10
1    2020-01-10
2    2020-01-10
Name: Day_Hour, dtype: object

In [92]:
m2.Day_Hour.dt.year # also available: dt.month, dt.quarter, dt.time, dt.hour, dt.second, dt.day_name(), dt.to_period('M')...

0    2020
1    2020
2    2020
Name: Day_Hour, dtype: int64

In [93]:
foo3 = (
    'date,A,B,C\n'
    '2009-01-01 10:00:00,a,1,2\n'
    '2009-02-01 10:00:00,b,3,4\n'
    '2009-03-01 10:00:00,c,4,5\n'
)
# Parse the 'date' column, then drop the time of day so only the calendar
# date remains (the column stays datetime64, at midnight).
# The original passed date_parser=lambda c: pd.to_datetime(c).date(), but
# date_parser is deprecated in pandas >= 2.0 and that lambda only works one
# value at a time. For custom layouts, parse after reading with
# pd.to_datetime(..., format=...) instead.
m3 = pd.read_csv(StringIO(foo3)
                 ,parse_dates=['date']
)
m3['date'] = m3['date'].dt.normalize()

In [94]:
m3

Unnamed: 0,date,A,B,C
0,2009-01-01,a,1,2
1,2009-02-01,b,3,4
2,2009-03-01,c,4,5


In [95]:
type(m3.date)

pandas.core.series.Series

In [96]:
type(m3.date[0])

pandas._libs.tslibs.timestamps.Timestamp

In [97]:
m3.date.dt.is_month_start

0    True
1    True
2    True
Name: date, dtype: bool