In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.Timestamp(year=2020, month=11, day=17, hour=13, minute=15, second=56)

Timestamp('2020-11-17 13:15:56')

In [3]:
pd.Timestamp('2020-11-14')

Timestamp('2020-11-14 00:00:00')

In [4]:
pd.Timestamp('2020/11/12')

Timestamp('2020-11-12 00:00:00')

In [5]:
pd.Timestamp('Nov 12, 2018')

Timestamp('2018-11-12 00:00:00')

In [6]:
pd.Timestamp(500) # count by nano-second from 1970.

Timestamp('1970-01-01 00:00:00.000000500')

In [7]:
pd.Timestamp(500, unit='D') # count by day from 1970.

Timestamp('1971-05-16 00:00:00')

In [9]:
pd.Timestamp(50, unit='Y') # count by year from 1970.

Timestamp('2020-01-01 03:00:00')

In [10]:
pd.to_datetime('2020-11-12')

Timestamp('2020-11-12 00:00:00')

In [11]:
# Timestamp: LIST can not be inputted
# to_datetime: LIST can be inputted! WOW!
s = pd.Series([10, 100, 1000])
pd.to_datetime(s, unit='D')

0   1970-01-11
1   1970-04-11
2   1972-09-27
dtype: datetime64[ns]

In [12]:
s = pd.Series(['2020-11-13', '11-09-2020', '2020/04/13', 'Jan 13, 2020'])
pd.to_datetime(s)

0   2020-11-13
1   2020-11-09
2   2020-04-13
3   2020-01-13
dtype: datetime64[ns]

In [13]:
# How to preven error to happen?
s = pd.Series(['2020-11-9999', '11-09-2020', '2020/04/13', 'Jan 13, 2020'])
# when you ignore it, change into datetime is not applied at all.
pd.to_datetime(s, errors='ignore')

0    2020-11-9999
1      11-09-2020
2      2020/04/13
3    Jan 13, 2020
dtype: object

In [14]:
# But, when you coerce it, only valid items are applied!
pd.to_datetime(s, errors='coerce')

0          NaT
1   2020-11-09
2   2020-04-13
3   2020-01-13
dtype: datetime64[ns]

In [15]:
d = 'StartDate: M11 D09, Y2020 StartTime: 09:15AM'
pd.to_datetime(d, errors='coerce') # Can't read this...

NaT

In [16]:
# https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
formatted = 'StartDate: M%m D%d, Y%Y StartTime: %I:%M%p'
pd.to_datetime(d, format=formatted, errors='coerce') # wow!

Timestamp('2020-11-09 09:15:00')

In [17]:
d = 'Start Date: Dec 25, 2020 Start Time: 9:00 am'
formatted = 'Start Date: %b %d, %Y Start Time: %I:%M %p'
pd.to_datetime(d, format=formatted, errors='coerce')

Timestamp('2020-12-25 09:00:00')

In [18]:
df = pd.read_csv('data/sample_df.csv')
df.head()

Unnamed: 0,Name,Birth,email
0,Captain,2019-01-01 9:10,happy@gmail.com
1,Hulk,2019-01-08 9:20,1004@NAVER.COM
2,Iron,2019-02-01 10:20,Iron at yahoo.co.kr
3,Widow,2019-02-02 11:40,\tWidow@gmail.com
4,Thor,2019-02-28 15:10,thor@daum.net\t


In [19]:
df.info() # birth column's dtype is object....hmm....

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    10 non-null     object
 1   Birth   10 non-null     object
 2   email   10 non-null     object
dtypes: object(3)
memory usage: 368.0+ bytes


In [20]:
birth = df['Birth']
birth

0     2019-01-01 9:10
1     2019-01-08 9:20
2    2019-02-01 10:20
3    2019-02-02 11:40
4    2019-02-28 15:10
5    2019-04-10 19:20
6    2019-06-30 21:20
7    2019-07-20 23:30
8    2019-08-28 11:48
9     2019-09-01 3:12
Name: Birth, dtype: object

In [21]:
birth = pd.to_datetime(birth, format='%Y-%m-%d %H:%M')
birth

0   2019-01-01 09:10:00
1   2019-01-08 09:20:00
2   2019-02-01 10:20:00
3   2019-02-02 11:40:00
4   2019-02-28 15:10:00
5   2019-04-10 19:20:00
6   2019-06-30 21:20:00
7   2019-07-20 23:30:00
8   2019-08-28 11:48:00
9   2019-09-01 03:12:00
Name: Birth, dtype: datetime64[ns]

## datetime Methods

In [22]:
# date: YYYY-MM-DD (object)

In [23]:
birth.dt.date

0    2019-01-01
1    2019-01-08
2    2019-02-01
3    2019-02-02
4    2019-02-28
5    2019-04-10
6    2019-06-30
7    2019-07-20
8    2019-08-28
9    2019-09-01
Name: Birth, dtype: object

In [24]:
# year (integer)
birth.dt.year

0    2019
1    2019
2    2019
3    2019
4    2019
5    2019
6    2019
7    2019
8    2019
9    2019
Name: Birth, dtype: int64

In [25]:
# month (integer)
birth.dt.month

0    1
1    1
2    2
3    2
4    2
5    4
6    6
7    7
8    8
9    9
Name: Birth, dtype: int64

In [26]:
# day (integer)
birth.dt.day

0     1
1     8
2     1
3     2
4    28
5    10
6    30
7    20
8    28
9     1
Name: Birth, dtype: int64

In [27]:
# time (HH:MM:SS) (object)
birth.dt.time

0    09:10:00
1    09:20:00
2    10:20:00
3    11:40:00
4    15:10:00
5    19:20:00
6    21:20:00
7    23:30:00
8    11:48:00
9    03:12:00
Name: Birth, dtype: object

In [28]:
# hour (integer)
birth.dt.hour

0     9
1     9
2    10
3    11
4    15
5    19
6    21
7    23
8    11
9     3
Name: Birth, dtype: int64

In [29]:
# minute (integer)
birth.dt.minute

0    10
1    20
2    20
3    40
4    10
5    20
6    20
7    30
8    48
9    12
Name: Birth, dtype: int64

In [30]:
# second (integer)
birth.dt.second

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
Name: Birth, dtype: int64

In [31]:
# quarter (integer)
birth.dt.quarter

0    1
1    1
2    1
3    1
4    1
5    2
6    2
7    3
8    3
9    3
Name: Birth, dtype: int64

In [32]:
# day of week (object)
birth.dt.day_name()

0      Tuesday
1      Tuesday
2       Friday
3     Saturday
4     Thursday
5    Wednesday
6       Sunday
7     Saturday
8    Wednesday
9       Sunday
Name: Birth, dtype: object

In [33]:
# day of week (Dutch) (object)
birth.dt.day_name('Dutch')

0      Dinsdag
1      Dinsdag
2      Vrijdag
3     Zaterdag
4    Donderdag
5     Woensdag
6       Zondag
7     Zaterdag
8     Woensdag
9       Zondag
Name: Birth, dtype: object

In [34]:
# day of week (in number) (integer)
birth.dt.weekday
# 0: Mon ~ 6: Sun

0    1
1    1
2    4
3    5
4    3
5    2
6    6
7    5
8    2
9    6
Name: Birth, dtype: int64

In [35]:
# Whether or not it's weekend
birth.dt.weekday.isin([5,6])
# or birth.dt.weekday >= 5

0    False
1    False
2    False
3     True
4    False
5    False
6     True
7     True
8    False
9     True
Name: Birth, dtype: bool

In [36]:
# week of year (integer)
birth.dt.weekofyear

0     1
1     2
2     5
3     5
4     9
5    15
6    26
7    29
8    35
9    35
Name: Birth, dtype: int64

In [37]:
# day of year (integer)
birth.dt.dayofyear

0      1
1      8
2     32
3     33
4     59
5    100
6    181
7    201
8    240
9    244
Name: Birth, dtype: int64

In [38]:
# days in month (how many dates each month has) (integer)
birth.dt.days_in_month

0    31
1    31
2    28
3    28
4    28
5    30
6    30
7    31
8    31
9    30
Name: Birth, dtype: int64

In [39]:
# is leap year (윤년)
birth.dt.is_leap_year

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
Name: Birth, dtype: bool

In [40]:
# is month start (1st day of the month?)
birth.dt.is_month_start

0     True
1    False
2     True
3    False
4    False
5    False
6    False
7    False
8    False
9     True
Name: Birth, dtype: bool

In [41]:
# same as above!
birth.dt.day == 1

0     True
1    False
2     True
3    False
4    False
5    False
6    False
7    False
8    False
9     True
Name: Birth, dtype: bool

In [42]:
# is month end (last day of the month?)
birth.dt.is_month_end

0    False
1    False
2    False
3    False
4     True
5    False
6     True
7    False
8    False
9    False
Name: Birth, dtype: bool

In [43]:
# is quarter start
birth.dt.is_quarter_start

0     True
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
Name: Birth, dtype: bool

In [45]:
# is quarter end
birth.dt.is_quarter_end

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7    False
8    False
9    False
Name: Birth, dtype: bool

In [46]:
# Time Difference
eday = pd.to_datetime('2020-12-11') - birth
eday

0   709 days 14:50:00
1   702 days 14:40:00
2   678 days 13:40:00
3   677 days 12:20:00
4   651 days 08:50:00
5   610 days 04:40:00
6   529 days 02:40:00
7   509 days 00:30:00
8   470 days 12:12:00
9   466 days 20:48:00
Name: Birth, dtype: timedelta64[ns]

In [49]:
# Change into the unit of day
eday.astype('timedelta64[D]').astype('int')

0    709
1    702
2    678
3    677
4    651
5    610
6    529
7    509
8    470
9    466
Name: Birth, dtype: int32

In [50]:
# Change into the unit of year
eday.astype('timedelta64[Y]').astype('int')

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: Birth, dtype: int32

In [51]:
# add to timedate
birth + 100 # this will make an error!

TypeError: Addition/subtraction of integers and integer-arrays with DatetimeArray is no longer supported.  Instead of adding/subtracting `n`, use `n * obj.freq`

In [52]:
# add 100 days
birth + pd.to_timedelta(100, unit='D')

0   2019-04-11 09:10:00
1   2019-04-18 09:20:00
2   2019-05-12 10:20:00
3   2019-05-13 11:40:00
4   2019-06-08 15:10:00
5   2019-07-19 19:20:00
6   2019-10-08 21:20:00
7   2019-10-28 23:30:00
8   2019-12-06 11:48:00
9   2019-12-10 03:12:00
Name: Birth, dtype: datetime64[ns]

In [53]:
# add 100 mins
birth + pd.to_timedelta(100, unit='m')

0   2019-01-01 10:50:00
1   2019-01-08 11:00:00
2   2019-02-01 12:00:00
3   2019-02-02 13:20:00
4   2019-02-28 16:50:00
5   2019-04-10 21:00:00
6   2019-06-30 23:00:00
7   2019-07-21 01:10:00
8   2019-08-28 13:28:00
9   2019-09-01 04:52:00
Name: Birth, dtype: datetime64[ns]

In [54]:
# add 100 seconds
birth + pd.to_timedelta(100, unit='s')

0   2019-01-01 09:11:40
1   2019-01-08 09:21:40
2   2019-02-01 10:21:40
3   2019-02-02 11:41:40
4   2019-02-28 15:11:40
5   2019-04-10 19:21:40
6   2019-06-30 21:21:40
7   2019-07-20 23:31:40
8   2019-08-28 11:49:40
9   2019-09-01 03:13:40
Name: Birth, dtype: datetime64[ns]