# Pandas Datetime

In [1]:
# For reference, please see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html

import numpy as np
import pandas as pd
import datetime

## Basic Date Time Operations

In [2]:
# Parse date information given in different forms into one DatetimeIndex:
# a date string, a NumPy datetime64 and a standard-library datetime

date_var = pd.to_datetime(
    [
        '10/15/2019',
        np.datetime64('2019-10-15'),
        datetime.datetime(2019, 10, 15),
    ]
)
date_var

DatetimeIndex(['2019-10-15', '2019-10-15', '2019-10-15'], dtype='datetime64[ns]', freq=None)

In [3]:
# Generate a fixed-frequency sequence of dates:
# five consecutive days ('D' = daily) starting on 2019-10-15

date_var = pd.date_range(start='2019-10-15', periods=5, freq='D')
date_var

DatetimeIndex(['2019-10-15', '2019-10-16', '2019-10-17', '2019-10-18',
               '2019-10-19'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Attach time zone information to the (so far time-zone-naive) index
# UTC stands for Coordinated Universal Time
# Note: tz_localize raises a TypeError on an index that is already time-zone-aware,
# so this cell must run exactly once after the cell that creates date_var

date_var = date_var.tz_localize(tz='UTC')
date_var

DatetimeIndex(['2019-10-15 00:00:00+00:00', '2019-10-16 00:00:00+00:00',
               '2019-10-17 00:00:00+00:00', '2019-10-18 00:00:00+00:00',
               '2019-10-19 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [5]:
# Express the UTC index in US Eastern time; the result is displayed only,
# date_var itself keeps its UTC time zone

date_var.tz_convert(tz='US/Eastern')

DatetimeIndex(['2019-10-14 20:00:00-04:00', '2019-10-15 20:00:00-04:00',
               '2019-10-16 20:00:00-04:00', '2019-10-17 20:00:00-04:00',
               '2019-10-18 20:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', freq='D')

In [6]:
# Build a small time series at a chosen frequency: seven consecutive days with the values 0..6

# Other frequencies work the same way, e.g. 'h' (hourly) or 's' (seconds); pandas < 2.2 spells them 'H' and 'S'
idx = pd.date_range(start='2019-10-01', periods=7, freq='D')
ts = pd.Series(data=range(idx.size), index=idx)
ts

2019-10-01    0
2019-10-02    1
2019-10-03    2
2019-10-04    3
2019-10-05    4
2019-10-06    5
2019-10-07    6
Freq: D, dtype: int64

In [7]:
# Another fixed-frequency series: ten consecutive month-end dates with the values 0..9.
# The frequency is passed as an offset object rather than the 'M' string alias:
# pandas 2.2 deprecated 'M' in favour of 'ME', while older releases do not know 'ME'.
# pd.offsets.MonthEnd() produces the same month-end index on both.

idx = pd.date_range('2019-10-01', periods=10, freq=pd.offsets.MonthEnd())
ts = pd.Series(range(len(idx)), index=idx)
ts

2019-10-31    0
2019-11-30    1
2019-12-31    2
2020-01-31    3
2020-02-29    4
2020-03-31    5
2020-04-30    6
2020-05-31    7
2020-06-30    8
2020-07-31    9
Freq: M, dtype: int64

## Working with Timestamped Data

In [8]:
# A pandas Timestamp represents a single point in time.
# Here one is created from a standard-library datetime object.

pd.Timestamp(datetime.datetime(2019, 7, 15))

Timestamp('2019-07-15 00:00:00')

In [9]:
# A Timestamp always includes hours, minutes and seconds; when only a date is given they are set to 00:00:00.
pd.Timestamp('2007-07-07')

Timestamp('2007-07-07 00:00:00')

In [10]:
# A Period represents a span of time rather than a single point.
# The span can be specified explicitly or inferred from the string format: 'YYYY-MM' is inferred as monthly ('M').
pd.Period('2019-10')

Period('2019-10', 'M')

In [11]:
# Specifying the span explicitly: with freq='D' the same string yields the one-day period 2019-10-01
pd.Period('2019-10', freq='D')

Period('2019-10-01', 'D')

In [13]:
# Build a plain Python list holding three consecutive daily Timestamps

dates = [pd.Timestamp(day) for day in ('2015-05-01', '2015-05-02', '2015-05-03')]
dates

[Timestamp('2015-05-01 00:00:00'),
 Timestamp('2015-05-02 00:00:00'),
 Timestamp('2015-05-03 00:00:00')]

In [14]:
# Create a pandas Series that uses the list of timestamps (dates) as its index.
# The values are random numbers from the standard normal distribution, drawn from a
# seeded generator so that the same numbers appear on every Restart & Run All.

rng = np.random.default_rng(0)
ts = pd.Series(rng.standard_normal(3), index=dates)
ts

2015-05-01    0.185729
2015-05-02    0.460114
2015-05-03    1.918470
dtype: float64

In [15]:
# Verify the type of the index: a Series indexed by Timestamps gets a DatetimeIndex

type(ts.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [16]:
# Build a list of three consecutive Periods; the 'YYYY-MM' strings make each one monthly

periods = [pd.Period(month) for month in ('2017-07', '2017-08', '2017-09')]
periods

[Period('2017-07', 'M'), Period('2017-08', 'M'), Period('2017-09', 'M')]

In [17]:
# Create a pandas Series that uses the list of monthly periods as its index.
# The values are random numbers from the standard normal distribution, drawn from a
# seeded generator so that the same numbers appear on every Restart & Run All.

rng = np.random.default_rng(1)
ts = pd.Series(rng.standard_normal(3), index=periods)
ts

2017-07   -0.449947
2017-08    0.036121
2017-09    0.509469
Freq: M, dtype: float64

In [18]:
# Create a date range of 10 periods starting on 2019-10-01
# The start date is written as YYYYMMDD; with no freq given the range is daily

dates = pd.date_range(start='20191001', periods=10)
dates

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07', '2019-10-08',
               '2019-10-09', '2019-10-10'],
              dtype='datetime64[ns]', freq='D')

## Converting to Datetime

In [19]:
# To convert a Series or list-like object of date-like objects (e.g. strings, epochs, or a mixture),
# you can use the to_datetime function. Missing values such as None become NaT.
# All strings here share one layout: since pandas 2.0 the format is inferred from the first
# string and applied to the rest, so mixing layouts (e.g. 'Jul 09, 2019' with '2019-07-10')
# raises a ValueError unless format='mixed' is passed.

pd.to_datetime(pd.Series(['Jul 09, 2019', 'Jul 10, 2019', None]))

0   2019-07-09
1   2019-07-10
2          NaT
dtype: datetime64[ns]

In [20]:
# Convert a list of date strings to a DatetimeIndex.
# Both strings use the same separator: since pandas 2.0 one format is inferred from the
# first string, so a differently laid-out second string (e.g. '2005.12.31') raises a ValueError.

pd.to_datetime(['2005/12/25', '2005/12/31'])

DatetimeIndex(['2005-12-25', '2005-12-31'], dtype='datetime64[ns]', freq=None)

In [21]:
# For dates written day first (i.e. European style), pass the dayfirst flag:
# '09-01-2019' is then read as 9 January 2019 rather than 1 September 2019

pd.to_datetime(['09-01-2019 10:00'], dayfirst=True)

DatetimeIndex(['2019-01-09 10:00:00'], dtype='datetime64[ns]', freq=None)

In [22]:
# Another day-first example: '02-01-2017' is 2 January 2017 and '01-03-2018' is 1 March 2018
pd.to_datetime(['02-01-2017', '01-03-2018'], dayfirst=True)

DatetimeIndex(['2017-01-02', '2018-03-01'], dtype='datetime64[ns]', freq=None)

In [23]:
# If you pass a single string to to_datetime, it returns a single Timestamp. Timestamp can also accept string input,
# but it doesn't accept string parsing options like dayfirst or format, so use to_datetime if these are required.

pd.to_datetime('2019/10/01')

Timestamp('2019-10-01 00:00:00')

In [24]:
# You can also use the DatetimeIndex constructor directly.
# No frequency is attached to the result (freq=None) unless one is requested.

pd.DatetimeIndex(['2019-01-01', '2019-01-03', '2019-01-05'])

DatetimeIndex(['2019-01-01', '2019-01-03', '2019-01-05'], dtype='datetime64[ns]', freq=None)

In [25]:
# Passing freq='infer' sets the frequency of the index to the one inferred from the
# dates upon creation; these consecutive days give a daily frequency ('D')

pd.DatetimeIndex(['2014-01-17', '2014-01-18', '2014-01-19'], freq='infer')

DatetimeIndex(['2014-01-17', '2014-01-18', '2014-01-19'], dtype='datetime64[ns]', freq='D')

In [26]:
# Providing a format argument: '%Y/%m/%d' states that the string is laid out as year/month/day

pd.to_datetime('2019/10/11', format='%Y/%m/%d')

Timestamp('2019-10-11 00:00:00')

In [27]:
# to_datetime can also assemble a Series of Timestamps from a DataFrame of integer or string columns.
# Notice that the year, month, day and hour columns each supply one component of the result.

df = pd.DataFrame(
    {
        'year': [2017, 2018, 2019],
        'month': [3, 4, 5],
        'day': [15, 16, 17],
        'hour': [8, 9, 10],
    }
)

pd.to_datetime(df)

0   2017-03-15 08:00:00
1   2018-04-16 09:00:00
2   2019-05-17 10:00:00
dtype: datetime64[ns]

In [28]:
# You can pass only the columns that you need; leaving out 'hour' gives plain dates
pd.to_datetime(df[['year', 'month', 'day']])

0   2017-03-15
1   2018-04-16
2   2019-05-17
dtype: datetime64[ns]

In [29]:
# Select only the 'hour' column of the DataFrame (returned as a Series)

df['hour']

0     8
1     9
2    10
Name: hour, dtype: int64

In [30]:
# Select only the 'month' column of the DataFrame (returned as a Series)

df['month']

0    3
1    4
2    5
Name: month, dtype: int64