In [2]:
import pandas as pd
import numpy as np

In [None]:
# Pandas has four main time-related classes:
# Timestamp, DatetimeIndex, Period and PeriodIndex

### Timestamp

In [3]:
# A Timestamp represents a single point in time and is used to
# associate values with points in time

# Let's create a timestamp using the string 9/1/2019 10:05AM
pd.Timestamp('9/1/2019 10:05AM') 

Timestamp('2019-09-01 10:05:00')

In [4]:
# A Timestamp can also be built from its individual components
# (year, month, day, hour, minute, ...) instead of a string
pd.Timestamp(year=2019, month=12, day=20, hour=0, minute=0)

Timestamp('2019-12-20 00:00:00')

In [5]:
# A useful method of Timestamp is isoweekday(), which
# returns the day of the week as a number, where 1 is Monday
# and 7 is Sunday
pd.Timestamp(2013, 12, 20, 0, 0).isoweekday()

5

In [9]:
# The year, month, day, etc. can be read directly as attributes
# (no call parentheses), e.g. the seconds component
pd.Timestamp(2019, 12, 20, 5, 2, 23).second

23

### Periods

In [10]:
# Periods represent a single span of time, such as a specific
# day or month

# Let's create a period for January 2016; the granularity
# of the period will be 'M' for month, since this is the finest
# grain provided in the string
pd.Period('1/2016') 

Period('2016-01', 'M')

In [12]:
# Let's create a period for March 5th, 2016; a full date is given,
# so the granularity is 'D' for day
pd.Period('3/5/2016')

Period('2016-03-05', 'D')

In [13]:
# These periods represent the full timespan specified

# Thanks to this, doing arithmetic with periods is easy: adding an
# integer moves the period by that many units of its frequency
# e.g. find the month 5 months after January 2019
pd.Period('1/2019') + 5 

Period('2019-06', 'M')

In [14]:
# Two days before March 5th, 2016 (the unit is days because the
# period has daily frequency)
pd.Period('3/5/2016') - 2

Period('2016-03-03', 'D')

### DatetimeIndex and PeriodIndex

In [15]:
# A Series indexed by Timestamps gets a DatetimeIndex
# Let's build an example Series t1 labelled with three consecutive days
t1 = pd.Series(
    list('abc'),
    index=[pd.Timestamp(day) for day in ('2016-09-01', '2016-09-02', '2016-09-03')],
)
t1

2016-09-01    a
2016-09-02    b
2016-09-03    c
dtype: object

In [16]:
# Let's look at the type of the series index: pandas built a
# DatetimeIndex from the Timestamp labels
type(t1.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [20]:
# In the same way, labelling a Series with Period objects
# gives it a period-based index
t2 = pd.Series(
    list('def'),
    index=[pd.Period(month) for month in ('2016-09', '2016-10', '2016-11')],
)
t2

2016-09    d
2016-10    e
2016-11    f
Freq: M, dtype: object

In [21]:
# The index built from the Period labels is a PeriodIndex
type(t2.index)

pandas.core.indexes.period.PeriodIndex

### Converting to Datetime

In [22]:
# Suppose the dates arrive as strings in several different formats
# and we want to use them as the row labels of a new dataframe
d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']

# Fill a 4x2 frame with random integers in [10, 100) as sample data
ts3 = pd.DataFrame(
    data=np.random.randint(10, 100, (4, 2)),
    index=d1,
    columns=list('ab'),
)

ts3

Unnamed: 0,a,b
2 June 2013,54,71
"Aug 29, 2014",39,40
2015-06-26,12,21
7/12/16,16,48


In [23]:
# We can convert the string labels to datetimes using to_datetime.
# The labels mix several date formats: pandas >= 2.0 infers a single
# format from the first element and raises a ValueError when a later
# element does not match it (there, format='mixed' is the built-in
# escape hatch). Parsing each label on its own handles the mixed
# formats on both older and newer pandas versions.
ts3.index = pd.DatetimeIndex([pd.to_datetime(label) for label in ts3.index])
ts3

Unnamed: 0,a,b
2013-06-02,54,71
2014-08-29,39,40
2015-06-26,12,21
2016-07-12,16,48


In [24]:
# to_datetime also allows us to change the date parse order
# Let's parse a string in the European (day-first) format
pd.to_datetime('4.7.12', dayfirst = True)

Timestamp('2012-07-04 00:00:00')

### Timedeltas

In [25]:
# Timedeltas are differences in time (durations); conceptually they
# are similar to Periods

# The difference between September 3rd and September 1st is a
# Timedelta of 2 days
pd.Timestamp('9/03/2016') - pd.Timestamp('9/01/2016')

Timedelta('2 days 00:00:00')

In [28]:
# Timedeltas can be very precise, e.g. finding the date and time
# 12 days and 3 hours past September 2nd at 8:10 AM.
# The duration is spelled out with keyword arguments rather than the
# string '12D 3H': the upper-case 'H' unit is deprecated in recent
# pandas releases, while the keyword form works on every version.
pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta(days=12, hours=3)

Timestamp('2016-09-14 11:10:00')

### Offset

In [29]:
# An offset is similar to a timedelta but follows specific calendar
# rules. It allows for the introduction of business days, end of
# month, semi-month begin, etc.

# Create a timestamp and see what day of the week it is
# (weekday() counts from Monday = 0, so 6 is Sunday)
pd.Timestamp('9/4/2016').weekday()

6

In [30]:
# Now we can add a one-week offset to the timestamp
pd.Timestamp('9/4/2016') + pd.offsets.Week()

Timestamp('2016-09-11 00:00:00')

In [31]:
# Let's do the same with a month-end offset to get the last
# day of September
pd.Timestamp('9/4/2016') + pd.offsets.MonthEnd()

Timestamp('2016-09-30 00:00:00')

### Working with dates in DataFrame

In [32]:
# Suppose we have 9 measurements taken bi-weekly, every Sunday,
# starting in October 2016

# With date_range we can create a DatetimeIndex for this

# date_range needs a start or an end date; if a single date is
# passed positionally, without saying which one it is, it is
# taken as the start date

# The number of periods and the frequency also need to be specified

# The frequency is given as a short alias string, such as '2W-SUN',
# which means every two weeks, anchored on Sunday

In [33]:
# Nine bi-weekly Sundays; the first one on or after the start date is used.
# The weekly alias is written in its canonical upper-case form ('2W-SUN',
# which is also how pandas echoes it back); lower-case spellings are
# deprecated in recent pandas releases.
dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN')
dates

DatetimeIndex(['2016-10-02', '2016-10-16', '2016-10-30', '2016-11-13',
               '2016-11-27', '2016-12-11', '2016-12-25', '2017-01-08',
               '2017-01-22'],
              dtype='datetime64[ns]', freq='2W-SUN')

In [34]:
# An often used frequency setting is the business day ('B'),
# which skips Saturdays and Sundays
pd.date_range('10-01-2016', periods = 9, freq = 'B')

DatetimeIndex(['2016-10-03', '2016-10-04', '2016-10-05', '2016-10-06',
               '2016-10-07', '2016-10-10', '2016-10-11', '2016-10-12',
               '2016-10-13'],
              dtype='datetime64[ns]', freq='B')

In [35]:
# You can also do it quarterly and indicate the month a quarter
# starts in, e.g. June ('QS-JUN' = quarter start, anchored on June)
pd.date_range('10-01-2016', periods = 12, freq = 'QS-JUN')

DatetimeIndex(['2016-12-01', '2017-03-01', '2017-06-01', '2017-09-01',
               '2017-12-01', '2018-03-01', '2018-06-01', '2018-09-01',
               '2018-12-01', '2019-03-01', '2019-06-01', '2019-09-01'],
              dtype='datetime64[ns]', freq='QS-JUN')

In [41]:
# Let's go back to our bi-weekly-on-Sunday example and use
# these dates with random data to create a dataframe.
# The index is rebuilt here so the cell does not depend on an earlier one;
# the alias uses the canonical upper-case 'W' (lower-case spellings are
# deprecated in recent pandas releases).
dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN')

# Each column is a random walk: a starting level plus the running sum of
# nine random steps drawn from [-5, 10)
df = pd.DataFrame(
    {
        'Count 1': 100 + np.random.randint(-5, 10, 9).cumsum(),
        'Count 2': 120 + np.random.randint(-5, 10, 9).cumsum(),
    },
    index=dates,
)

df

Unnamed: 0,Count 1,Count 2
2016-10-02,106,120
2016-10-16,106,129
2016-10-30,105,132
2016-11-13,107,134
2016-11-27,110,139
2016-12-11,119,142
2016-12-25,125,145
2017-01-08,128,141
2017-01-22,133,150


In [51]:
# Suppose we want to know the mean count for each month:
# we can use resample() to downsample to month-end bins.
# The MonthEnd offset object is passed instead of an alias string because
# the 'M' alias is deprecated in pandas >= 2.2 (renamed 'ME'), while 'ME'
# is not recognised by older releases; the offset works on both.
df.resample(pd.offsets.MonthEnd()).mean()

Unnamed: 0,Count 1,Count 2
2016-10-31,105.666667,127.0
2016-11-30,108.5,136.5
2016-12-31,122.0,143.5
2017-01-31,130.5,145.5


In [53]:
# To do datetime indexing and slicing, we can e.g. use
# partial string indexing to find values from a
# particular year
df.loc['2017']

Unnamed: 0,Count 1,Count 2
2017-01-08,128,141
2017-01-22,133,150


In [54]:
# The same works for a particular month
df.loc['2016-12']

Unnamed: 0,Count 1,Count 2
2016-12-11,119,142
2016-12-25,125,145


In [55]:
# Values from December 2016 onwards (slice with an open end)
df.loc['2016-12':] 

Unnamed: 0,Count 1,Count 2
2016-12-11,119,142
2016-12-25,125,145
2017-01-08,128,141
2017-01-22,133,150
