In [3]:
import pandas as pd
import numpy as np
print(f'numpy version: {np.__version__}')
print(f'pandas version: {pd.__version__}')

numpy version: 1.19.4
pandas version: 1.1.5


## Generate series of times

In [4]:
# TIMES 
rng = pd.date_range('2016 Jul 1', periods = 10, freq = 'M')
rng

DatetimeIndex(['2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-31', '2017-01-31', '2017-02-28',
               '2017-03-31', '2017-04-30'],
              dtype='datetime64[ns]', freq='M')

In [5]:
# Which of these formats DON'T work?
#'2016 Jul 1', '7/1/2016', '1/7/2016', 'July 1, 2016', '2016-07-01', '2016/07/01'

In [6]:
# Is '7/1/2016' in January or July?

In [7]:
# What is the class of an individual object held in the date_range?
type(rng[1])

pandas._libs.tslibs.timestamps.Timestamp

## Time Stamps

In [8]:
# TIME STAMPS VS TIME SPANS
pd.Timestamp('2016-07-10')

Timestamp('2016-07-10 00:00:00')

In [9]:
# You can also add more details
pd.Timestamp('2016-07-10 10')

Timestamp('2016-07-10 10:00:00')

In [10]:
# Or even more...
pd.Timestamp('2016-07-10 10:15')

Timestamp('2016-07-10 10:15:00')

In [11]:
# How much detail can you add?
pd.Timestamp('2016-07-10 10:15:00.00001')

Timestamp('2016-07-10 10:15:00.000010')

In [12]:
# What are some properties of timestamps? Try them out.
# hint: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-date-components
t = pd.Timestamp('2016-07-10 10:15')

## Time Offsets

In [13]:
pd.Timedelta('3 days 1 hour')

Timedelta('3 days 01:00:00')

In [14]:
pd.Timestamp('2020-11-30 06:00:30') + pd.Timedelta('3 days')

Timestamp('2020-12-03 06:00:30')

In [15]:
pd.Timedelta('1ns') # The finest resolution you can get is 1 nanosecond

Timedelta('0 days 00:00:00.000000001')

In [16]:
rng + pd.Timedelta('1 day')

DatetimeIndex(['2016-08-01', '2016-09-01', '2016-10-01', '2016-11-01',
               '2016-12-01', '2017-01-01', '2017-02-01', '2017-03-01',
               '2017-04-01', '2017-05-01'],
              dtype='datetime64[ns]', freq=None)

## Time spans

In [17]:
# TIME SPANS
p = pd.Period('2016-01')
# An assignment alone renders nothing; end the cell with the object so its
# repr (including the frequency inferred from the string) is displayed
p

In [18]:
# What's that extra info above? How does it get set?
print(p.start_time)
print(p.end_time)

2016-01-01 00:00:00
2016-01-31 23:59:59.999999999


In [19]:
pd.Period('2016-01-01')

Period('2016-01-01', 'D')

In [20]:
pd.Period('2016-01-01 10')

Period('2016-01-01 10:00', 'H')

In [21]:
pd.Period('2016-01-01 10:10')

Period('2016-01-01 10:10', 'T')

In [22]:
# Second-level detail; this value is not rendered because it is not the cell's last expression
pd.Period('2016-01-01 10:10:10')

# Adding a Timedelta shifts a Period; the result keeps the minute frequency
pd.Period('2016-01-01 10:10') + pd.Timedelta('1 day')

Period('2016-01-02 10:10', 'T')

In [23]:
# What's the most detailed Period you can get?

In [24]:
# How can you make multiple time periods? 
# Hint look for analogy with pd.date_range() above

In [25]:
# FANCY FREQUENCY SETTING
# Only want business days
pd.period_range('2020-12-20 10:10', freq = 'B', periods = 10)

PeriodIndex(['2020-12-21', '2020-12-22', '2020-12-23', '2020-12-24',
             '2020-12-25', '2020-12-28', '2020-12-29', '2020-12-30',
             '2020-12-31', '2021-01-01'],
            dtype='period[B]', freq='B')

In [26]:
# It's possible to combine frequencies. What if you want to advance by 25 hours each day. What are the 2 ways to do it?
p1 = pd.period_range('2016-01-01 10:10', freq = '25H', periods = 10)

In [27]:
p2 = pd.period_range('2016-01-01 10:10', freq = '1D1H', periods = 10)

In [28]:
p1

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [29]:
p2

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [30]:
# How can you determine whether a timestamp falls within a given period?
def timestamp_in_period(timestamp, period):
    """Return True when `timestamp` lies inside `period`, boundaries included.

    Parameters
    ----------
    timestamp : pd.Timestamp
        The point in time to test.
    period : pd.Period
        The time span to test against.

    Returns
    -------
    bool
        True if period.start_time <= timestamp <= period.end_time.

    Both comparisons are inclusive: start_time and end_time are themselves
    part of the span, so strict comparisons would wrongly reject a
    timestamp sitting exactly on the first or last instant of the period.
    """
    return period.start_time <= timestamp <= period.end_time


time_1 = pd.Timestamp('2020 12-20 07:30:00')
period_1 = pd.Period('2020 12-20 07')
print(timestamp_in_period(time_1, period_1))

True


## Indexing with time series

In [31]:
# INDEXING WITH TIME OBJECTS
# You can use these objects for indices
# Let's start with using a date range as above
rng = pd.date_range('2016 Jul 1 00:00:00', periods = 10, freq = 'D')
# Only the last expression of a cell is rendered, so a bare `rng` in the
# middle of the cell would be a no-op; display the indexed Series directly
pd.Series(range(len(rng)), index = rng)

2016-07-01    0
2016-07-02    1
2016-07-03    2
2016-07-04    3
2016-07-05    4
2016-07-06    5
2016-07-07    6
2016-07-08    7
2016-07-09    8
2016-07-10    9
Freq: D, dtype: int64

In [32]:
# You can also use time period indices, in cases where it makes more sense 
# to think about your index as a time span rather than a single point in time

periods = [pd.Period('2016-01'), pd.Period('2016-02'), pd.Period('2016-03')]
# Draw the values from a locally seeded generator so that Restart & Run All
# reproduces the same series (np.random.randn depends on hidden global state)
ts = pd.Series(np.random.default_rng(42).standard_normal(len(periods)), index = periods)
ts

2016-01    1.693445
2016-02   -1.204218
2016-03    0.695200
Freq: M, dtype: float64

In [33]:
# What type is the index for ts?
print(type(ts.index))
print(type(ts.index[0]))

<class 'pandas.core.indexes.period.PeriodIndex'>
<class 'pandas._libs.tslibs.period.Period'>


In [34]:
# Experiment with various indices
# Hint: does ts['2016'] work? 
ts['2016']

2016-01    1.693445
2016-02   -1.204218
2016-03    0.695200
Freq: M, dtype: float64

In [35]:
# Timestamped data can be converted to period indices with to_period and vice versa with to_timestamp
ts = pd.Series(range(10), pd.date_range('07-10-16 8:15', periods = 10, freq = 'H'))
ts

2016-07-10 08:15:00    0
2016-07-10 09:15:00    1
2016-07-10 10:15:00    2
2016-07-10 11:15:00    3
2016-07-10 12:15:00    4
2016-07-10 13:15:00    5
2016-07-10 14:15:00    6
2016-07-10 15:15:00    7
2016-07-10 16:15:00    8
2016-07-10 17:15:00    9
Freq: H, dtype: int64

In [36]:
# Notice the start time change due to period function
ts_period = ts.to_period(freq='H')
ts_period

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
2016-07-10 12:00    4
2016-07-10 13:00    5
2016-07-10 14:00    6
2016-07-10 15:00    7
2016-07-10 16:00    8
2016-07-10 17:00    9
Freq: H, dtype: int64

In [37]:
# Super cool way of accessing the series with a time range
# Unlike ordinary positional slicing via `__getitem__`, this label-based slice includes both endpoints

ts_period['2016-07-10 08:30':'2016-07-10 09:20'] # periods are matched by overlap: every period that intersects the range is returned

2016-07-10 08:00    0
2016-07-10 09:00    1
Freq: H, dtype: int64

In [38]:
# Same time-range slice, but here the index holds timestamps, so the returned values are different
# Unlike ordinary positional slicing via `__getitem__`, this label-based slice includes both endpoints

ts['2016-07-10 08:30':'2016-07-10 09:20'] # timestamps are matched by inclusion: only points that fall inside the range are returned

2016-07-10 09:15:00    1
Freq: H, dtype: int64

In [40]:
# Generate a timestamp with European style formatted date string (e.g. dayfirst formatted).
# In particular March 4th 2020

pd.to_datetime('04/03/2020', dayfirst=True)

Timestamp('2020-03-04 00:00:00')

In [45]:
# Generate string representation in a desired format from pd.Timestamp
# Say for example, I want to extract a string '2020-03-04' from pd.Timestamp('Mar 4, 2020')

pd.Timestamp('Mar 4, 2020').strftime(format='%Y-%m-%d')

'2020-03-04'

In [50]:
# More neat indexing (jumping by 2): the number after the second colon is the step size
numbers = list(range(20))
numbers[::2]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [52]:
ts['2016-07-10 08:15'::3]

2016-07-10 08:15:00    0
2016-07-10 11:15:00    3
2016-07-10 14:15:00    6
2016-07-10 17:15:00    9
Freq: 3H, dtype: int64

In [53]:
ts['2016-07-10 16:15'::-1]

2016-07-10 16:15:00    8
2016-07-10 15:15:00    7
2016-07-10 14:15:00    6
2016-07-10 13:15:00    5
2016-07-10 12:15:00    4
2016-07-10 11:15:00    3
2016-07-10 10:15:00    2
2016-07-10 09:15:00    1
2016-07-10 08:15:00    0
Freq: -1H, dtype: int64