# Time Series Functionalities

In [2]:
import numpy as np
import pandas as pd

## Working with dates

In [5]:
# Parse a free-form date-time string into a pandas Timestamp.
# No explicit format is passed, so pandas infers the layout from the string itself.
pd.to_datetime('2016-06-10 1:25pm')

Timestamp('2016-06-10 13:25:00')

In [7]:
# Without an explicit format pandas has to guess which field is the day and
# which is the month, so the better practice is to pass the format.
pd.to_datetime('5-11-2000')  # 5 is read as the month here -> 11 May 2000

Timestamp('2000-05-11 00:00:00')

In [6]:
# Passing an explicit format removes the ambiguity: with '%d-%m-%Y' the first
# field is the day and the second is the month, so this is 5 November 2000.
pd.to_datetime('5-11-2000', format='%d-%m-%Y')

Timestamp('2000-11-05 00:00:00')

In [18]:
# Daily DatetimeIndex running from 1 Jan 2020 through 31 Dec 2021 (both ends included)
date_rng = pd.date_range(
    start='1-1-2020',
    end='12-31-2021',
    freq='D',
)
date_rng

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10',
               ...
               '2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25',
               '2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30', '2021-12-31'],
              dtype='datetime64[ns]', length=731, freq='D')

## Slicing

In [23]:
# Sample time series: one standard-normal draw per day in `date_rng`.
# The generator is seeded so the values are identical on every Restart & Run All.
# NOTE: re-execute the notebook to refresh any saved outputs that predate the seed.
SEED = 42
rng = np.random.default_rng(SEED)

df = pd.DataFrame(rng.normal(size=len(date_rng)), columns=['data'], index=date_rng)

df.head()

Unnamed: 0,data
2020-01-01,-2.282693
2020-01-02,-1.010707
2020-01-03,-1.54938
2020-01-04,-1.030667
2020-01-05,-0.879892


In [25]:
# Positional slicing: take the first ten rows, whatever their index labels are
df.iloc[:10]

Unnamed: 0,data
2020-01-01,-2.282693
2020-01-02,-1.010707
2020-01-03,-1.54938
2020-01-04,-1.030667
2020-01-05,-0.879892
2020-01-06,1.060539
2020-01-07,0.458268
2020-01-08,0.005587
2020-01-09,1.0757
2020-01-10,0.822526


<div class="alert alert-block alert-info">
<b>Info:</b> Dates and strings that parse to timestamps can be passed as indexing parameters</div>

In [32]:
# Label slicing on the DatetimeIndex; unlike positional slicing, both endpoints are included.
# Timestamps and date strings are interchangeable as slice bounds.
by_timestamp = df.loc[pd.to_datetime('2020-01-01'):pd.to_datetime('2020-01-10')]
by_string = df.loc['2020-01-01':'2020-01-10']

# Only the last expression of a cell is rendered, so verify the equivalence
# explicitly rather than computing one of the slices and discarding it.
assert by_timestamp.equals(by_string)
by_string

Unnamed: 0,data
2020-01-01,-2.282693
2020-01-02,-1.010707
2020-01-03,-1.54938
2020-01-04,-1.030667
2020-01-05,-0.879892
2020-01-06,1.060539
2020-01-07,0.458268
2020-01-08,0.005587
2020-01-09,1.0757
2020-01-10,0.822526


In [33]:
# Partial-string indexing: a bare year selects every row whose timestamp
# falls within that year (here, all of 2020).
df.loc['2020']

Unnamed: 0,data
2020-01-01,-2.282693
2020-01-02,-1.010707
2020-01-03,-1.549380
2020-01-04,-1.030667
2020-01-05,-0.879892
...,...
2020-12-27,-0.165915
2020-12-28,1.739174
2020-12-29,-1.726871
2020-12-30,-0.145641


## Resampling