In [1]:
import pandas as pd

In [2]:
import numpy as np

### Pandas Series 
In this tutorial we cover time-series data manipulation with pandas Series.

### Create a date range using date_range() 
It generates a datetime index (`DatetimeIndex`) at the requested frequency.

<b>Daily frequency</b>

In [3]:
## Generate Index of Daily frequency.

# Seed NumPy's global RNG once, before the first random draw, so that
# "Restart Kernel -> Run All" reproduces the same sample values in this cell
# and in every later cell that calls np.random.randint.
np.random.seed(42)

# One timestamp per calendar day, both endpoints included.
idx = pd.date_range(start='01-May-2020', end='07-May-2020', freq='D')
# One random integer in [1, 9] per timestamp (high=10 is exclusive).
data = np.random.randint(low=1, high=10, size=len(idx))

S = pd.Series(data = data, index=idx)
S

2020-05-01    1
2020-05-02    4
2020-05-03    1
2020-05-04    1
2020-05-05    8
2020-05-06    4
2020-05-07    4
Freq: D, dtype: int32

In [4]:
# Day-of-month component of every timestamp in the index
day_of_month = S.index.day
day_of_month

Int64Index([1, 2, 3, 4, 5, 6, 7], dtype='int64')

In [5]:
# Keep only the rows whose day-of-month is 2, 4 or 6
wanted_days = [2, 4, 6]
S[S.index.day.isin(wanted_days)]

2020-05-02    4
2020-05-04    1
2020-05-06    4
dtype: int32

<b>Monthly Frequency</b>

In [6]:
## Generate Index of Monthly (month-end) frequency

# Pass the offset object rather than the 'M' string alias: pandas 2.2
# deprecated 'M' in favour of 'ME', while 'ME' is unknown to older releases.
# pd.offsets.MonthEnd() means the same thing on every pandas version.
idx = pd.date_range(start='01-Jan-2020', end='31-Dec-2020', freq=pd.offsets.MonthEnd())
# One random integer in [1, 9] per month-end timestamp.
data = np.random.randint(low=1, high=10, size=len(idx))
S = pd.Series(data = data, index=idx)
S.head()

2020-01-31    3
2020-02-29    2
2020-03-31    3
2020-04-30    2
2020-05-31    4
Freq: M, dtype: int32

In [7]:
# Keep only the rows that fall in February (month number 2)
in_february = S.index.month == 2
S[in_february]

2020-02-29    2
Freq: M, dtype: int32

<b>Quarterly Frequency</b>

In [8]:
## Generate Quarterly (quarter-end) frequency

# Pass the offset object rather than the 'Q' string alias: pandas 2.2
# deprecated 'Q' in favour of 'QE', while 'QE' is unknown to older releases.
# startingMonth=12 anchors the quarters on December, i.e. the same Q-DEC
# frequency that the 'Q' alias stands for.
idx = pd.date_range(
    start='01-Jan-2020',
    end='31-Dec-2020',
    freq=pd.offsets.QuarterEnd(startingMonth=12),
)
# One random integer in [1, 9] per quarter-end timestamp.
data = np.random.randint(low=1, high=10, size=len(idx))
S = pd.Series(data = data, index=idx)
S.head()

2020-03-31    2
2020-06-30    9
2020-09-30    5
2020-12-31    8
Freq: Q-DEC, dtype: int32

<b>Hourly Frequency</b>

In [9]:
# Generate Index of Hourly Frequency

# Use the lowercase 'h' alias, as the later '8h' / '10h' cells already do:
# the uppercase 'H' spelling is deprecated in recent pandas releases.
idx = pd.date_range(start='01-Jan-2020', periods=10, freq='h')
# One random integer in [1, 9] per hourly timestamp.
data = np.random.randint(low=1, high=10, size=len(idx))
S = pd.Series(data = data, index=idx)
S.head()

2020-01-01 00:00:00    9
2020-01-01 01:00:00    7
2020-01-01 02:00:00    8
2020-01-01 03:00:00    1
2020-01-01 04:00:00    8
Freq: H, dtype: int32

In [10]:
# Keep only the rows stamped at hour 1, 5 or 7
wanted_hours = [1, 5, 7]
S[S.index.hour.isin(wanted_hours)]

2020-01-01 01:00:00    7
2020-01-01 05:00:00    8
2020-01-01 07:00:00    4
dtype: int32

<b>Half hourly Frequency</b>

In [11]:
# Build a half-hourly index: 10 timestamps spaced 30 minutes apart

idx = pd.date_range('01-Jan-2020', periods=10, freq='30min')
# One random integer in [1, 9] for each timestamp
data = np.random.randint(1, 10, size=idx.size)
S = pd.Series(data, index=idx)
S.head()

2020-01-01 00:00:00    8
2020-01-01 00:30:00    4
2020-01-01 01:00:00    8
2020-01-01 01:30:00    6
2020-01-01 02:00:00    8
Freq: 30T, dtype: int32

<b> to_datetime(): To Convert String to Datetime </b>

In [12]:
# Render every timestamp of idx as a plain string
idx_str = list(map(str, idx))

idx_str[:5]

['2020-01-01 00:00:00',
 '2020-01-01 00:30:00',
 '2020-01-01 01:00:00',
 '2020-01-01 01:30:00',
 '2020-01-01 02:00:00']

In [13]:
# Confirm that the elements of idx_str are plain strings
first_entry = idx_str[0]
type(first_entry)

str

In [14]:
# Parse the list of strings back into a DatetimeIndex
parsed_idx = pd.to_datetime(idx_str)
parsed_idx

DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 00:30:00',
               '2020-01-01 01:00:00', '2020-01-01 01:30:00',
               '2020-01-01 02:00:00', '2020-01-01 02:30:00',
               '2020-01-01 03:00:00', '2020-01-01 03:30:00',
               '2020-01-01 04:00:00', '2020-01-01 04:30:00'],
              dtype='datetime64[ns]', freq=None)

In [15]:
# Recap: first five rows of the current series
S.head(n=5)

2020-01-01 00:00:00    8
2020-01-01 00:30:00    4
2020-01-01 01:00:00    8
2020-01-01 01:30:00    6
2020-01-01 02:00:00    8
Freq: 30T, dtype: int32

<b>between_time(): Search between a time range </b>

In [17]:
# Rows whose clock time lies between 01:30 and 03:30
# (the output shows both endpoints are kept)
S.between_time(start_time='01:30', end_time='03:30')

2020-01-01 01:30:00    6
2020-01-01 02:00:00    8
2020-01-01 02:30:00    8
2020-01-01 03:00:00    2
2020-01-01 03:30:00    2
Freq: 30T, dtype: int32

<b>strptime() : String to timestamps </b>

In [18]:
from datetime import datetime

In [19]:
# Dates written as text in Month-DD-YYYY form
string_dates = [
    'May-10-2020',
    'May-11-2020',
    'May-12-2020',
    'May-13-2020',
]
string_dates

['May-10-2020', 'May-11-2020', 'May-12-2020', 'May-13-2020']

In [20]:
# Parse each date string with datetime.strptime, then index a series by the result
DATE_FORMAT = '%B-%d-%Y'  # month name, day of month, four-digit year
new_idx = [datetime.strptime(date_text, DATE_FORMAT) for date_text in string_dates]

# One random integer in [1, 9] per parsed date
data = np.random.randint(1, 10, size=len(new_idx))
S = pd.Series(data, index=new_idx)
S.head()

2020-05-10    2
2020-05-11    8
2020-05-12    7
2020-05-13    3
dtype: int32

<b> to_period()  : Convert Series from DatetimeIndex to PeriodIndex with desired frequency </b>

In [21]:
# Map each timestamp in the index to the weekly period that contains it
S.index.to_period('W')

PeriodIndex(['2020-05-04/2020-05-10', '2020-05-11/2020-05-17',
             '2020-05-11/2020-05-17', '2020-05-11/2020-05-17'],
            dtype='period[W-SUN]', freq='W-SUN')

<b> first() : Select the initial stretch of a time series by offset (deprecated since pandas 2.1) </b>

In [22]:
# Build a 3-hourly series: 15 timestamps starting at midnight on 1 May 2020.
# Use the lowercase '3h' alias, matching the '8h' / '10h' spellings used
# elsewhere in this notebook: uppercase 'H' is deprecated in recent pandas.
i = pd.date_range('2020-05-01', periods=15, freq='3h')
# One random integer in [10, 19] per timestamp (high=20 is exclusive).
d = np.random.randint(low=10, high=20, size=len(i))

Ser = pd.Series(data = d, index=i)
Ser.head()

2020-05-01 00:00:00    18
2020-05-01 03:00:00    19
2020-05-01 06:00:00    15
2020-05-01 09:00:00    18
2020-05-01 12:00:00    15
Freq: 3H, dtype: int32

In [23]:
# select first 10h
# Series.first('10h') is deprecated since pandas 2.1, so build the equivalent
# mask instead: keep the rows stamped strictly before
# (first timestamp + 10 hours).
Ser[Ser.index < Ser.index[0] + pd.Timedelta(hours=10)]

2020-05-01 00:00:00    18
2020-05-01 03:00:00    19
2020-05-01 06:00:00    15
2020-05-01 09:00:00    18
Freq: 3H, dtype: int32

<b> last() : Select the final stretch of a time series by offset (deprecated since pandas 2.1) </b>

In [24]:
# Select last 6h
# Series.last('6h') is deprecated since pandas 2.1, so build the equivalent
# mask instead: keep the rows stamped strictly after
# (last timestamp - 6 hours).
Ser[Ser.index > Ser.index[-1] - pd.Timedelta(hours=6)]

2020-05-02 15:00:00    15
2020-05-02 18:00:00    13
Freq: 3H, dtype: int32

<b> at_time() : at specific time </b>

In [25]:
# Rows stamped exactly at 03:00, whatever the date
Ser.at_time(time='03:00')

2020-05-01 03:00:00    19
2020-05-02 03:00:00    17
Freq: 24H, dtype: int32

### pandas.Series.dt.date
<p>Series.dt can be used to access the values of the series as datetime-like objects and returns several properties. The pandas Series.dt.date attribute returns the values as Python datetime.date objects (shown below as an object-dtype Series).</p>

In [26]:
# Timestamps every 8 hours from 1 May to 10 May 2020, held as Series values
X = pd.Series(pd.date_range('2020-05-01', '2020-05-10', freq='8h'))
X.head()

0   2020-05-01 00:00:00
1   2020-05-01 08:00:00
2   2020-05-01 16:00:00
3   2020-05-02 00:00:00
4   2020-05-02 08:00:00
dtype: datetime64[ns]

In [27]:
# Series.dt.date: calendar date of each timestamp (first five shown)
calendar_dates = X.dt.date
calendar_dates.head()

0    2020-05-01
1    2020-05-01
2    2020-05-01
3    2020-05-02
4    2020-05-02
dtype: object

In [28]:
# Series.dt.day: keep the timestamps that fall on the 1st or the 7th
days_of_interest = [1, 7]
X[X.dt.day.isin(days_of_interest)]

0    2020-05-01 00:00:00
1    2020-05-01 08:00:00
2    2020-05-01 16:00:00
18   2020-05-07 00:00:00
19   2020-05-07 08:00:00
20   2020-05-07 16:00:00
dtype: datetime64[ns]

In [29]:
# Series.dt.day_name(): keep the timestamps whose weekday name is Saturday
is_saturday = X.dt.day_name() == 'Saturday'
X[is_saturday]

3    2020-05-02 00:00:00
4    2020-05-02 08:00:00
5    2020-05-02 16:00:00
24   2020-05-09 00:00:00
25   2020-05-09 08:00:00
26   2020-05-09 16:00:00
dtype: datetime64[ns]

In [30]:
# Series.dt.hour: keep the timestamps whose hour component is 8
at_eight = X.dt.hour == 8
X[at_eight]

1    2020-05-01 08:00:00
4    2020-05-02 08:00:00
7    2020-05-03 08:00:00
10   2020-05-04 08:00:00
13   2020-05-05 08:00:00
16   2020-05-06 08:00:00
19   2020-05-07 08:00:00
22   2020-05-08 08:00:00
25   2020-05-09 08:00:00
dtype: datetime64[ns]

In [31]:
# Series.dt.time: time-of-day part of each timestamp (first five shown)
clock_times = X.dt.time
clock_times.head()

0    00:00:00
1    08:00:00
2    16:00:00
3    00:00:00
4    08:00:00
dtype: object