In [21]:
import pandas as pd
import numpy as np

# Converting Strings to Dates

In [22]:
## Raw input: day-month-year followed by a 12-hour clock time with AM/PM
date_strings = np.array(['13-03-2005 11:35 PM'])

## Parse with an explicit format so the day/month order is never guessed
pd.to_datetime(
    date_strings,
    format='%d-%m-%Y %I:%M %p',
)

DatetimeIndex(['2005-03-13 23:35:00'], dtype='datetime64[ns]', freq=None)

# Handling Time Zones

In [23]:
## Timezone-aware timestamp: the wall-clock time is interpreted in London local time
pd.Timestamp(
    '2018-05-24 01:01:01',
    tz='Europe/London',
)

Timestamp('2018-05-24 01:01:01+0100', tz='Europe/London')

# Selecting Dates 

In [24]:
## Single-column frame holding 100 consecutive dates, starting on 1 Jan 2001
data = pd.DataFrame(
    {'Date': pd.date_range(start='1/1/2001', periods=100)}
)
data

Unnamed: 0,Date
0,2001-01-01
1,2001-01-02
2,2001-01-03
3,2001-01-04
4,2001-01-05
...,...
95,2001-04-06
96,2001-04-07
97,2001-04-08
98,2001-04-09


In [25]:
## Keep rows strictly after 2 Jan 2001 and strictly before 21 Mar 2001
## (both bounds are exclusive)
data.loc[
    (data['Date'] > '2001-01-02')
    & (data['Date'] < '2001-03-21')
]

Unnamed: 0,Date
2,2001-01-03
3,2001-01-04
4,2001-01-05
5,2001-01-06
6,2001-01-07
...,...
74,2001-03-16
75,2001-03-17
76,2001-03-18
77,2001-03-19


# Breaking Up Date into Multiple Features

In [26]:
## Extracting year: the `.dt` accessor exposes datetime components element-wise,
## so the result has one year value per row of `data`
data['Date'].dt.year

0     2001
1     2001
2     2001
3     2001
4     2001
      ... 
95    2001
96    2001
97    2001
98    2001
99    2001
Name: Date, Length: 100, dtype: int64

In [27]:
## Extracting month as a number (1 = January), one value per row of `data`
data['Date'].dt.month

0     1
1     1
2     1
3     1
4     1
     ..
95    4
96    4
97    4
98    4
99    4
Name: Date, Length: 100, dtype: int64

In [28]:
## Extracting day of the month, one value per row of `data`
data['Date'].dt.day

0      1
1      2
2      3
3      4
4      5
      ..
95     6
96     7
97     8
98     9
99    10
Name: Date, Length: 100, dtype: int64

In [29]:
## Extracting hour. Every value is 0 here because the dates in `data` were
## generated without a time-of-day component
data['Date'].dt.hour

0     0
1     0
2     0
3     0
4     0
     ..
95    0
96    0
97    0
98    0
99    0
Name: Date, Length: 100, dtype: int64

In [30]:
## Extracting minute. Every value is 0 here because the dates in `data` were
## generated without a time-of-day component
data['Date'].dt.minute

0     0
1     0
2     0
3     0
4     0
     ..
95    0
96    0
97    0
98    0
99    0
Name: Date, Length: 100, dtype: int64

# Calculating the Difference Between Dates

In [31]:
## Creating two dates (1 Jan 2017 and 6 Jan 2017).
## ISO 8601 (YYYY-MM-DD) literals are used so the day/month order cannot be
## misread: a string such as '01-06-2017' is parsed month-first by pandas,
## but is easily mistaken for 1 June.
date_1 = pd.Timestamp('2017-01-01')
date_2 = pd.Timestamp('2017-01-06')

## Subtracting two timestamps gives a Timedelta; `.days` is its whole-day count
(date_2 - date_1).days

5

# Encoding Days of the Week

In [32]:
## Extracting the weekday name (e.g. 'Monday') of each date as a string,
## one value per row of `data`
data['Date'].dt.day_name()

0        Monday
1       Tuesday
2     Wednesday
3      Thursday
4        Friday
        ...    
95       Friday
96     Saturday
97       Sunday
98       Monday
99      Tuesday
Name: Date, Length: 100, dtype: object

# Creating a Lagged Feature

In [33]:
## Adding synthetic stock prices: one draw per row from a normal distribution
## with mean 0 and standard deviation 1.
## The generator is seeded so the column is identical on every
## Restart & Run All; an unseeded draw changes on each execution.
data['Stock_Price'] = np.random.default_rng(seed=42).normal(loc=0, scale=1, size=len(data))
data.head()

Unnamed: 0,Date,Stock_Price
0,2001-01-01,-2.045994
1,2001-01-02,-0.067176
2,2001-01-03,-0.147173
3,2001-01-04,-1.76546
4,2001-01-05,0.571404


In [34]:
## Adding lagged values by one row
data['Previous_Stock_Price'] = data['Stock_Price'].shift(1)
data.head()

Unnamed: 0,Date,Stock_Price,Previous_Stock_Price
0,2001-01-01,-2.045994,
1,2001-01-02,-0.067176,-2.045994
2,2001-01-03,-0.147173,-0.067176
3,2001-01-04,-1.76546,-0.147173
4,2001-01-05,0.571404,-1.76546


# Using Rolling Time Windows

In [35]:
## Adding rolling mean (based of three days)
data['moving_average'] = data['Stock_Price'].rolling(window = 3).mean()
data.head()

Unnamed: 0,Date,Stock_Price,Previous_Stock_Price,moving_average
0,2001-01-01,-2.045994,,
1,2001-01-02,-0.067176,-2.045994,
2,2001-01-03,-0.147173,-0.067176,-0.753448
3,2001-01-04,-1.76546,-0.147173,-0.659936
4,2001-01-05,0.571404,-1.76546,-0.447076
