# Operations on Time series 
* For flexible operations on dates, use the Timestamp constructor in the pandas library
* With Timestamp, date operations can be performed on dates written as MM-DD-YYYY, MM/DD/YYYY, MMDDYY, etc.
* Import time series data
* Rolling operations
* Filling operations

Requires two important packages:
* pandas_datareader for reading data from the web
* the datetime module for manipulating date columns

In [3]:
# Install the pandas_datareader library from inside the notebook by running
# pip through an IPython shell escape (the leading `!`).
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH, which may
# not belong to the running kernel's environment — consider `%pip install`.
!pip install pandas_datareader



In [13]:
# No installation is needed here: `datetime` is part of the Python standard
# library and is always available to `import datetime`.
# The PyPI distribution named "datetime" is a separate third-party package
# (the DateTime-4.3 wheel shown in the recorded output below), so the earlier
# `pip install datetime` command has been dropped.

Collecting datetime
  Downloading DateTime-4.3-py2.py3-none-any.whl (60 kB)
Installing collected packages: datetime
Successfully installed datetime-4.3


In [14]:
# Let's understand dates

# import datetime library

import datetime as dt

In [17]:
# Build the same calendar date twice: once with keyword arguments and once
# with positional (year, month, day) arguments.
last_day_2 = dt.date(year=2021, month=11, day=30)
last_day_1 = dt.date(2021, 11, 30)

# Print each value followed by its type, keeping the original output order.
for value_label, type_label, day in (
    ("last_day_1 ", "last_day_1 type ", last_day_1),
    ("last_day_2 ", "last_day_2 type ", last_day_2),
):
    print(value_label, day)
    print(type_label, type(day))

last_day_1  2021-11-30
last_day_1 type  <class 'datetime.date'>
last_day_2  2021-11-30
last_day_2 type  <class 'datetime.date'>


In [18]:
# Read the year, month and day fields of last_day_1 and print one per line.
for label, part in (
    ("year ", last_day_1.year),
    ("month ", last_day_1.month),
    ("day ", last_day_1.day),
):
    print(label, part)

year  2021
month  11
day  30


In [19]:
# Build last_dt for 30 November 2021 at 16:54:20 (4:54 pm and 20 seconds).
# The fields are passed positionally in the order
# (year, month, day, hour, minute, second).
last_dt = dt.datetime(2021, 11, 30, 16, 54, 20)

In [20]:
# check the values of last_dt
last_dt

datetime.datetime(2021, 11, 30, 16, 54, 20)

In [21]:
# Read the hour, minute and second fields of last_dt and print one per line.
for label, part in (
    ("last_dt hour : ", last_dt.hour),
    ("last_dt minute : ", last_dt.minute),
    ("last_dt second : ", last_dt.second),
):
    print(label, part)

last_dt hour :  16
last_dt minute :  54
last_dt second :  20


In [103]:
# import numpy & pandas libraries
import numpy as np
import pandas as pd

In [23]:
# Build the same moment as a pandas Timestamp object named last_ts:
# 30 November 2021 at 16:54:20 (4:54 pm and 20 seconds).
last_ts = pd.Timestamp(
    year=2021,
    month=11,
    day=30,
    hour=16,
    minute=54,
    second=20,
)

In [24]:
# check values of last_ts
last_ts

Timestamp('2021-11-30 16:54:20')

In [26]:
# Print every date and time attribute of last_ts, one per line, to confirm
# that the Timestamp holds the values it was built from.
for label, part in (
    ("year ", last_ts.year),
    ("month : ", last_ts.month),
    ("day : ", last_ts.day),
    ("hour : ", last_ts.hour),
    ("minute : ", last_ts.minute),
    ("second : ", last_ts.second),
):
    print(label, part)

year  2021
month :  11
day :  30
hour :  16
minute :  54
second :  20


In [27]:
# A date written as text, to be converted into a Timestamp object.
x = "Nov-30-2021"

# pd.to_datetime() parses the string into a Timestamp; the same call is used
# to convert string columns of a pandas DataFrame.
x_dt = pd.to_datetime(x)

# Display the converted value.
x_dt

Timestamp('2021-11-30 00:00:00')

In [29]:
# get the day name of x_dt
x_dt.day_name()

'Tuesday'

In [32]:
# Add 5 days to x_dt using DateOffset() in Pandas
x_dt + pd.DateOffset(days=5)

Timestamp('2021-12-05 00:00:00')

In [34]:
# Subtract 5 days from x_dt using DateOffset() in Pandas
x_dt - pd.DateOffset(days=5)

Timestamp('2021-11-25 00:00:00')

In [4]:
# Import the `data` module of pandas_datareader, which provides DataReader.
from pandas_datareader import data
# Request INFOSYS ("INFY.NS") prices from the 'yahoo' source for the range
# 01-Jan-2021 to 30-Nov-2021. reset_index() moves the returned index into an
# ordinary column and leaves a default integer index on the frame.
# NOTE(review): this call needs network access, and the 'yahoo' source may not
# work with current pandas_datareader releases — confirm before relying on it.
ts_df=data.DataReader("INFY.NS", 'yahoo', '20210101', '20211130').reset_index()

# Show the first five rows of the Infosys DataFrame
ts_df.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2021-01-01,1265.5,1255.800049,1257.900024,1260.449951,4253550.0,1236.039307
1,2021-01-04,1290.0,1261.150024,1269.0,1288.25,7208454.0,1263.301025
2,2021-01-05,1299.0,1275.050049,1282.0,1293.800049,8145280.0,1268.74353
3,2021-01-06,1302.0,1268.050049,1300.0,1282.099976,7161715.0,1257.270142
4,2021-01-07,1297.650024,1255.5,1296.0,1262.150024,10954918.0,1237.706543


In [5]:
# Check the tail values of Infy dataframe
ts_df.tail()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
218,2021-11-23,1747.25,1710.0,1735.099976,1736.900024,8436160.0,1736.900024
219,2021-11-24,1740.5,1688.0,1740.0,1696.0,7333061.0,1696.0
220,2021-11-25,1726.199951,1696.949951,1700.0,1722.400024,4476260.0,1722.400024
221,2021-11-26,1718.349976,1684.0,1702.550049,1691.650024,4494181.0,1691.650024
222,2021-11-29,1712.0,1669.150024,1691.0,1696.349976,3940946.0,1696.349976


In [6]:
# Check the columns data types
ts_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 223 entries, 0 to 222
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       223 non-null    datetime64[ns]
 1   High       223 non-null    float64       
 2   Low        223 non-null    float64       
 3   Open       223 non-null    float64       
 4   Close      223 non-null    float64       
 5   Volume     223 non-null    float64       
 6   Adj Close  223 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 12.3 KB


In [37]:
# Row-over-row simple return of the adjusted close:
#   (current Adj Close - previous Adj Close) / previous Adj Close
# shift(1) moves the column down by one row, so the first row has no previous
# value and its return is NaN.
prev_adj_close = ts_df['Adj Close'].shift(1)
ts_df['returns'] = (ts_df['Adj Close'] - prev_adj_close) / prev_adj_close

In [38]:
ts_df.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,returns
0,2021-01-01,1265.5,1255.800049,1257.900024,1260.449951,4253550.0,1236.039307,
1,2021-01-04,1290.0,1261.150024,1269.0,1288.25,7208454.0,1263.301025,0.022056
2,2021-01-05,1299.0,1275.050049,1282.0,1293.800049,8145280.0,1268.74353,0.004308
3,2021-01-06,1302.0,1268.050049,1300.0,1282.099976,7161715.0,1257.270142,-0.009043
4,2021-01-07,1297.650024,1255.5,1296.0,1262.150024,10954918.0,1237.706543,-0.01556


# Rolling operations on time series data
* use rolling() method followed by summary of interest

In [39]:
# Mean over a trailing window of three rows; the first two rows do not have a
# full window yet, so their result is NaN.
# Only the numeric columns are passed to rolling(): the datetime 'Date' column
# cannot be averaged, and recent pandas versions raise an error instead of
# silently dropping such columns.
ts_df.select_dtypes(include="number").rolling(3).mean().head()

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,returns
0,,,,,,,
1,,,,,,,
2,1284.833333,1264.000041,1269.633341,1280.833333,6535761.0,1256.027954,
3,1297.0,1268.083374,1283.666667,1288.050008,7505150.0,1263.104899,0.005774
4,1299.550008,1266.200033,1292.666667,1279.350016,8753971.0,1254.573405,-0.006765


In [40]:
# Exponentially weighted mean of each numeric column.
# The first argument of ewm() is `com` (center of mass), so 3 gives a smoothing
# factor alpha = 1 / (1 + com) = 0.25. Every earlier row contributes with an
# exponentially decaying weight — this is not a three-row window.
# Only the numeric columns are used: the datetime 'Date' column cannot be
# averaged, and recent pandas versions raise an error instead of silently
# dropping such columns.
ts_df.select_dtypes(include="number").ewm(com=3).mean().head()

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,returns
0,1265.5,1255.800049,1257.900024,1260.449951,4253550.0,1236.039307,
1,1279.5,1258.857178,1264.242868,1276.335693,5942067.0,1251.617432,0.022056
2,1287.932432,1265.8595,1271.921628,1283.887847,6894808.0,1259.023312,0.011914
3,1293.077143,1266.660615,1282.190289,1283.233997,6992419.0,1258.382153,0.002852
4,1294.576064,1263.002334,1286.716904,1276.322989,8291267.0,1251.605,-0.003882


# Filling operations on time series data

In [65]:
# create a series type dataset with time features
# NOTE(review): passing a dict of lists to pd.Series yields a three-element
# Series (index 'date', 'temp', 'humidity') whose values are whole Python
# lists, not a date-indexed time series, and the missing readings are written
# as empty strings rather than NaN. test_ts is not referenced again in the
# visible part of this notebook — confirm whether this cell is still needed.

test_ts = pd.Series({"date" : ['20211130', '20211129', '20211128', '20211126', '20211125', '20211124'], 
"temp": [7, 6, 6.5, '', 8, 7.5], "humidity": [42, 80, '', 54, '', 74]} )

In [104]:
# Build a small daily night-temperature series indexed by date.
# ISO-formatted (YYYY-MM-DD) bounds are unambiguous; day-first strings such as
# '25/11/2021' depend on the parser guessing the day/month order.
date = pd.date_range(start='2021-11-25', end='2021-11-30', freq='D')
# np.nan marks a missing reading (the `np.NaN` alias was removed in NumPy 2.0).
night_temp = [7, 6, 6.5, np.nan, 8, 7.5]
# Missing humidity readings also use np.nan rather than an empty string, so
# every entry in the list is numeric.
humidity = [42, 80, np.nan, 54, np.nan, 74]
# Despite the `_df` suffix this is a Series: one temperature value per date.
temp_df = pd.Series(night_temp, index=date)

In [105]:
# display temp_df to verify its contents
temp_df

2021-11-25    7.0
2021-11-26    6.0
2021-11-27    6.5
2021-11-28    NaN
2021-11-29    8.0
2021-11-30    7.5
Freq: D, dtype: float64

In [111]:
# Forward fill: resample into one-day bins, average each bin, then replace
# every NaN with the most recent earlier value.
(
    temp_df
    .resample('1D')
    .mean()
    .ffill()
)

2021-11-25    7.0
2021-11-26    6.0
2021-11-27    6.5
2021-11-28    6.5
2021-11-29    8.0
2021-11-30    7.5
Freq: D, dtype: float64

In [112]:
# Backward fill: resample into one-day bins, average each bin, then replace
# every NaN with the next later value.
(
    temp_df
    .resample('1D')
    .mean()
    .bfill()
)

2021-11-25    7.0
2021-11-26    6.0
2021-11-27    6.5
2021-11-28    8.0
2021-11-29    8.0
2021-11-30    7.5
Freq: D, dtype: float64

In [113]:
# Interpolation fill: resample into one-day bins, average each bin, then
# replace every NaN with a value interpolated from the neighbouring non-NaN
# values (interpolate() with its default settings).
(
    temp_df
    .resample('1D')
    .mean()
    .interpolate()
)

2021-11-25    7.00
2021-11-26    6.00
2021-11-27    6.50
2021-11-28    7.25
2021-11-29    8.00
2021-11-30    7.50
Freq: D, dtype: float64