# Feature Engineering on Temporal Data

In [1]:
##--Import necessary dependencies and settings

import datetime
import numpy as np
import pandas as pd
from dateutil.parser import parse
import pytz  # contains world time zones

In [2]:
##--Load and process sample temporal data
# Four sample timestamp strings, each carrying its own UTC offset.
time_stamps = [
    '2015-03-08 10:30:00.360000+00:00',
    '2017-07-13 15:45:05.755000-07:00',
    '2012-01-20 23:30:00.254000+05:30',
    '2016-12-25 00:30:00.000000+10:00',
]

# Single-column frame; the values are still plain strings at this stage.
df = pd.DataFrame({'Time': time_stamps})
print(df)

                               Time
0  2015-03-08 10:30:00.360000+00:00
1  2017-07-13 15:45:05.755000-07:00
2  2012-01-20 23:30:00.254000+05:30
3  2016-12-25 00:30:00.000000+10:00


In [3]:
# The 'Time' column holds plain text, so each entry is turned into a
# pandas Timestamp object; the resulting array is stored both in
# `ts_objs` and as the new 'TS_obj' column of the dataframe.

print("printing time-stamp objects....")

ts_objs = np.array(list(map(pd.Timestamp, df['Time'])))
df['TS_obj'] = ts_objs

print(ts_objs)

printing time-stamp objects....
[Timestamp('2015-03-08 10:30:00.360000+0000', tz='UTC')
 Timestamp('2017-07-13 15:45:05.755000-0700', tz='pytz.FixedOffset(-420)')
 Timestamp('2012-01-20 23:30:00.254000+0530', tz='pytz.FixedOffset(330)')
 Timestamp('2016-12-25 00:30:00+1000', tz='pytz.FixedOffset(600)')]


In [4]:
##--Date based features
# Each new column is read from the Timestamp attribute of the same meaning.
# The mapping order is the order in which the columns are added to df.
date_attrs = {
    'Year': 'year',
    'Month': 'month',
    'Day': 'day',
    'DayOfWeek': 'dayofweek',
    'DayOfYear': 'dayofyear',
    'WeekOfYear': 'weekofyear',
    'Quarter': 'quarter',
}
for col_name, attr_name in date_attrs.items():
    # The lambda is consumed immediately by apply, so reading the loop
    # variable inside it is safe.
    df[col_name] = df['TS_obj'].apply(lambda ts: getattr(ts, attr_name))

date_cols = ['Time', 'Year', 'Month', 'Day', 'Quarter',
             'DayOfWeek', 'DayOfYear', 'WeekOfYear']
print(df[date_cols])

# To turn the DayOfWeek number into a day name, see:
# https://stackoverflow.com/questions/36341484/get-day-name-from-weekday-int/36341648


                               Time  Year  Month  Day  Quarter  DayOfWeek  \
0  2015-03-08 10:30:00.360000+00:00  2015      3    8        1          6   
1  2017-07-13 15:45:05.755000-07:00  2017      7   13        3          3   
2  2012-01-20 23:30:00.254000+05:30  2012      1   20        1          4   
3  2016-12-25 00:30:00.000000+10:00  2016     12   25        4          6   

   DayOfYear  WeekOfYear  
0         67          10  
1        194          28  
2         20           3  
3        360          51  


# Time-Based Features

Each temporal value also has a time component that can be used to extract useful information and features pertaining to the time. These include attributes like hour, minute, second, microsecond, and more.

In [5]:
# Time based features
# Pull the clock components out of every Timestamp in 'TS_obj'.
# Pairs are (new column name, Timestamp attribute), added in this order.
time_attrs = [
    ('Hour', 'hour'),
    ('Minute', 'minute'),
    ('Second', 'second'),
    ('MUsecond', 'microsecond'),
]
for col_name, attr_name in time_attrs:
    # apply runs the lambda right away, so it sees the current attr_name.
    df[col_name] = df['TS_obj'].apply(lambda ts: getattr(ts, attr_name))

print(df[['Time'] + [col_name for col_name, _ in time_attrs]])

                               Time  Hour  Minute  Second  MUsecond
0  2015-03-08 10:30:00.360000+00:00    10      30       0    360000
1  2017-07-13 15:45:05.755000-07:00    15      45       5    755000
2  2012-01-20 23:30:00.254000+05:30    23      30       0    254000
3  2016-12-25 00:30:00.000000+10:00     0      30       0         0


In [6]:
# Bin each temporal value into a named part of the day using the Hour
# feature extracted earlier.
# Each pair is (last hour included in the bin, label for the bin).
day_parts = [
    (5, 'Late Night'),
    (11, 'Morning'),
    (16, 'Afternoon'),
    (21, 'Evening'),
    (23, 'Night'),
]
# The leading -1 edge makes hour 0 fall inside the first bin.
hour_bins = [-1] + [last_hour for last_hour, _ in day_parts]
bin_names = [label for _, label in day_parts]

# pd.cut builds right-closed intervals by default, so the edges give
# (-1, 5], (5, 11], (11, 16], (16, 21], (21, 23], i.e. hours
# 0-5, 6-11, 12-16, 17-21 and 22-23 respectively.
df['TimeOfDayBin'] = pd.cut(df['Hour'], bins=hour_bins, labels=bin_names)

print(df[['Time', 'Hour', 'TimeOfDayBin']])

                               Time  Hour TimeOfDayBin
0  2015-03-08 10:30:00.360000+00:00    10      Morning
1  2017-07-13 15:45:05.755000-07:00    15    Afternoon
2  2012-01-20 23:30:00.254000+05:30    23        Night
3  2016-12-25 00:30:00.000000+10:00     0   Late Night
