# Feature Engineering of DateTime type variables for Machine Learning, Data Science

### In this notebook, we will cover the following topics:

1. How to extract Date / Time components
1. How to create Day Part Flag
1. How to create Boolean Flags
1. How to create Weekend Flag
1. How to Calculate Date / Time Differences

**Bonus Section:** Using Fast_ml for feature engineering

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
pd.set_option('display.max_columns', 1000)
    
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file in it.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the loan data, parsing both date columns into datetime64 at read time
# so the `.dt` accessor is available on them afterwards.
DATE_COLUMNS = ['date_issued', 'date_last_payment']
df = pd.read_csv(
    '/kaggle/input/loan-data/loan.csv',
    parse_dates=DATE_COLUMNS,
)
df.head()

# 1: Extract Date / Time Components

In [None]:
# Pull each calendar component out of the issue date through the `.dt` accessor
# and store it in a column named `date_issued:<component>`.
date_components = ['year', 'month', 'day', 'day_of_week', 'day_of_year']
for component in date_components:
    df[f'date_issued:{component}'] = getattr(df['date_issued'].dt, component)

# Preview the source column next to the derived ones.
df[['date_issued'] + [f'date_issued:{component}' for component in date_components]].head()

# 2: Create Day Part Flag

In [None]:
def day_part(hour):
    """Map an hour of the day to a coarse day-part label.

    Parameters
    ----------
    hour : int or float
        Hour of the day in the 0-23 range, as produced by ``Series.dt.hour``.
        ``24`` is still accepted for backward compatibility and is labelled
        the same as ``0``.

    Returns
    -------
    str or None
        One of "dawn", "early morning", "late morning", "noon", "afternoon",
        "evening", "night" or "midnight". ``None`` is returned for any value
        outside the listed hours (for example a NaN hour).
    """
    day_parts = {
        "dawn": (4, 5),
        "early morning": (6, 7),
        "late morning": (8, 9, 10),
        "noon": (11, 12, 13),
        "afternoon": (14, 15, 16),
        "evening": (17, 18, 19),
        "night": (20, 21, 22),
        # 0 must be listed explicitly: the hour component of a timestamp at
        # midnight is 0, never 24, so without it those rows got no label.
        "midnight": (23, 24, 0, 1, 2, 3),
    }
    for label, hours in day_parts.items():
        if hour in hours:
            return label
    return None
    
    
# Derive the hour of issue, then label every row with its day part by running
# `day_part` element-wise over the hours.
issued_hour = df['date_issued'].dt.hour
df['date_issued:hour'] = issued_hour
df['date_issued:day_part'] = issued_hour.apply(day_part)
df.head()

# 3 : Create Boolean Flags

In [None]:
# Copy the boolean calendar-boundary indicators exposed by the `.dt` accessor
# into columns named `date_issued:<flag>`.
boundary_flags = ['is_year_start', 'is_quarter_start', 'is_month_start', 'is_month_end']
for flag in boundary_flags:
    df[f'date_issued:{flag}'] = getattr(df['date_issued'].dt, flag)

# Preview the source column next to the derived flags.
df[['date_issued'] + [f'date_issued:{flag}' for flag in boundary_flags]].head()

# 4: Create Weekend Flag

In [None]:
# pandas numbers weekdays Monday=0 ... Sunday=6, so 5 and 6 are Saturday/Sunday.
weekend_days = [5, 6]
falls_on_weekend = df['date_issued:day_of_week'].isin(weekend_days)

# Store the flag as 1/0 rather than True/False.
df['date_issued:is_weekend'] = falls_on_weekend.astype(int)

preview_columns = ['date_issued', 'date_issued:day_of_week', 'date_issued:is_weekend']
df[preview_columns].head()

# 5 : Calculate Date / Time Differences

In [None]:
# Subtracting two datetime columns yields a timedelta Series.
time_to_last_payment = df['date_last_payment'] - df['date_issued']
time_to_last_payment

In [None]:
# Same difference, reduced to its whole-days component via `.dt.days`.
elapsed = df['date_last_payment'] - df['date_issued']
elapsed.dt.days

In [None]:
# The timedelta `.dt` accessor only exposes fixed-length components (days,
# seconds, ...); it has no `months` attribute, so `.dt.months` raises
# AttributeError. Count the calendar-month boundaries crossed between the two
# dates instead, using the year and month components of each date.
(
    (df['date_last_payment'].dt.year - df['date_issued'].dt.year) * 12
    + (df['date_last_payment'].dt.month - df['date_issued'].dt.month)
)

In [None]:
# A month is not a fixed duration, and recent pandas versions refuse
# np.timedelta64(1, 'M') as a divisor for that reason. Express the gap in
# average-length months instead: 365.2425 days per Gregorian year / 12.
average_month = pd.Timedelta(days=365.2425 / 12)
(df['date_last_payment'] - df['date_issued']) / average_month

In [None]:
# Same idea for quarters: recent pandas versions refuse np.timedelta64(3, 'M')
# because a month has no fixed length, so divide by an explicit average quarter
# (a quarter of the 365.2425-day Gregorian year).
average_quarter = pd.Timedelta(days=365.2425 / 4)
(df['date_last_payment'] - df['date_issued']) / average_quarter

# BONUS -- Fast_ml for quick feature engineering

In [None]:
!pip install fast_ml --upgrade

In [None]:
# Read the CSV again and rebind `df`, so this section works on a freshly
# loaded frame rather than the one modified by the cells above.
DATE_COLUMNS = ['date_issued', 'date_last_payment']
df = pd.read_csv(
    '/kaggle/input/loan-data/loan.csv',
    parse_dates=DATE_COLUMNS,
)

In [None]:
from fast_ml.feature_engineering import FeatureEngineering_DateTime

# Instantiate fast_ml's date/time feature-engineering helper.
dt_fe = FeatureEngineering_DateTime()

# Fit on `df` with `date_issued` as the only datetime variable. The prefix
# 'date_issued:' is presumably prepended to the generated column names --
# TODO confirm against the fast_ml documentation.
dt_fe.fit(df, datetime_variables=['date_issued'], prefix = 'date_issued:')

# Rebind `df` to the transformer's output and preview the first rows.
df = dt_fe.transform(df)
df.head()

In [None]:
# List the column labels of `df` as it stands at this point in the notebook.
df.columns

---

In [None]:
from datetime import datetime

# Display the current local date and time as the cell output.
datetime.today()