In [1]:
# Import the pandas library for data manipulation and analysis.
import pandas as pd
# Import the numpy library for numerical operations.
import numpy as np
# Import the date object from the datetime module to work with dates.
from datetime import date

In [2]:
# Build a small demo frame holding a single 'doj' (date of joining) column.
# Every value is a plain DD-MM-YYYY string at this stage; nothing is parsed yet.
data = pd.DataFrame(
    dict(
        doj=[
            '01-01-2017',
            '04-12-2008',
            '23-06-1988',
            '25-08-1999',
            '20-02-1993',
        ]
    )
)

In [3]:
# Show the freshly built frame. At this point 'doj' still holds the raw
# DD-MM-YYYY strings exactly as they were typed in.
data

Unnamed: 0,doj
0,01-01-2017
1,04-12-2008
2,23-06-1988
3,25-08-1999
4,20-02-1993


In [4]:
# 'doj' is still text here, so date arithmetic and the .dt accessor are unavailable.
# Parse it into datetimes, replacing the column in place. The explicit format
# (day-month-year) removes any ambiguity for values such as '04-12-2008'.
data['doj'] = pd.to_datetime(
    data['doj'],
    format='%d-%m-%Y',
)

In [5]:
# Preview the first rows after the conversion. The values now render as
# YYYY-MM-DD instead of the original DD-MM-YYYY strings. Note that head() only
# shows values; inspect data.dtypes to confirm the column's actual dtype.
data.head()

Unnamed: 0,doj
0,2017-01-01
1,2008-12-04
2,1988-06-23
3,1999-08-25
4,1993-02-20


In [6]:
# Add a 'year' column holding the calendar year of each joining date.
# The .dt accessor exposes datetime components element-wise on the Series.
data['year'] = data.doj.dt.year

In [7]:
# Preview the first rows to check the new 'year' column against 'doj'.
data.head()

Unnamed: 0,doj,year
0,2017-01-01,2017
1,2008-12-04,2008
2,1988-06-23,1988
3,1999-08-25,1999
4,1993-02-20,1993


In [8]:
# Add a 'month' column holding the month number (1-12) of each joining date.
data['month'] = data.doj.dt.month

In [9]:
# Preview the first rows to check the new 'month' column against 'doj'.
data.head()

Unnamed: 0,doj,year,month
0,2017-01-01,2017,1
1,2008-12-04,2008,12
2,1988-06-23,1988,6
3,1999-08-25,1999,8
4,1993-02-20,1993,2


In [10]:
# Calendar-year gap between today and each joining date: current year minus
# the 'doj' year. Only the year component is compared, so the result can be one
# higher than the number of fully completed years when this year's anniversary
# has not been reached yet. The value depends on the day the cell is run.
current_year = date.today().year
data['passed_years'] = current_year - data.doj.dt.year

In [11]:
# Show the frame with the new 'passed_years' column. These values are relative
# to the date on which the notebook was executed.
data

Unnamed: 0,doj,year,month,passed_years
0,2017-01-01,2017,1,8
1,2008-12-04,2008,12,17
2,1988-06-23,1988,6,37
3,1999-08-25,1999,8,26
4,1993-02-20,1993,2,32


In [12]:
# Calendar-month gap between today and each joining date: twelve months per
# year of difference plus the difference in month-of-year. The day of the month
# is ignored, so this counts calendar-month boundaries crossed, not full
# 30/31-day periods.
# Read the clock once so that the year and the month come from the same date;
# two separate date.today() calls could straddle midnight at a month or year
# boundary and combine components from different days.
today = date.today()
data['passed_months'] = (today.year - data['doj'].dt.year) * 12 + today.month - data['doj'].dt.month

In [13]:
# Show the frame with the new 'passed_months' column. As with 'passed_years',
# the values are relative to the date on which the notebook was executed.
data

Unnamed: 0,doj,year,month,passed_years,passed_months
0,2017-01-01,2017,1,8,102
1,2008-12-04,2008,12,17,199
2,1988-06-23,1988,6,37,445
3,1999-08-25,1999,8,26,311
4,1993-02-20,1993,2,32,389


In [14]:
# Add a 'day_name' column with the weekday name (e.g. 'Sunday') of each joining
# date, which is handy for spotting day-of-week patterns.
data['day_name'] = data.doj.dt.day_name()

In [15]:
# Show the final frame with every derived column: year, month, passed_years,
# passed_months and day_name.
data

Unnamed: 0,doj,year,month,passed_years,passed_months,day_name
0,2017-01-01,2017,1,8,102,Sunday
1,2008-12-04,2008,12,17,199,Thursday
2,1988-06-23,1988,6,37,445,Thursday
3,1999-08-25,1999,8,26,311,Wednesday
4,1993-02-20,1993,2,32,389,Saturday
