In [1]:
import pandas as pd

In [2]:
data = {'Date': ["2023-07-15", "2022-12-05", "2021-06-20", "2020-01-10", "2019-11-25"]}
df = pd.DataFrame(data)
df

Unnamed: 0,Date
0,2023-07-15
1,2022-12-05
2,2021-06-20
3,2020-01-10
4,2019-11-25


In [3]:
#Only if you are intending to do TIME SERIES ANALYSIS

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    5 non-null      object
dtypes: object(1)
memory usage: 172.0+ bytes


In [5]:
# Convert the column into datetime format

df['Date'] = pd.to_datetime(df['Date'])

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    5 non-null      datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 172.0 bytes


In [7]:
#Extract common features

#Year

df['Year'] = df['Date'].dt.year

In [8]:
df

Unnamed: 0,Date,Year
0,2023-07-15,2023
1,2022-12-05,2022
2,2021-06-20,2021
3,2020-01-10,2020
4,2019-11-25,2019


In [9]:
df['Month'] = df["Date"].dt.month
df['Day'] = df["Date"].dt.day
df

Unnamed: 0,Date,Year,Month,Day
0,2023-07-15,2023,7,15
1,2022-12-05,2022,12,5
2,2021-06-20,2021,6,20
3,2020-01-10,2020,1,10
4,2019-11-25,2019,11,25


In [10]:
#Day of the week
# Monday --- 0
# Tuesday -- 1
#...

df['DayOfTheWeek'] = df['Date'].dt.dayofweek
df

Unnamed: 0,Date,Year,Month,Day,DayOfTheWeek
0,2023-07-15,2023,7,15,5
1,2022-12-05,2022,12,5,0
2,2021-06-20,2021,6,20,6
3,2020-01-10,2020,1,10,4
4,2019-11-25,2019,11,25,0


In [11]:
# Week of the Year

df['WeekOfTheYear'] = df['Date'].dt.isocalendar().week
df

Unnamed: 0,Date,Year,Month,Day,DayOfTheWeek,WeekOfTheYear
0,2023-07-15,2023,7,15,5,28
1,2022-12-05,2022,12,5,0,49
2,2021-06-20,2021,6,20,6,24
3,2020-01-10,2020,1,10,4,2
4,2019-11-25,2019,11,25,0,48


In [36]:
df.iloc[:,[0,1,2,4,5]]

Unnamed: 0,Date,Year,Month,DayOfTheWeek,WeekOfTheYear
0,2023-07-15,2023,7,5,28
1,2022-12-05,2022,12,0,49
2,2021-06-20,2021,6,6,24
3,2020-01-10,2020,1,4,2
4,2019-11-25,2019,11,0,48


In [None]:
# Year ------------- Useful for detecting trend over the time
# Month and Day ---- Identifying seasonal pattern
# Day of the week -- Something that can help identify business impact (e.g. higher sales on weekends, higher sales on festive seasons)
# Week of the year -- Time-based aggregation

In [20]:
df2 = pd.DataFrame({"date":["25-12-2024","01-02-2025"]})
df2

Unnamed: 0,date
0,25-12-2024
1,01-02-2025


In [13]:
df2['date']=pd.to_datetime(df2['date'], dayfirst=True)
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    2 non-null      datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 148.0 bytes


In [15]:
df2['date'].dt.year

Unnamed: 0,date
0,2024
1,2025


In [21]:
df2['date2']=pd.to_datetime(df2['date'])
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    2 non-null      object        
 1   date2   2 non-null      datetime64[ns]
dtypes: datetime64[ns](1), object(1)
memory usage: 164.0+ bytes


  df2['date2']=pd.to_datetime(df2['date'])


In [23]:
df2['date2'].dt.year

Unnamed: 0,date2
0,2024
1,2025


In [24]:
pd.__version__

'2.2.2'

In [None]:
#Handling Errors related to Date Format in a date column

In [25]:
data = {'Date': ["2023-07-15", "2022-12-05", "2021/06/20", "2020-01-10", "2019-11-25","25-02-2018","First of May 1998"]}
df4 = pd.DataFrame(data)
df4

Unnamed: 0,Date
0,2023-07-15
1,2022-12-05
2,2021/06/20
3,2020-01-10
4,2019-11-25
5,25-02-2018
6,First of May 1998


In [26]:
df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    7 non-null      object
dtypes: object(1)
memory usage: 188.0+ bytes


In [30]:
#NaT --- Not a Time
df4['DateConverted'] = pd.to_datetime(df4["Date"] , errors='coerce')
df4

Unnamed: 0,Date,DateConverted
0,2023-07-15,2023-07-15
1,2022-12-05,2022-12-05
2,2021/06/20,NaT
3,2020-01-10,2020-01-10
4,2019-11-25,2019-11-25
5,25-02-2018,NaT
6,First of May 1998,NaT


In [34]:
date2={'Date':['21-07-2001','2009-20-12','27/04/2027','First of June']}
df4=pd.DataFrame(date2)
df4['Date']=pd.to_datetime(df4['Date'],errors='coerce', dayfirst=True)
df4

Unnamed: 0,Date
0,2001-07-21
1,NaT
2,NaT
3,NaT
