In [1]:
import pandas as pd
import numpy as np

# to_datetime( )

- Dates can be written in many different formats, like the ones shown below
- to_datetime( ) can convert all of these formats into the standard timestamp
![image.png](attachment:image.png)

### converting various date formats


In [16]:
# Dates written in five different layouts
a = ['2017-01-05','Jan 5, 2017','01/05/2016','2017.01.05','2017/01/05']

# format='mixed' (pandas >= 2.0) infers the format of each string individually
# instead of relying on one format guessed from the first element
b = pd.to_datetime(a, format='mixed')
print(b)
print(type(b))

DatetimeIndex(['2017-01-05', '2017-01-05', '2016-01-05', '2017-01-05',
               '2017-01-05'],
              dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


### converting date along with time

In [15]:
# Some entries carry a time of day (12-hour and 24-hour clock), others are dates only
a = ['2017-01-05 1:30:00 PM','Jan 5, 2017 13:30:00','01/05/2016','2017.01.05','2017/01/05']

# format='mixed' (pandas >= 2.0) infers the format of each string individually
# instead of relying on one format guessed from the first element
b = pd.to_datetime(a, format='mixed')
print(b)
print(type(b))

DatetimeIndex(['2017-01-05 13:30:00', '2017-01-05 13:30:00',
               '2016-01-05 00:00:00', '2017-01-05 00:00:00',
               '2017-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


### Note : by default to_datetime( ) reads ambiguous dates using the US convention (month first) :
 - US : MM / DD / YYYY ( the default used by to_datetime( ) )
 - Europe : DD / MM / YYYY

In [12]:
# Meant as 5th January 2022, yet the month-first (US) default turns it into 1st May 2022
ambiguous_date = '05-01-2022'
pd.to_datetime(ambiguous_date)

Timestamp('2022-05-01 00:00:00')

In [13]:
# 31 cannot be a month, so this string is read day-first and parsed correctly
unambiguous_date = '31-01-2022'
pd.to_datetime(unambiguous_date)

Timestamp('2022-01-31 00:00:00')

### We can use 'dayfirst=True' to parse dates in the European (day-first) format

In [14]:
# dayfirst=True makes the leading "05" the day, giving 5th January 2022
european_date = '05-01-2022'
pd.to_datetime(european_date, dayfirst=True)

Timestamp('2022-01-05 00:00:00')

### We can use 'format' to define a custom format for our date

In [26]:
# Unusual separators are handled by spelling out the layout with strftime codes
custom_date = '05$01$2022'
custom_layout = '%d$%m$%Y'
pd.to_datetime(custom_date, format=custom_layout)

Timestamp('2022-01-05 00:00:00')

### The 'errors' parameter of to_datetime( )
- If even one string cannot be parsed (a garbage string), to_datetime( ) raises an error
- This is because 'errors' defaults to 'raise', which raises a ValueError

In [32]:
# With the default errors='raise', a single unparseable string ('ada') aborts the
# whole conversion. The exception is caught and printed so the error is shown
# while the notebook still runs from top to bottom.
a = ['2017-01-05 1:30:00 PM','Jan 5, 2017 13:30:00','01/05/2016','ada','2017.01.05','2017/01/05']

parse_error = None
try:
    # format='mixed' (pandas >= 2.0) infers the format of each string individually
    pd.to_datetime(a, format='mixed')
except ValueError as exc:
    parse_error = exc  # kept under its own name: `exc` is cleared when the except block ends
    print(f"{type(exc).__name__}: {exc}")

In [33]:
# errors='ignore' returned the whole input unchanged when a garbage string was found,
# but it is deprecated since pandas 2.2. The recommended replacement is to catch the
# ValueError explicitly and fall back to the unparsed input.
a = ['2017-01-05 1:30:00 PM','Jan 5, 2017 13:30:00','01/05/2016','ada','2017.01.05','2017/01/05']

try:
    # format='mixed' (pandas >= 2.0) infers the format of each string individually
    parsed_or_raw = pd.to_datetime(a, format='mixed')
except ValueError:
    # 'ada' cannot be parsed: keep every entry as its original text
    parsed_or_raw = pd.Index(a)
parsed_or_raw

Index(['2017-01-05 1:30:00 PM', 'Jan 5, 2017 13:30:00', '01/05/2016', 'ada',
       '2017.01.05', '2017/01/05'],
      dtype='object')

In [36]:
# errors='coerce' turns every unparseable string into NaT (Not a Time) and parses the rest as usual
a = ['2017-01-05 1:30:00 PM','Jan 5, 2017 13:30:00','01/05/2016','ada','2017.01.05','2017/01/05']

# format='mixed' (pandas >= 2.0) infers the format of each string individually, so
# only the garbage string is coerced, not valid dates written in another layout
coerced = pd.to_datetime(a, format='mixed', errors='coerce')
coerced

DatetimeIndex(['2017-01-05 13:30:00', '2017-01-05 13:30:00',
               '2016-01-05 00:00:00',                 'NaT',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

### Convert epochs through to_datetime( )
- Unix epoch time is measured in seconds (elapsed since 1970-01-01 00:00:00 UTC)

![image.png](attachment:image.png)

- For numeric input, the default unit of to_datetime( ) is nanoseconds ( unit='ns' )
- But the epoch time is in seconds
- So, we need to set unit='s' so that the number is interpreted as seconds

In [44]:
# A fixed UNIX timestamp, expressed in whole seconds
epoc = 1650639293

# unit='s' makes to_datetime interpret the integer as seconds
epoch_values = [epoc]
dt = pd.to_datetime(epoch_values, unit='s')
dt

DatetimeIndex(['2022-04-22 14:54:53'], dtype='datetime64[ns]', freq=None)

In [50]:
# View the timestamps as raw int64 values. For the datetime64[ns] dtype shown for
# `dt` these are nanoseconds since the epoch (epoc * 10**9), not milliseconds;
# integer-divide by 10**9 to get the original epoch seconds back.
dt.view('int64')

array([1650639293000000000], dtype=int64)