## Date and Time

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the UFO sightings dataset straight from GitHub and preview the first rows.
UFO_CSV_URL = "https://raw.githubusercontent.com/justmarkham/pandas-videos/master/data/ufo.csv"
df = pd.read_csv(UFO_CSV_URL)
df.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [3]:
# Inspect the dtype pandas inferred for every column.
df.dtypes

City               object
Colors Reported    object
Shape Reported     object
State              object
Time               object
dtype: object

In [None]:
# Save a local copy of the dataset. index=False keeps the auto-generated
# RangeIndex out of the file, so reading it back does not add an
# extra "Unnamed: 0" column.
df.to_csv("ufo.csv", index=False)

**check the data type of each column**

In [None]:
# Show the dtype of each column.
df.dtypes

Here, the data type of the `Time` column is `object` (i.e. the values are plain strings).

**Converting `Time` column into `DateTime` by using `pd.to_datetime()` method**

In [5]:
# Parse the `Time` strings (e.g. "6/1/1930 22:00") into datetime64 values.
# Spelling out the month/day/year layout avoids format guessing and removes
# any month-first vs day-first ambiguity.
df['Time'] = pd.to_datetime(df['Time'], format='%m/%d/%Y %H:%M')
print(df.dtypes)

City                       object
Colors Reported            object
Shape Reported             object
State                      object
Time               datetime64[ns]
dtype: object


In [6]:
# First three rows; `Time` is now displayed as a full timestamp.
df.head(3)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,1930-06-01 22:00:00
1,Willingboro,,OTHER,NJ,1930-06-30 20:00:00
2,Holyoke,,OVAL,CO,1931-02-15 14:00:00


### Properties of `Date and Time` column

**Reference**

https://pandas.pydata.org/pandas-docs/stable/reference/series.html#api-series-dt

- Series.dt.year = Extract only the `year` from the datetime
- Series.dt.month = Extract only the `month` from the datetime
- Series.dt.day = Extract only the `day` from the datetime
- Series.dt.dayofweek = Extract only the `day of the week` from the datetime (Monday = 0, Sunday = 6)
- Series.dt.quarter = Extract only the `quarter` from the datetime

In [8]:
# Keep a handle on the datetime column; the `.dt` accessor is used on it in the cells that follow.
series = df['Time']

In [10]:
# Year component of each timestamp.
df['Year'] = series.dt.year

In [11]:
# Preview the frame with the new `Year` column.
df.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time,Year
0,Ithaca,,TRIANGLE,NY,1930-06-01 22:00:00,1930
1,Willingboro,,OTHER,NJ,1930-06-30 20:00:00,1930
2,Holyoke,,OVAL,CO,1931-02-15 14:00:00,1931
3,Abilene,,DISK,KS,1931-06-01 13:00:00,1931
4,New York Worlds Fair,,LIGHT,NY,1933-04-18 19:00:00,1933


In [13]:
# Month number of each timestamp.
df['Month'] = series.dt.month

In [14]:
# Preview the frame with the new `Month` column.
df.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time,Year,Month
0,Ithaca,,TRIANGLE,NY,1930-06-01 22:00:00,1930,6
1,Willingboro,,OTHER,NJ,1930-06-30 20:00:00,1930,6
2,Holyoke,,OVAL,CO,1931-02-15 14:00:00,1931,2
3,Abilene,,DISK,KS,1931-06-01 13:00:00,1931,6
4,New York Worlds Fair,,LIGHT,NY,1933-04-18 19:00:00,1933,4


In [16]:
# Month name (e.g. "June") of each timestamp.
df['M_name'] = series.dt.month_name()

In [17]:
# Preview the first two rows with the new `M_name` column.
df.head(2)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time,Year,Month,M_name
0,Ithaca,,TRIANGLE,NY,1930-06-01 22:00:00,1930,6,June
1,Willingboro,,OTHER,NJ,1930-06-30 20:00:00,1930,6,June


In [19]:
# Day of the month of each timestamp.
df['day'] = series.dt.day
df.head(2)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time,Year,Month,M_name,day
0,Ithaca,,TRIANGLE,NY,1930-06-01 22:00:00,1930,6,June,1
1,Willingboro,,OTHER,NJ,1930-06-30 20:00:00,1930,6,June,30


In [21]:
# Weekday name (e.g. "Sunday") of each timestamp.
df['d_name'] = series.dt.day_name()
df.head(2)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time,Year,Month,M_name,day,d_name
0,Ithaca,,TRIANGLE,NY,1930-06-01 22:00:00,1930,6,June,1,Sunday
1,Willingboro,,OTHER,NJ,1930-06-30 20:00:00,1930,6,June,30,Monday


In [23]:
# Quarter of the year each timestamp falls in.
df['Q']= series.dt.quarter

In [24]:
# Day of the week as an integer (Monday = 0 ... Sunday = 6).
series.dt.dayofweek

0        6
1        0
2        6
3        0
4        1
5        5
6        5
7        2
8        3
9        1
10       6
11       3
12       4
13       4
14       6
15       2
16       2
17       0
18       2
19       4
20       1
21       6
22       6
23       4
24       5
25       5
26       5
27       6
28       3
29       4
        ..
18211    3
18212    3
18213    3
18214    4
18215    4
18216    4
18217    4
18218    4
18219    4
18220    4
18221    4
18222    4
18223    4
18224    4
18225    5
18226    5
18227    5
18228    6
18229    6
18230    6
18231    6
18232    6
18233    6
18234    6
18235    6
18236    6
18237    6
18238    6
18239    6
18240    6
Name: Time, Length: 18241, dtype: int64

In [None]:
# Year component of each timestamp (rewrites the `Year` column created earlier).
df['Year'] = series.dt.year
df.head(2)

In [None]:
# Month number of each timestamp (rewrites the `Month` column created earlier).
df['Month'] = series.dt.month
df.head(2)

In [None]:
# Month name of each timestamp, stored under `Month_Name`.
df['Month_Name'] = series.dt.month_name()
df.head(2)

In [None]:
# Day of the month of each timestamp, stored under `Day`.
df['Day'] = series.dt.day
df.head(2)

In [None]:
# ISO week number of the year. `Series.dt.week` was deprecated in pandas 1.1
# and removed in 2.0; `isocalendar().week` is the supported replacement.
df['Week'] = series.dt.isocalendar().week
df.head(2)

In [None]:
# Name of the weekday for each timestamp. The `weekday_name` attribute was
# removed from pandas in 1.0; `day_name()` is the replacement.
df['Weekday_name'] = series.dt.day_name()
df.head(2)

In [None]:
# Weekday as an integer:
df['Weekday'] = series.dt.weekday
df.head(2)
# Mon = 0, Tue = 1, Wed = 2, Thu = 3, Fri = 4, Sat = 5, Sun = 6

In [None]:
# Quarter of the year each timestamp falls in, stored under `Quarter`.
df['Quarter'] = series.dt.quarter
df.head(2)

In [None]:
# Number of days in the month of each timestamp.
df['Daysinmonth'] = series.dt.daysinmonth
df.head(2)

## Note

### dayfirst

In [30]:
dt = '09-01-2020' # intended meaning: 9th January 2020 (day-month-year)

In [31]:
# Confirm the value is still a plain string.
type(dt)

str

In [32]:
# With no hints pandas reads the string month-first, giving 1st September 2020
# instead of the intended 9th January 2020.
Date = pd.to_datetime(dt)
Date

Timestamp('2020-09-01 00:00:00')

In [29]:
Date # parsed month-first: 1st September 2020

Timestamp('2020-09-01 00:00:00')

In [28]:
# To fix the misparse above, pass dayfirst=True to pd.to_datetime
pd.to_datetime(dt,dayfirst=True) # 9th January 2020

Timestamp('2020-01-09 00:00:00')

### format 

In [33]:
dt = "09$01$2018" # custom format that uses '$' as the separator
# intended meaning: 9th January 2018

In [34]:
# The '$'-separated string cannot be parsed automatically, so pandas raises a
# ValueError. Catch and print it so the demonstration does not abort "Run All".
try:
    print(pd.to_datetime(dt))
except ValueError as err:
    print("ValueError:", err)

ValueError: ('Unknown string format:', '09$01$2018')

In [36]:
# To parse a custom layout, describe it with the `format` parameter.
# An explicit format already fixes the day/month order, so dayfirst is not needed.
pd.to_datetime(dt, format='%d$%m$%Y')

Timestamp('2018-01-09 00:00:00')

### errors

In [38]:
# A mix of date spellings plus one value ('abc') that is not a date at all.
dates = [
    '2017-01-05',
    'Jan 5, 2017',
    '01/05/2017',
    '2017.01.05',
    '2017/01/05',
    '20170105',
    'abc',
]

# With the default errors='raise' the whole call fails with a ValueError.
# Catch and print it so the demonstration does not abort "Run All".
try:
    print(pd.to_datetime(dates))
except ValueError as err:
    print("ValueError:", err)

ValueError: ('Unknown string format:', 'abc')

In [40]:
# NOTE(review): errors='ignore' is reported as deprecated in recent pandas releases (2.2+) — confirm before relying on it.
pd.to_datetime(dates,errors='ignore') # with errors='ignore' no conversion is performed; the input comes back unchanged

array(['2017-01-05', 'Jan 5, 2017', '01/05/2017', '2017.01.05',
       '2017/01/05', '20170105', 'abc'], dtype=object)

In [41]:
# To convert every parseable value and turn invalid strings into NaT, use errors='coerce'
pd.to_datetime(dates,errors='coerce')

DatetimeIndex(['2017-01-05', '2017-01-05', '2017-01-05', '2017-01-05',
               '2017-01-05', '2017-01-05',        'NaT'],
              dtype='datetime64[ns]', freq=None)

## Operations

**Create dates DataFrame by using `date_range` method**

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html?highlight=date_range#pandas.date_range

**`pd.date_range(start=None, end=None, periods=None, freq=None)`**

- start  : Start Date
-  end   : End Date
- periods : Number of periods to generate
- freq : Frequency string; it may be prefixed with a multiple (e.g. `3M`)
    Example : freq
        M : Month end frequency
        W : Weekly frequency
        
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

**1. Specify `start` and `end`, with the default `daily frequency`.**

In [45]:
# Daily dates from 4 Oct 2019 through 20 Oct 2019, both endpoints included.
pd.date_range(start='2019-10-04',end='2019-10-20')

DatetimeIndex(['2019-10-04', '2019-10-05', '2019-10-06', '2019-10-07',
               '2019-10-08', '2019-10-09', '2019-10-10', '2019-10-11',
               '2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
               '2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
               '2019-10-20'],
              dtype='datetime64[ns]', freq='D')

**2. Specify `start` and `periods`, the number of periods (days).**

In [49]:
# 30 consecutive daily dates starting on 4 Oct 2019.
pd.date_range(start='2019-10-04',periods=30)

DatetimeIndex(['2019-10-04', '2019-10-05', '2019-10-06', '2019-10-07',
               '2019-10-08', '2019-10-09', '2019-10-10', '2019-10-11',
               '2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
               '2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
               '2019-10-20', '2019-10-21', '2019-10-22', '2019-10-23',
               '2019-10-24', '2019-10-25', '2019-10-26', '2019-10-27',
               '2019-10-28', '2019-10-29', '2019-10-30', '2019-10-31',
               '2019-11-01', '2019-11-02'],
              dtype='datetime64[ns]', freq='D')

**3. Specify `end` and `periods`, the number of periods (days).**

In [50]:
# Five daily dates ending on 8 Oct 2019.
pd.date_range(end='2019-10-08',periods=5)

DatetimeIndex(['2019-10-04', '2019-10-05', '2019-10-06', '2019-10-07',
               '2019-10-08'],
              dtype='datetime64[ns]', freq='D')

**4. Specify `start`, `end`, and `periods`; the frequency is generated automatically (linearly spaced).**

In [51]:
# With start, end and periods all given, the three points are spaced evenly between the endpoints.
pd.date_range(start='2019-10-01',end='2019-10-30',periods=3)

DatetimeIndex(['2019-10-01 00:00:00', '2019-10-15 12:00:00',
               '2019-10-30 00:00:00'],
              dtype='datetime64[ns]', freq=None)

**5. Changed the freq (frequency) to 'M' (month end frequency).**

In [52]:
# Month-end frequency. The offset object is used instead of a string alias
# because the alias changed across pandas versions ('M', renamed 'ME' in 2.2),
# and the lowercase 'm' used previously is not a documented alias.
month_ends = pd.date_range(start='2019-10-01', periods=5, freq=pd.offsets.MonthEnd())
month_ends

DatetimeIndex(['2019-10-31', '2019-11-30', '2019-12-31', '2020-01-31',
               '2020-02-29'],
              dtype='datetime64[ns]', freq='M')

**6. Multiples are allowed**

In [53]:
# Multiples of a frequency: every third month end. MonthEnd(3) is the
# version-independent spelling of the '3M' alias ('3ME' from pandas 2.2).
quarterly_month_ends = pd.date_range(start='2019-10-01', periods=5, freq=pd.offsets.MonthEnd(3))
quarterly_month_ends

DatetimeIndex(['2019-10-31', '2020-01-31', '2020-04-30', '2020-07-31',
               '2020-10-31'],
              dtype='datetime64[ns]', freq='3M')

In [None]:
# Parse a day-first string: '08-01-2020' is read as 8th January 2020.
d = pd.to_datetime(
    '08-01-2020',
    dayfirst=True,
)

In [None]:
# Day-of-month component of the parsed timestamp.
d.day