# How to extract `Day of Year` using pandas?

In [18]:
import numpy as np
import pandas as pd
import calendar
import time
import datetime
# Report the library versions this notebook was executed with.
print(f"numpy version : {np.__version__}")
print(f"pandas version : {pd.__version__}")

numpy version : 1.16.4
pandas version : 0.25.1


### Creating Scenario

In [35]:
# Sample frame: a single column of date strings used by the examples below.
df = pd.DataFrame(
    dict(
        TIME_STAMP=[
            "2017-01-01",
            "2017-01-09",
            "2017-01-23",
            "2017-02-01",
            "2017-12-31",
        ]
    )
)
df

Unnamed: 0,TIME_STAMP
0,2017-01-01
1,2017-01-09
2,2017-01-23
3,2017-02-01
4,2017-12-31


 - Day of Year
 - Week of Year
 - Day of Week (number 0–6, where Monday = 0 and Sunday = 6)
 - Day of Week (name)

In [36]:
# Print the text calendar for February 2017 (weeks start on Monday) as a
# reference for checking the weekday results further down.
print(calendar.TextCalendar().formatmonth(2017, 2))

   February 2017
Mo Tu We Th Fr Sa Su
       1  2  3  4  5
 6  7  8  9 10 11 12
13 14 15 16 17 18 19
20 21 22 23 24 25 26
27 28



In [37]:
## Expected Output

## DAY OF YEAR

In [38]:
# Parse the date strings into datetime values so the `.dt` accessor can be used.
df["TIME_STAMP"] = df["TIME_STAMP"].pipe(pd.to_datetime)

In [39]:
# Ordinal day of the year for each timestamp (January 1st is day 1).
df["TIME_STAMP"].dt.dayofyear

0      1
1      9
2     23
3     32
4    365
Name: TIME_STAMP, dtype: int64

In [40]:
# Store the day-of-year values in a new column, then display the frame.
df["day_of_year"] = df["TIME_STAMP"].dt.dayofyear
df

Unnamed: 0,TIME_STAMP,day_of_year
0,2017-01-01,1
1,2017-01-09,9
2,2017-01-23,23
3,2017-02-01,32
4,2017-12-31,365


## Week of Year

In [41]:
# ISO-8601 week number of each timestamp.
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0; the
# replacement is `Series.dt.isocalendar().week`. The old accessor is used only
# on pandas releases that predate `isocalendar`. The cast and rename keep the
# result as an int64 Series named after the source column, as `weekofyear` did.
# Note: ISO weeks can belong to the neighbouring year, e.g. Sunday 2017-01-01
# falls in week 52 (of ISO year 2016).
(
    df["TIME_STAMP"].dt.isocalendar().week.astype("int64").rename("TIME_STAMP")
    if hasattr(df["TIME_STAMP"].dt, "isocalendar")
    else df["TIME_STAMP"].dt.weekofyear
)

0    52
1     2
2     4
3     5
4    52
Name: TIME_STAMP, dtype: int64

In [27]:
df["TIME_STAMP"]

0   2017-01-01
1   2017-10-09
2   2017-10-23
3   2017-11-06
4   2017-11-26
5   2017-11-27
6   2017-12-31
Name: TIME_STAMP, dtype: datetime64[ns]

In [42]:
# Store the ISO-8601 week number in a new column, then display the frame.
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0, so
# prefer `Series.dt.isocalendar().week` and fall back to the old accessor only
# on pandas releases that predate `isocalendar`. The cast keeps the column as
# plain int64, matching what `weekofyear` produced.
df["week_of_year"] = (
    df["TIME_STAMP"].dt.isocalendar().week.astype("int64")
    if hasattr(df["TIME_STAMP"].dt, "isocalendar")
    else df["TIME_STAMP"].dt.weekofyear
)
df

Unnamed: 0,TIME_STAMP,day_of_year,week_of_year
0,2017-01-01,1,52
1,2017-01-09,9,2
2,2017-01-23,23,4
3,2017-02-01,32,5
4,2017-12-31,365,52


In [None]:
# Week numbers via `DatetimeIndex.isocalendar().week`, the replacement for the
# deprecated `weekofyear` attribute. `isocalendar` is an instance method, so it
# is called on an index built from the column (through `pd.`, since the bare
# name `DatetimeIndex` is not imported in this notebook). Falls back to
# `weekofyear` on pandas releases that predate `isocalendar`.
timestamp_index = pd.DatetimeIndex(df["TIME_STAMP"])
(
    timestamp_index.isocalendar().week
    if hasattr(timestamp_index, "isocalendar")
    else timestamp_index.weekofyear
)

## Day of Week (Numeric)

In [25]:
# Day of the week as an integer: Monday = 0 ... Sunday = 6.
df["TIME_STAMP"].dt.dayofweek

0    6
1    0
2    0
3    0
4    6
5    0
6    6
Name: TIME_STAMP, dtype: int64

In [33]:
# Store the numeric weekday (Monday = 0 ... Sunday = 6) in a new column.
# NOTE: the "Day of Week (Name)" section below assigns to this same
# `day_of_week` column, replacing these numbers with weekday names.
df["day_of_week"] = df["TIME_STAMP"].dt.dayofweek
df

Unnamed: 0,TIME_STAMP,dayofyear,day_of_year,day_of_week
0,2017-01-01,1,1,6
1,2017-10-09,282,282,0
2,2017-10-23,296,296,0
3,2017-11-06,310,310,0
4,2017-11-26,330,330,6
5,2017-11-27,331,331,0
6,2017-12-31,365,365,6


## Day of Week (Name)

In [43]:
# Weekday name (e.g. "Sunday") for each timestamp.
df["TIME_STAMP"].dt.day_name()

0       Sunday
1       Monday
2       Monday
3    Wednesday
4       Sunday
Name: TIME_STAMP, dtype: object

In [44]:
# Store the weekday names in `day_of_week`, then display the frame.
# This overwrites the numeric values if the "Day of Week (Numeric)" cell
# above was run first.
df["day_of_week"] = df["TIME_STAMP"].dt.day_name()
df

Unnamed: 0,TIME_STAMP,day_of_year,week_of_year,day_of_week
0,2017-01-01,1,52,Sunday
1,2017-01-09,9,2,Monday
2,2017-01-23,23,4,Monday
3,2017-02-01,32,5,Wednesday
4,2017-12-31,365,52,Sunday


## Method 1 : `pandas.Series.dt.date`

Returns the date part of Timestamps without timezone information

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 1 columns):
TIME_STAMP    7 non-null object
dtypes: object(1)
memory usage: 136.0+ bytes


In [5]:
# Convert the TIME_STAMP column from strings (object dtype) to datetime64
# so that the `.dt` accessor can be used on it.
df["TIME_STAMP"] = pd.to_datetime(df["TIME_STAMP"])

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 1 columns):
TIME_STAMP    7 non-null datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 136.0 bytes


In [8]:
df

Unnamed: 0,TIME_STAMP
0,2017-10-01 15:23:25
1,2017-10-09 00:12:58
2,2017-10-23 19:55:03
3,2017-11-06 19:24:31
4,2017-11-26 12:25:49
5,2017-11-27 18:11:49
6,2017-12-18 08:02:36


In [7]:
# Extract the date part of each timestamp. The result holds Python
# `datetime.date` objects, which is why its dtype is reported as `object`.
df["TIME_STAMP"].dt.date

0    2017-10-01
1    2017-10-09
2    2017-10-23
3    2017-11-06
4    2017-11-26
5    2017-11-27
6    2017-12-18
Name: TIME_STAMP, dtype: object

In [9]:
# Add the extracted date as a new column
df["DATE"] = df["TIME_STAMP"].dt.date
df

Unnamed: 0,TIME_STAMP,DATE
0,2017-10-01 15:23:25,2017-10-01
1,2017-10-09 00:12:58,2017-10-09
2,2017-10-23 19:55:03,2017-10-23
3,2017-11-06 19:24:31,2017-11-06
4,2017-11-26 12:25:49,2017-11-26
5,2017-11-27 18:11:49,2017-11-27
6,2017-12-18 08:02:36,2017-12-18


## Method 2 : Using `pandas.Series.dt.strftime`

Returns the string representation of the Date, depending on the selected format. Format must be a string containing one or several directives.

In [13]:
# Convert the TIME_STAMP column to datetime64; `.dt.strftime` is only
# available on datetime-like columns.
df["TIME_STAMP"] = pd.to_datetime(df["TIME_STAMP"])

### Directive

- `%Y` : Year with century as a decimal number, e.g. 1990, 2020
- `%y` : Year without century as a zero-padded decimal number [00,99]
- `%m` : Month as a decimal number [01,12]
- `%b` : Locale’s abbreviated month name [Jan, Dec]
- `%d` : Day of the month as a decimal number [01,31]
 
[pandas.Period.strftime](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Period.strftime.html) 

In [17]:
# Format "%Y-%m-%d": year-month-day, e.g. 1 December 2020 -> "2020-12-01"
df["TIME_STAMP"].dt.strftime("%Y-%m-%d")

0    2017-10-01
1    2017-10-09
2    2017-10-23
3    2017-11-06
4    2017-11-26
5    2017-11-27
6    2017-12-18
Name: TIME_STAMP, dtype: object

In [18]:
# Format "%Y-%b-%d": abbreviated month name, e.g. 1 December 2020 -> "2020-Dec-01"
df["TIME_STAMP"].dt.strftime("%Y-%b-%d")

0    2017-Oct-01
1    2017-Oct-09
2    2017-Oct-23
3    2017-Nov-06
4    2017-Nov-26
5    2017-Nov-27
6    2017-Dec-18
Name: TIME_STAMP, dtype: object

In [19]:
# Format "%d-%m-%Y": day-month-year, e.g. 1 December 2020 -> "01-12-2020"
df["TIME_STAMP"].dt.strftime("%d-%m-%Y")

0    01-10-2017
1    09-10-2017
2    23-10-2017
3    06-11-2017
4    26-11-2017
5    27-11-2017
6    18-12-2017
Name: TIME_STAMP, dtype: object

In [20]:
# Format "%d-%m-%y": lowercase %y gives the two-digit year,
# e.g. 1 December 2020 -> "01-12-20"
df["TIME_STAMP"].dt.strftime("%d-%m-%y")

0    01-10-17
1    09-10-17
2    23-10-17
3    06-11-17
4    26-11-17
5    27-11-17
6    18-12-17
Name: TIME_STAMP, dtype: object

# Summary

 - Convert the timestamp column into datetime object
 - series.dt.date
 - series.dt.strftime(directives)
 - series.dt.strftime("%Y-%m-%d")

# THANK YOU :)

In [4]:
# Sample data: two columns of pandas Timestamps, so both are already
# datetime-typed (apply pandas.to_datetime first if yours are strings).
data = dict(
    to_date=[
        pd.Timestamp(text)
        for text in (
            '2014-01-24 13:03:12.050000',
            '2014-01-27 11:57:18.240000',
            '2014-01-23 10:07:47.660000',
        )
    ],
    from_date=[
        pd.Timestamp(text)
        for text in (
            '2014-01-26 23:41:21.870000',
            '2014-01-27 15:38:22.540000',
            '2014-01-23 18:50:41.420000',
        )
    ],
)

# Build the frame from the Timestamp columns and display it.
df = pd.DataFrame(data)
df

Unnamed: 0,to_date,from_date
0,2014-01-24 13:03:12.050,2014-01-26 23:41:21.870
1,2014-01-27 11:57:18.240,2014-01-27 15:38:22.540
2,2014-01-23 10:07:47.660,2014-01-23 18:50:41.420


In [5]:
# Elapsed time per row: `from_date` minus `to_date`, stored as a timedelta column.
df["time_delta"] = df["from_date"] - df["to_date"]

In [8]:
# Express each duration in fractional days by dividing by a one-day unit.
df["time_delta"] / np.timedelta64(1, "D")

0    2.443169
1    0.153522
2    0.363122
Name: time_delta, dtype: float64

In [16]:
# Express each duration in fractional hours.
df["time_delta"] / np.timedelta64(1, "h")

0    58.636061
1     3.684528
2     8.714933
Name: time_delta, dtype: float64

In [15]:
# Express each duration in fractional minutes ('m' is minutes for numpy timedeltas).
df["time_delta"] / np.timedelta64(1, "m")

0    3518.163667
1     221.071667
2     522.896000
Name: time_delta, dtype: float64

In [26]:
# Express each duration in seconds.
df["time_delta"] / np.timedelta64(1, "s")

0    211089.82
1     13264.30
2     31373.76
Name: time_delta, dtype: float64

In [18]:
# Same conversion to fractional hours, using a pandas Timedelta as the unit.
df["time_delta"] / pd.Timedelta(1, unit="h")

0    58.636061
1     3.684528
2     8.714933
Name: time_delta, dtype: float64

In [19]:
# Same conversion to fractional minutes, using a pandas Timedelta as the unit.
df["time_delta"] / pd.Timedelta(1, unit="min")

0    3518.163667
1     221.071667
2     522.896000
Name: time_delta, dtype: float64

In [24]:
# Same conversion to seconds, using a pandas Timedelta as the unit.
df["time_delta"] / pd.Timedelta(1, unit="s")

0    211089.82
1     13264.30
2     31373.76
Name: time_delta, dtype: float64