## Working with dates and times

In [1]:
import pandas as pd 
import numpy as np 

In [2]:
import time 
from datetime import date 

### Use the `time` and `datetime` libraries to get the current time and date

In [6]:
# Current local time as a struct_time (year, month, day, hour, ... fields).
now_local = time.localtime()
now_local

time.struct_time(tm_year=2021, tm_mon=10, tm_mday=20, tm_hour=16, tm_min=17, tm_sec=52, tm_wday=2, tm_yday=293, tm_isdst=1)

In [7]:
# Today's calendar date (no time-of-day component).
today = date.today()
today

datetime.date(2021, 10, 20)

### Use dates and times in a data source

In [3]:
# Load the NO2 air-quality measurements; the path is relative to the
# notebook's working directory.
csv_path = 'air_quality_no2_long.csv'
df = pd.read_csv(csv_path)

#### explore data 

In [4]:
# Preview the first five rows to see the columns and the raw date strings.
df.head(n=5)

Unnamed: 0,city,country,date.utc,location,parameter,value,unit
0,Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³
1,Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³
2,Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³
3,Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³
4,Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³


In [5]:
# Summarise the frame: row count, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2068 entries, 0 to 2067
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   city       2068 non-null   object 
 1   country    2068 non-null   object 
 2   date.utc   2068 non-null   object 
 3   location   2068 non-null   object 
 4   parameter  2068 non-null   object 
 5   value      2068 non-null   float64
 6   unit       2068 non-null   object 
dtypes: float64(1), object(6)
memory usage: 113.2+ KB


In [8]:
# Which cities have measurements in this dataset?
city_column = df['city']
city_column.unique()

array(['Paris', 'Antwerpen', 'London'], dtype=object)

- as we can see, the `date.utc` field is not stored as a date
- it's currently an object (i.e. a string)
- we call the pandas function `pd.to_datetime` to parse this field as a datetime

In [10]:
# Parse the strings in 'date.utc' into timezone-aware timestamps.
# utc=True pins the result to UTC explicitly instead of relying on every
# string carrying the same "+00:00" offset.
# errors='coerce' turns any unparseable value into NaT rather than raising,
# so compare the non-null count afterwards if the source may contain bad rows.
df['date.utc'] = pd.to_datetime(df['date.utc'], errors='coerce', utc=True)

In [11]:
# Re-inspect the dtypes to confirm 'date.utc' is now a datetime column
# rather than object (string).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2068 entries, 0 to 2067
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype              
---  ------     --------------  -----              
 0   city       2068 non-null   object             
 1   country    2068 non-null   object             
 2   date.utc   2068 non-null   datetime64[ns, UTC]
 3   location   2068 non-null   object             
 4   parameter  2068 non-null   object             
 5   value      2068 non-null   float64            
 6   unit       2068 non-null   object             
dtypes: datetime64[ns, UTC](1), float64(1), object(5)
memory usage: 113.2+ KB


- once this field is a datetime we can use pandas' datetime attributes on it
- this gives us access to parts of the date - e.g. month, weekday
- this means we can create new columns based on those date parts
- then we can aggregate up to the date period - e.g. what was the average pollution level per month


In [12]:
# Pick a single row by its index label and read the month of its timestamp.
# A single .loc lookup (row label, column name) replaces the chained
# df[col][row] form, which does two separate indexing steps.
df.loc[1000, 'date.utc'].month

5

In [14]:
# Create a month column (1-12) using the .dt accessor on the datetime column,
# rather than building an intermediate DatetimeIndex.
df['month'] = df['date.utc'].dt.month

In [15]:
# Create a weekday (number) column using the .dt accessor on the datetime
# column; pandas numbers weekdays Monday=0 through Sunday=6.
df['weekday'] = df['date.utc'].dt.weekday

In [16]:
# Overwrite the numeric weekday with its abbreviated name (e.g. 'Thu', 'Fri');
# '%a' is the strftime code for the short weekday name.
weekday_names = df['date.utc'].dt.strftime('%a')
df['weekday'] = weekday_names

strftime format codes - special characters (e.g. `%a` for the abbreviated weekday name) that control how a date is formatted

[cheatsheet here](https://strftime.org/)

In [17]:
# Preview the first five rows again to verify the new 'month' and 'weekday' columns.
df.head(n=5)

Unnamed: 0,city,country,date.utc,location,parameter,value,unit,month,weekday
0,Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³,6,Fri
1,Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³,6,Thu
2,Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³,6,Thu
3,Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³,6,Thu
4,Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³,6,Thu
