## Handling Dates and Times

### Converting Strings to Dates

In [2]:
# Load libraries
import numpy as np
import pandas as pd
# Raw date strings written as day-month-year with a 12-hour clock
date_strings = np.array(['03-04-2005 11:35 PM','23-05-2010 12:01 AM','04-09-2009 09:09 PM'])
# Parse each string with one explicit format so day and month are never swapped
date = list(map(lambda raw: pd.to_datetime(raw, format='%d-%m-%Y %I:%M %p'), date_strings))
date

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

### Handling Time Zones

In [12]:
import pandas as pd
dates = ['2023-01-01', '2023-01-02', '2023-01-03']
date_in_london_ = []
for date_time in dates:
    # Parse the zone-less timestamp first
    date = pd.Timestamp(date_time)
    # Attach the London zone to the naive timestamp
    date_in_london = date.tz_localize(tz='Europe/London')
    date_in_london_ += [date_in_london]
    print(date_in_london)


2023-01-01 00:00:00+00:00
2023-01-02 00:00:00+00:00
2023-01-03 00:00:00+00:00


In [13]:
# Work with the last timestamp localized in the previous cell
date_in_london = date_in_london_[-1]
# Re-express that same instant in the Abidjan time zone
date_in_london.tz_convert(tz='Africa/Abidjan')

Timestamp('2023-01-03 00:00:00+0000', tz='Africa/Abidjan')

### Selecting Dates and Times

In [14]:
# Load library
import pandas as pd
# Create a data frame of 100,000 consecutive hourly datetimes.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in pandas 2.2+ and emits a FutureWarning.
dataframe = pd.DataFrame({'date': pd.date_range('1/1/2001', periods=100000, freq='h')})
# Build the two bounds as named masks: strictly after 01:00, up to and including 04:00
after_start = dataframe['date'] > '2002-1-1 01:00:00'
up_to_end = dataframe['date'] <= '2002-1-1 04:00:00'
# Select observations between two datetimes
dataframe[after_start & up_to_end]

Unnamed: 0,date
8762,2002-01-01 02:00:00
8763,2002-01-01 03:00:00
8764,2002-01-01 04:00:00


### Breaking Up Date Data into Multiple Features

In [17]:
# Load library
import pandas as pd
# Build a data frame holding 150 weekly dates
dataframe = pd.DataFrame({'date': pd.date_range('1/1/2001', periods=150, freq='W')})
# Derive year, month, day, hour, and minute columns through the .dt accessor
dataframe = dataframe.assign(
    year=dataframe['date'].dt.year,
    month=dataframe['date'].dt.month,
    day=dataframe['date'].dt.day,
    hour=dataframe['date'].dt.hour,
    minute=dataframe['date'].dt.minute,
)
dataframe


Unnamed: 0,date,year,month,day,hour,minute
0,2001-01-07,2001,1,7,0,0
1,2001-01-14,2001,1,14,0,0
2,2001-01-21,2001,1,21,0,0
3,2001-01-28,2001,1,28,0,0
4,2001-02-04,2001,2,4,0,0
...,...,...,...,...,...,...
145,2003-10-19,2003,10,19,0,0
146,2003-10-26,2003,10,26,0,0
147,2003-11-02,2003,11,2,0,0
148,2003-11-09,2003,11,9,0,0


### Encoding Days of the Week

In [19]:
import pandas as pd

# Create three consecutive month-end dates.
# An explicit MonthEnd offset replaces the "M" string alias, which is
# deprecated in pandas 2.2+ (renamed "ME"); the offset object works on
# both older and newer pandas versions.
dates = pd.Series(pd.date_range("2/2/2002", periods=3, freq=pd.offsets.MonthEnd()))
# Show days of the week
day_names = dates.dt.day_name()
print(day_names)

0    Thursday
1      Sunday
2     Tuesday
dtype: object


### Creating a Lagged Feature

In [20]:
# Load library
import pandas as pd
# Build the frame in one step: five consecutive daily dates and their prices
dataframe = pd.DataFrame({
    "dates": pd.date_range("1/1/2001", periods=5, freq="D"),
    "stock_price": [1.1, 2.2, 3.3, 4.4, 5.5],
})
# Shift prices down one row so each row also carries the previous row's price;
# the first row has no predecessor and is left missing
dataframe["previous_days_stock_price"] = dataframe["stock_price"].shift(periods=1)
# Show data frame
dataframe

Unnamed: 0,dates,stock_price,previous_days_stock_price
0,2001-01-01,1.1,
1,2001-01-02,2.2,1.1
2,2001-01-03,3.3,2.2
3,2001-01-04,4.4,3.3
4,2001-01-05,5.5,4.4


### Handling Missing Data in Time Series

In [21]:
# Load libraries
import pandas as pd
import numpy as np
# Create five consecutive month-end dates.
# An explicit MonthEnd offset replaces the "M" string alias, which is
# deprecated in pandas 2.2+ (renamed "ME"); the offset object works on
# both older and newer pandas versions.
time_index = pd.date_range("01/01/2010", periods=5, freq=pd.offsets.MonthEnd())
# Create data frame, set index
dataframe = pd.DataFrame(index=time_index)
# Create feature with a gap of missing values
dataframe["Sales"] = [1.0, 2.0, np.nan, np.nan, 5.0]
# Interpolate missing values; this returns a new frame and leaves
# `dataframe` itself with its gaps intact
dataframe.interpolate()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,4.0
2010-05-31,5.0


In [22]:
# Forward-fill: carry the last observed value down into each gap
dataframe.ffill(axis="index")

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,2.0
2010-04-30,2.0
2010-05-31,5.0
