In [63]:
from datetime import time
import calendar

import pandas as pd
import numpy as np

from utils import load_dataset

In [64]:
# Load the working DataFrame through the project helper imported from utils.
# NOTE(review): later cells slice df.index with date strings and reset/set 'Fecha-I',
# so load_dataset presumably returns a frame indexed by the datetime column 'Fecha-I' — confirm.
df = load_dataset()

## Period of day
- `morning`: between 5:00 and 11:59
- `afternoon`: between 12:00 and 18:59
- `night`: between 19:00 and 4:59 (all periods are based on the time of `Date-I`).

In [65]:
def get_period(row):
    """Classify a row's 'Fecha-I' timestamp into a period of the day.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``row['Fecha-I']`` with a ``.time()`` method
        (a ``datetime`` / ``pandas.Timestamp``).

    Returns
    -------
    str
        'morning'   for 05:00:00 <= t < 12:00:00,
        'afternoon' for 12:00:00 <= t < 19:00:00,
        'night'     otherwise (19:00:00 through 04:59:59).
    """
    moment = row['Fecha-I'].time()
    # Half-open intervals: the previous closed bounds (<= 11:59:00, <= 18:59:00)
    # sent times such as 11:59:30 or 18:59:01 to 'night'.
    if time(5, 0, 0) <= moment < time(12, 0, 0):
        return 'morning'
    if time(12, 0, 0) <= moment < time(19, 0, 0):
        return 'afternoon'
    return 'night'

In [66]:
# get_period reads row['Fecha-I'], so 'Fecha-I' must be an ordinary column while
# the rows are labelled; afterwards it is put back as the index.
df.reset_index(inplace=True)
df['day_period'] = df.apply(get_period, axis=1)
df.set_index('Fecha-I', inplace=True)

In [67]:
# Sanity check: number of rows assigned to each period of the day.
df['day_period'].value_counts()

afternoon    25506
morning      25352
night        17348
Name: day_period, dtype: int64

## High season
`high_season`: 1 if `Date-I` falls between 15-Dec and 3-Mar, between 15-Jul and 31-Jul, or between 11-Sep and 30-Sep (bounds inclusive); 0 otherwise.

In [70]:
# Flag high-season rows from the month/day of the index instead of slicing with
# hard-coded 2017 labels: the mask is valid for any year and does not require the
# index to be sorted. On 2017 data it selects the same rows as the original slices.
# NOTE(review): assumes df.index is a DatetimeIndex (presumably 'Fecha-I') — confirm.
month_day = df.index.month * 100 + df.index.day  # e.g. 15-Jul -> 715
in_high_season = (
    (month_day >= 1215) | (month_day <= 303)       # 15-Dec .. 3-Mar (wraps over New Year)
    | ((month_day >= 715) & (month_day <= 731))    # 15-Jul .. 31-Jul
    | ((month_day >= 911) & (month_day <= 930))    # 11-Sep .. 30-Sep
)
df['high_season'] = in_high_season.astype('int64')

## Difference in minutes 
`min_diff`: Difference between `Date-O` and `Date-I` in minutes.

In [1]:
def get_min_diff(df):
    """Add a ``min_diff`` column: whole minutes from 'Fecha-I' to 'Fecha-O'.

    The frame is modified in place and nothing is returned. 'Fecha-I' is
    moved out of the index for the subtraction and set as the index again
    afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose 'Fecha-I' after ``reset_index`` and contain a 'Fecha-O'
        column; both are expected to be datetime-like so that their
        difference is a timedelta.

    Notes
    -----
    ``.astype('timedelta64[m]')`` is rejected by pandas >= 2.0 (only s/ms/us/ns
    resolutions are supported). ``total_seconds() // 60`` keeps the result the
    old cast produced: float minutes, floored, with NaN for missing values.
    """
    df.reset_index(inplace=True)
    elapsed = df['Fecha-O'] - df['Fecha-I']
    df['min_diff'] = elapsed.dt.total_seconds() // 60
    df.set_index('Fecha-I', inplace=True)

## Delay 15: 
`delay_15`: 1 if `min_diff` > 15, 0 if not.

In [60]:
# 'min_diff' is only created by get_min_diff(); build it here if no earlier cell
# has done so, so that this cell also works after Restart & Run All.
if 'min_diff' not in df.columns:
    get_min_diff(df)

# Vectorised comparison instead of a per-row lambda; missing min_diff values
# compare False and therefore yield 0, exactly as before.
df['delay_15'] = (df['min_diff'] > 15).astype('int64')