In [1]:
# Switch the kernel's working directory to /home/jovyan before anything else runs.
# NOTE(review): presumably this makes the project-local `lib` package (imported in
# the next cell) resolvable via a relative import path — confirm. The path is
# hard-coded, so the notebook only runs where that directory exists.
from os import chdir
chdir('/home/jovyan')

In [2]:
from lib import postgres as pg

# 1. Feature engineering

Create new features to be used by the model

In [3]:
# Sanity check of the cleaned source table: pull the first 10 rows (ordered by
# the first column) and display the top of the resulting frame. The rows shown
# in the saved output are spaced one minute apart.
minute_preview_sql = """
SELECT main.* FROM
(
SELECT
    *
FROM 
    clean.individual_household_power_consumption
) main
ORDER BY 1 ASC
LIMIT 10;
"""
check_target = pg.load_query_to_df(minute_preview_sql)
check_target.head()

Unnamed: 0,_id,datetime,global_active_power,global_reactive_power,voltage,current,sub_metering_1,sub_metering_2,sub_metering_3
0,1,2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,0.06,1.02
1,2,2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,0.06,0.96
2,3,2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,0.12,1.02
3,4,2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,0.06,1.02
4,5,2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,0.06,1.02


In [4]:
# Same sanity check against the `_h` variant of the cleaned table: first 10 rows
# ordered by the first column. The rows shown in the saved output are spaced
# one hour apart.
hourly_preview_sql = """
SELECT main.* FROM
(
SELECT
    *
FROM 
    clean.individual_household_power_consumption_h
) main
ORDER BY 1 ASC
LIMIT 10;
"""
check_target = pg.load_query_to_df(hourly_preview_sql)
check_target.head()

Unnamed: 0,_id,datetime,global_active_power,global_reactive_power,voltage,current,sub_metering_1,sub_metering_2,sub_metering_3
0,1,2006-12-16 17:00:00,4.223,0.229,234.644,18.1,0.0,0.032,1.012
1,2,2006-12-16 18:00:00,3.632,0.08,234.58,15.6,0.0,0.403,1.012
2,3,2006-12-16 19:00:00,3.4,0.085,233.233,14.503,0.0,0.086,1.001
3,4,2006-12-16 20:00:00,3.269,0.075,234.072,13.917,0.0,0.0,1.007
4,5,2006-12-16 21:00:00,3.056,0.077,237.159,13.047,0.0,0.025,1.033


## 1.2. Time features

The central objective of load forecasting is to predict the power consumption at an instant **t**, given information about its past behavior and additional information about other variables that may be known not only in the past but also in the future, such as *calendar information*. The Individual Household Power Consumption Data Set, referred to here as **IHPC**, contains different *electric signals* measured in the household and the *timestamp* of each instant, from which it is possible to extract calendar information. The following features will be generated:

Feature | Type | Description
:---: | :---: | :---
**is_workday** | *BOOLEAN* | day is between monday and friday
**is_morning** | *BOOLEAN* | hour is between 07:00h and 11:59h
**is_afternoon** | *BOOLEAN* | hour is between 12:00h and 17:59h
**is_night** | *BOOLEAN* | hour is between 18:00h and 23:59h
**is_dawn** | *BOOLEAN* | hour is between 00:00h and 06:59h
**is_monday** | *BOOLEAN* | day is monday
**is_tuesday** | *BOOLEAN* | day is tuesday
**is_wednesday** | *BOOLEAN* | day is wednesday
**is_thursday** | *BOOLEAN* | day is thursday
**is_friday** | *BOOLEAN* | day is friday
**is_saturday** | *BOOLEAN* | day is saturday
**is_sunday** | *BOOLEAN* | day is sunday

In [16]:
# Prototype of the calendar features on the minute-level table.
# Every calendar field is extracted from `datetime + 1 minute`, i.e. it
# describes the instant right after the row's timestamp (hence the `_t1`
# suffix on most columns; `is_workday` uses the same shifted instant).
# PostgreSQL's DOW runs 0 (Sunday) .. 6 (Saturday), so:
#   - `is_workday` is FALSE for DOW in (0, 6) and TRUE otherwise;
#   - `day_of_week_t1` is DOW + 1, giving 1 (Sunday) .. 7 (Saturday).
# Only the first 10 rows (ordered by the first column) are fetched for inspection.
time_features_sql = """
SELECT main.* FROM
(
SELECT
    present.*,
    CASE WHEN EXTRACT( DOW FROM (datetime + INTERVAL '1' MINUTE) ) in (0,6) THEN FALSE ELSE TRUE END as is_workday,
    EXTRACT( DOW FROM (datetime + INTERVAL '1' MINUTE) ) + 1 as day_of_week_t1,
    EXTRACT( DOY FROM (datetime + INTERVAL '1' MINUTE) ) as day_of_year_t1,
    EXTRACT( WEEK FROM (datetime + INTERVAL '1' MINUTE) ) as week_of_year_t1,
    EXTRACT( DAY FROM (datetime + INTERVAL '1' MINUTE) ) as day_of_month_t1,
    EXTRACT( HOUR FROM (datetime + INTERVAL '1' MINUTE) ) as hour_of_day_t1
FROM 
    clean.individual_household_power_consumption present    
) main
ORDER BY 1 ASC
LIMIT 10;
"""
check_target = pg.load_query_to_df(time_features_sql)
check_target.head()

Unnamed: 0,_id,datetime,global_active_power,global_reactive_power,voltage,current,sub_metering_1,sub_metering_2,sub_metering_3,is_workday,day_of_week_t1,day_of_year_t1,week_of_year_t1,day_of_month_t1,hour_of_day_t1
0,1,2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,0.06,1.02,False,7.0,350.0,50.0,16.0,17.0
1,2,2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,0.06,0.96,False,7.0,350.0,50.0,16.0,17.0
2,3,2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,0.12,1.02,False,7.0,350.0,50.0,16.0,17.0
3,4,2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,0.06,1.02,False,7.0,350.0,50.0,16.0,17.0
4,5,2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,0.06,1.02,False,7.0,350.0,50.0,16.0,17.0
