# Feature Engineering

### Import Libraries

In [69]:
import pandas as pd
from datetime import datetime, date, time 

In [70]:
# Display settings: lift the column cap so wide frames are shown in full
pd.options.display.max_columns = None

### Import Data

In [71]:
## Train split: name of the feature file to produce and the cleaned input to read
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like
# an index saved by an earlier step -- confirm whether it should be kept.
outputfile = "pp_sg_train_features.csv"
df = pd.read_csv("./pp_sg_train_cleaned.csv", delimiter=";")
df.head()

Unnamed: 0,P24,P44,P42,P33,P23,P25,P21,P31,P53,P32,P22,P52,P51,P43,datetime,ferien,feiertag,covid_19,olma_offa,temperature_2m_max,temperature_2m_min,rain_sum,snowfall_sum
0,206.0,253.0,0.0,89.0,127.0,198.0,221.0,130.0,0.0,63.0,74.0,0.0,57.0,32.0,02.10.2019 07:25,1,0,0,0,14.701,6.601,16.9,0.0
1,87.0,87.0,0.0,4.0,24.0,173.0,69.0,25.0,0.0,26.0,0.0,0.0,13.0,5.0,04.10.2019 15:28,1,0,0,0,12.301,3.551,8.0,0.0
2,99.0,106.0,0.0,3.0,38.0,175.0,68.0,21.0,0.0,27.0,3.0,0.0,14.0,11.0,04.10.2019 15:43,1,0,0,0,12.301,3.551,8.0,0.0
3,105.0,109.0,0.0,3.0,37.0,178.0,78.0,22.0,0.0,30.0,1.0,0.0,18.0,15.0,04.10.2019 15:58,1,0,0,0,12.301,3.551,8.0,0.0
4,104.0,133.0,0.0,8.0,44.0,183.0,91.0,22.0,0.0,32.0,1.0,0.0,24.0,21.0,04.10.2019 16:13,1,0,0,0,12.301,3.551,8.0,0.0


In [72]:
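## For test-data: uncomment the three lines below (and run this cell instead of
## the train-data cell above) to process the test split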
#outputfile = "pp_sg_test_features.csv"
#df = pd.read_csv("./pp_sg_test_cleaned.csv", sep=";")
#df.head()

### Extract Time Components

In [73]:
# Parse the raw 'dd.mm.YYYY HH:MM' strings into real timestamps, then derive
# one column per calendar/clock component. Each later entry reads the already
# parsed 'datetime' column, so the keyword order below is significant; it also
# fixes the order in which the new columns are appended.
df = df.assign(
    # String column -> datetime64
    datetime=lambda frame: pd.to_datetime(frame['datetime'], format='%d.%m.%Y %H:%M'),
    # Calendar date without the time part
    date=lambda frame: frame['datetime'].dt.date,
    year=lambda frame: frame['datetime'].dt.year,
    month=lambda frame: frame['datetime'].dt.month,
    day=lambda frame: frame['datetime'].dt.day,
    # Weekday as English name and as number (Monday=0 ... Sunday=6)
    weekdayname=lambda frame: frame['datetime'].dt.day_name(),
    weekday=lambda frame: frame['datetime'].dt.dayofweek,
    # Clock time as 'HH:MM' text, plus its numeric parts
    time=lambda frame: frame['datetime'].dt.strftime('%H:%M'),
    hour=lambda frame: frame['datetime'].dt.hour,
    minute=lambda frame: frame['datetime'].dt.minute,
)

In [74]:
# Spot-check the first ten rows, including the newly derived time columns
df.head(n=10)

Unnamed: 0,P24,P44,P42,P33,P23,P25,P21,P31,P53,P32,P22,P52,P51,P43,datetime,ferien,feiertag,covid_19,olma_offa,temperature_2m_max,temperature_2m_min,rain_sum,snowfall_sum,date,year,month,day,weekdayname,weekday,time,hour,minute
0,206.0,253.0,0.0,89.0,127.0,198.0,221.0,130.0,0.0,63.0,74.0,0.0,57.0,32.0,2019-10-02 07:25:00,1,0,0,0,14.701,6.601,16.9,0.0,2019-10-02,2019,10,2,Wednesday,2,07:25,7,25
1,87.0,87.0,0.0,4.0,24.0,173.0,69.0,25.0,0.0,26.0,0.0,0.0,13.0,5.0,2019-10-04 15:28:00,1,0,0,0,12.301,3.551,8.0,0.0,2019-10-04,2019,10,4,Friday,4,15:28,15,28
2,99.0,106.0,0.0,3.0,38.0,175.0,68.0,21.0,0.0,27.0,3.0,0.0,14.0,11.0,2019-10-04 15:43:00,1,0,0,0,12.301,3.551,8.0,0.0,2019-10-04,2019,10,4,Friday,4,15:43,15,43
3,105.0,109.0,0.0,3.0,37.0,178.0,78.0,22.0,0.0,30.0,1.0,0.0,18.0,15.0,2019-10-04 15:58:00,1,0,0,0,12.301,3.551,8.0,0.0,2019-10-04,2019,10,4,Friday,4,15:58,15,58
4,104.0,133.0,0.0,8.0,44.0,183.0,91.0,22.0,0.0,32.0,1.0,0.0,24.0,21.0,2019-10-04 16:13:00,1,0,0,0,12.301,3.551,8.0,0.0,2019-10-04,2019,10,4,Friday,4,16:13,16,13
5,108.0,131.0,0.0,17.0,52.0,191.0,98.0,34.0,0.0,36.0,6.0,0.0,33.0,29.0,2019-10-04 16:25:00,1,0,0,0,12.301,3.551,8.0,0.0,2019-10-04,2019,10,4,Friday,4,16:25,16,25
6,68.0,68.0,0.0,21.0,19.0,133.0,108.0,23.0,0.0,9.0,21.0,0.0,24.0,6.0,2019-10-08 14:26:00,1,0,0,0,17.451,5.001,2.7,0.0,2019-10-08,2019,10,8,Tuesday,1,14:26,14,26
7,67.0,70.0,0.0,21.0,13.0,133.0,113.0,12.0,0.0,11.0,22.0,0.0,24.0,6.0,2019-10-08 14:41:00,1,0,0,0,17.451,5.001,2.7,0.0,2019-10-08,2019,10,8,Tuesday,1,14:41,14,41
8,61.0,44.0,0.0,12.0,4.0,132.0,108.0,18.0,0.0,13.0,27.0,0.0,28.0,15.0,2019-10-08 14:56:00,1,0,0,0,17.451,5.001,2.7,0.0,2019-10-08,2019,10,8,Tuesday,1,14:56,14,56
9,64.0,35.0,0.0,13.0,16.0,135.0,102.0,15.0,0.0,13.0,26.0,0.0,30.0,21.0,2019-10-08 15:11:00,1,0,0,0,17.451,5.001,2.7,0.0,2019-10-08,2019,10,8,Tuesday,1,15:11,15,11


### Write CSV

In [75]:
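# NOTE(review): the export is currently commented out, so running the notebook
# does not write `outputfile`; uncomment the line below to persist the features.
#df.to_csv(outputfile, sep=";", index=False)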