# Feature engineering

In [20]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.feature_selection import RFE

## Read dataset

In [14]:
# Load the input CSV; the path is relative to the notebook's working directory.
csv_path = 'data_droped_nov18_dummy_final.csv'
data = pd.read_csv(csv_path)

In [15]:
# Columns removed before modelling. The 'Unnamed: *' entries are presumably
# index columns left behind by earlier CSV saves -- TODO confirm.
discarded_columns = ['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'diff']

# Final column layout: the target ('lots_available') first, then the predictors.
# NOTE: reindex does not raise on a name that is absent from the frame; it adds
# that column filled with NaN, so a typo here would go unnoticed.
ordered_columns = [
    'lots_available', 'total_lot', 'carpark_number', 'x_coord', 'y_coord',
    'car_park_decks', 'gantry_height',
    'BASEMENT CAR PARK', 'COVERED CAR PARK', 'MECHANISED AND SURFACE CAR PARK',
    'MULTI-STOREY CAR PARK', 'SURFACE CAR PARK',
    '7AM-10.30PM', '7AM-7PM', 'NO', 'WHOLE DAY',
    'NO.1', 'SUN & PH FR 1PM-10.30PM', 'SUN & PH FR 7AM-10.30PM',
    'NO.2', 'YES',
    'N', 'Y',
]

# Index the rows by timestamp, drop the unwanted columns and fix the column order.
data = (
    data
    .set_index('datetime')
    .drop(columns=discarded_columns)
    .reindex(columns=ordered_columns)
)
# Convert the index to a DatetimeIndex so the .weekday / .hour accessors work on it.
data.index = pd.to_datetime(data.index)

## Create time-based features: day of week and hour of day

In [16]:
# Derive two calendar features from the datetime index:
#   day_of_week -- integer weekday (pandas convention: Monday=0 ... Sunday=6)
#   hour_of_day -- hour component of the timestamp
data = data.assign(
    day_of_week=data.index.weekday,
    hour_of_day=data.index.hour,
)

In [17]:
# Predictor columns fed to the feature selector, i.e. every column except the
# target 'lots_available'. The list is built from smaller groups for
# readability; the resulting order is unchanged.
carpark_columns = [
    'total_lot', 'carpark_number', 'x_coord', 'y_coord',
    'car_park_decks', 'gantry_height',
]
# Presumably one-hot (dummy) columns from the categorical car-park attributes,
# going by the "dummy" in the CSV file name -- TODO confirm.
indicator_columns = [
    'BASEMENT CAR PARK', 'COVERED CAR PARK', 'MECHANISED AND SURFACE CAR PARK',
    'MULTI-STOREY CAR PARK', 'SURFACE CAR PARK',
    '7AM-10.30PM', '7AM-7PM', 'NO', 'WHOLE DAY',
    'NO.1', 'SUN & PH FR 1PM-10.30PM', 'SUN & PH FR 7AM-10.30PM',
    'NO.2', 'YES',
    'N', 'Y',
]
# Calendar features derived from the datetime index.
time_columns = ['day_of_week', 'hour_of_day']

features = carpark_columns + indicator_columns + time_columns

## Split into features (x) and target (y)

In [18]:
# Predictor matrix: all rows, restricted to the columns named in `features`.
x = data.loc[:, features]

In [19]:
# Target vector: the 'lots_available' column for every row.
y = data.loc[:, 'lots_available']

## Build a model

In [22]:
# Base estimator whose feature importances drive the recursive elimination.
# scikit-learn permutes the candidate features at every split, so without a
# fixed seed the fitted tree -- and therefore the RFE ranking -- can differ
# between runs. Pinning random_state makes Restart & Run All reproducible.
model = DecisionTreeRegressor(random_state=42)

## Define RFE (recursive feature elimination)

In [23]:
# Recursive feature elimination wrapped around the tree model; it keeps
# eliminating features until 12 remain.
rfe = RFE(estimator=model, n_features_to_select=12)

## Select the top 12 features by fitting RFE on x and y

In [24]:
# Run the elimination. RFE.fit returns the fitted selector itself, so `fit`
# and `rfe` refer to the same object afterwards.
fit = rfe.fit(X=x, y=y)

## Result

In [25]:
# Summarise the RFE outcome with one row per candidate feature:
#   Feature_names -- column name as listed in `features`
#   Selected      -- rfe.support_ entry (True if the feature was kept)
#   RFE_ranking   -- rfe.ranking_ entry (1 for kept features, larger values
#                    for features eliminated earlier)
rfe_records = [
    {
        'Feature_names': feature_name,
        'Selected': is_selected,
        'RFE_ranking': rank,
    }
    for feature_name, is_selected, rank in zip(features, rfe.support_, rfe.ranking_)
]

df_RFE_results = pd.DataFrame(rfe_records)
df_RFE_results.index.name = 'Columns'
df_RFE_results

Unnamed: 0_level_0,Feature_names,Selected,RFE_ranking
Columns,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,total_lot,True,1
1,carpark_number,True,1
2,x_coord,True,1
3,y_coord,True,1
4,car_park_decks,True,1
5,gantry_height,True,1
6,BASEMENT CAR PARK,False,8
7,COVERED CAR PARK,False,11
8,MECHANISED AND SURFACE CAR PARK,False,13
9,MULTI-STOREY CAR PARK,True,1
