# Ensemble Learning

## Import libraries

In [17]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

## Data

### Load data

In [18]:
# Location of the housing CSV, relative to the notebook's working directory.
dataset_path = './data/Housing.csv'
# Read the whole file into a DataFrame; every later cell derives from `df`.
df = pd.read_csv(dataset_path)

### Data preprocessing

In [19]:
# Names of all object-dtype columns in `df`; these are the columns that get
# ordinal-encoded in the next cell.
categorical_cols = list(
    df.select_dtypes(include='object').columns
)
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [20]:
# Replace each categorical column with the ordinal codes produced by
# OrdinalEncoder, then stitch the result back next to the untouched
# numerical columns.
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(
    df[categorical_cols]
)
# Carry over df's index: pd.concat(axis=1) aligns rows by index label, so a
# freshly numbered 0..n-1 index would misalign (and NaN-pad) the rows whenever
# `df` does not have the default RangeIndex (e.g. after filtering).
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    index=df.index,
    columns=categorical_cols
)
# Everything that is not categorical is kept as-is, in its original order.
numerical_df = df.drop(columns=categorical_cols)
# Numerical columns first, encoded categorical columns after them.
encoded_df = pd.concat(
    [numerical_df, encoded_categorical_df],
    axis=1
)

### Data standardization

In [21]:
# Standardize every column of the encoded table with StandardScaler.
# NOTE(review): the scaler is fitted on the complete table, i.e. before the
# train/test split and including column 0, which is later used as the target.
# Statistics of the future test rows therefore influence the scaling, and all
# reported errors are in standardized target units. Consider fitting the
# scaler on the training features only.
normalizer = StandardScaler()
normalizer.fit(encoded_df)
dataset_arr = normalizer.transform(encoded_df)

### Train test split

In [22]:
# Column 0 of the standardized array is the regression target; every
# remaining column is a feature.
y = dataset_arr[:, 0]
X = dataset_arr[:, 1:]

In [23]:
# Split settings: 30% of the rows are held out for testing, rows are shuffled
# before splitting, and the seed is fixed. `random_state` is also reused as
# the seed of every model trained below.
test_size, random_state, is_shuffle = 0.3, 1, True

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=random_state, shuffle=is_shuffle
)

## Model

### Train

#### Decision Tree

In [24]:
# Single decision tree with default hyper-parameters, seeded with the shared
# random_state, fitted on the training split.
regressor_DT = DecisionTreeRegressor(random_state=random_state)
regressor_DT.fit(X_train, y_train)

#### Random Forest

In [25]:
# Random forest with default hyper-parameters, seeded with the shared
# random_state, fitted on the training split.
regressor_RF = RandomForestRegressor(random_state=random_state)
regressor_RF.fit(X_train, y_train)

#### AdaBoost

In [26]:
# AdaBoost regressor with default hyper-parameters, seeded with the shared
# random_state, fitted on the training split.
regressor_AB = AdaBoostRegressor(random_state=random_state)
regressor_AB.fit(X_train, y_train)

#### Gradient Boosting

In [27]:
# Gradient boosting regressor with default hyper-parameters, seeded with the
# shared random_state, fitted on the training split.
regressor_GB = GradientBoostingRegressor(random_state=random_state)
regressor_GB.fit(X_train, y_train)

### Evaluate

In [29]:
# Predict the held-out test targets with the fitted Random Forest model.
# Only this model is scored here; the other three fitted regressors are not
# used in this cell.
y_pred = regressor_RF.predict(X_test)

In [30]:
# Score the predictions held in `y_pred` against the test targets.
# Both the targets and the predictions are in standardized units, because the
# target column was scaled together with the features.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# `y_pred` was produced by regressor_RF, so the results are reported under
# the Random Forest name (the heading previously read "Decision Tree").
print("Random Forest")
print(f'MAE: {mae}')
print(f'MSE: {mse}')

Decision Tree
MAE: 0.4610288397546841
MSE: 0.3790201169342062
