# Ensemble Learning

## Import libraries

In [14]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split

## Data

### Load data

In [3]:
dataset_path = './data/Housing.csv'
df = pd.read_csv(dataset_path)

### Data preprocessing

In [4]:
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [5]:
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(
    df[categorical_cols]
)
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols, 
    columns=categorical_cols
)
numerical_df = df.drop(columns=categorical_cols)
encoded_df = pd.concat(
    [numerical_df, encoded_categorical_df], 
    axis=1
)

### Data standardization

In [6]:
normalizer = StandardScaler()
dataset_arr = normalizer.fit_transform(encoded_df)

### Train test split

In [7]:
X, y = dataset_arr[:, :-1], dataset_arr[:, -1]

In [8]:
test_size = 0.3
random_state = 1
is_shuffle = True

X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=test_size,
    random_state=random_state,
    shuffle=is_shuffle
)

## Model

### Train

#### Decision Tree

In [11]:
regressor_DT = DecisionTreeRegressor(
    random_state=random_state
)

regressor_DT.fit(X_train, y_train)

#### Random Forest

In [15]:
regressor_RF = RandomForestRegressor(
    random_state=random_state
)

regressor_RF.fit(X_train, y_train)

#### AdaBoost

In [16]:
regressor_AB = AdaBoostRegressor(
    random_state=random_state
)

regressor_AB.fit(X_train, y_train)

#### Gradient Boosting

In [17]:
regressor_GB = GradientBoostingRegressor(
    random_state=random_state
)

regressor_GB.fit(X_train, y_train)

### Evaluate

In [19]:
y_pred = regressor_DT.predict(X_test)

In [20]:
mae = np.mean(np.abs(y_test - y_pred))
mse = np.mean((y_test - y_pred) ** 2)

print("Decision Tree")
print(f'MAE: {mae}')
print(f'MSE: {mse}')

Decision Tree
MAE: 0.9378719158549539
MSE: 1.7598500235435834
