# Packages

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# import shap

# Load & Process data

In [7]:
# Location of the cleaned dataset; directory and file name are kept separate
# so either one can be changed on its own.
DATA_PATH = r'./data/clean'
FILE_NAME = 'data_clean_20240509.csv'

# Join with an explicit separator. Plain string concatenation of the two
# constants yields './data/cleandata_clean_20240509.csv', which is not the
# intended file. A doubled slash (if DATA_PATH ever gains a trailing '/') is
# still a valid path, so this form is safe either way.
data = pd.read_csv(f'{DATA_PATH}/{FILE_NAME}', sep=';')
print(f'{data.shape[0]} rows, {data.shape[1]} attributes')

18304 rows, 8 attributes


In [8]:
# Expand the frame with pandas' dummy (indicator) encoding; generated columns
# are named '<column>_<value>'.
data_processed = pd.get_dummies(data, prefix_sep='_')

# Separate the regression target ('Price') from the feature matrix.
y = data_processed['Price']
X = data_processed.drop(columns='Price')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

# Model building

In [9]:
def get_metrics(y_test, y_pred, return_=False):
    """Print regression metrics for a set of predictions.

    Computes mean squared error, mean absolute error and R-squared between
    the true values and the predictions, and prints each rounded to three
    decimals (MAE first, then MSE, then R-squared).

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_test``.
    return_ : bool, default False
        When truthy, also return the unrounded metrics.

    Returns
    -------
    tuple or None
        ``(mse, mae, r2)`` if ``return_`` is truthy, otherwise ``None``.
        Note the tuple order differs from the printed order.
    """
    scores = {
        'mse': mean_squared_error(y_test, y_pred),
        'mae': mean_absolute_error(y_test, y_pred),
        'r2': r2_score(y_test, y_pred),
    }

    # (label, key) pairs in the order they are shown to the reader.
    report = (
        ('Mean Absolute Error:', 'mae'),
        ('Mean Squared Error:', 'mse'),
        ('R-squared', 'r2'),
    )
    for label, key in report:
        print(f'{label} {round(scores[key], 3)}')

    if not return_:
        return None
    return scores['mse'], scores['mae'], scores['r2']

## RandomForestRegressor

In [22]:
from sklearn.ensemble import RandomForestRegressor

# Baseline random forest with default hyper-parameters. The seed is pinned
# (same value as the train/test split) because an unseeded forest produces
# different metrics on every run; n_jobs=-1 trains on all cores, matching the
# XGBoost cell, and does not affect the fitted model.
rfr = RandomForestRegressor(random_state=123, n_jobs=-1)
rfr.fit(X_train, y_train)

# Score on the held-out split.
y_pred_rfr = rfr.predict(X_test)
get_metrics(y_test, y_pred_rfr)

## XGBRegressor

In [12]:
from xgboost import XGBRegressor

# Gradient-boosted trees with default hyper-parameters, trained on all cores.
xgb = XGBRegressor(n_jobs=-1)
xgb.fit(X_train, y_train)

# Score on the held-out split. get_metrics takes (y_test, y_pred[, return_]);
# the model is not one of its arguments. Passing it as a third positional
# argument filled the `return_` flag, which made the cell echo a raw
# (mse, mae, r2) tuple in addition to the printed report.
y_pred_xgb = xgb.predict(X_test)
get_metrics(y_test, y_pred_xgb)

## LightGBM  
[*medium article 1*](https://medium.com/@turkishtechnology/light-gbm-light-and-powerful-gradient-boost-algorithm-eaa1e804eca8)  
[*medium article 2*](https://nikolh92.medium.com/what-makes-lightgbm-sometimes-better-how-to-quickly-implement-it-3265e701e8d2)  
[*medium article 3*](https://medium.com/@ilyurek/light-gbm-a-powerful-gradient-boosting-algorithm-fe145a1cd8a6)

In [25]:
from lightgbm import LGBMRegressor

# LightGBM regressor with the plain regression objective and otherwise
# default hyper-parameters.
lgbm = LGBMRegressor(objective='regression')
lgbm.fit(X_train, y_train)

# Score on the held-out split. The metrics helper needs the true targets
# first and the predictions second; the fitted model is not an argument.
y_pred_lgbm = lgbm.predict(X_test)
get_metrics(y_test, y_pred_lgbm)

# Error Analysis  
WIP

# SHAP values  
WIP