In [22]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [4]:
# Location of the King County house data CSV (raw file served from GitHub).
url = (
    'https://raw.githubusercontent.com/mGalarnyk/Tutorial_Data/master/'
    'King_County/kingCountyHouseData.csv'
)

In [5]:
# Download the CSV at `url` and parse it into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

In [6]:
# Columns retained for modelling: the five predictors followed by the target.
columns = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'price',
]

In [7]:
# Narrow the frame to the modelling columns only (predictors plus target).
df = df[columns]

In [8]:
# Predictor columns fed to the model (everything in `columns` except the target).
features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
]

In [9]:
# Feature matrix: one column per entry in `features`.
X = df[features]

In [10]:
# Regression target: the 'price' column as a Series.
y = df['price']

In [11]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [12]:
# Gradient-boosted regressor trained with the squared-error objective;
# random_state is pinned so repeated runs build the same model.
xgb_model = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
)

In [13]:
# Train on the training split only; the test split stays unseen until evaluation.
xgb_model.fit(X=X_train, y=y_train)

In [15]:
# Predict prices for the held-out test rows; y_pred is compared against
# y_test by the evaluation cells further down.
y_pred = xgb_model.predict(X_test)

In [18]:
# Coefficient of determination on the test split. Argument order matters for
# r2_score, so the true and predicted values are passed by keyword.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R² Score: {r2}')

R² Score: 0.5196903768357878


In [19]:
# Importance scores exposed by the fitted model; indexed positionally against
# `features` when printed below.
feature_importance = xgb_model.feature_importances_

In [20]:
# Heading for the per-feature importance listing printed by the following cell.
print('Feature Importance:')


Feature Importance:


In [21]:
# Walk feature names and their importance scores in lockstep, one line each.
for name, score in zip(features, feature_importance):
    print(f'{name}: {score}')

bedrooms: 0.06132914870977402
bathrooms: 0.07641507685184479
sqft_living: 0.6686041951179504
sqft_lot: 0.09688830375671387
floors: 0.09676332771778107


### Evaluate the model using additional metrics

In [23]:
# Mean absolute error between actual and predicted prices on the test split.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [24]:
# Mean squared error between actual and predicted prices on the test split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [25]:
# Root of the MSE, which expresses the error in the same units as the target.
rmse = np.sqrt(mse)

In [26]:
# Emit the full evaluation summary in one call; sep='\n' puts each metric on
# its own line, matching the output of four separate prints.
print(
    f'R² Score: {r2}',
    f'Mean Absolute Error (MAE): {mae}',
    f'Mean Squared Error (MSE): {mse}',
    f'Root Mean Squared Error (RMSE): {rmse}',
    sep='\n',
)

R² Score: 0.5196903768357878
Mean Absolute Error (MAE): 161562.5081363342
Mean Squared Error (MSE): 71848375466.61842
Root Mean Squared Error (RMSE): 268045.47275904217
