In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [3]:
# Load the raw listings, discard the `price_per_m2` column and dummy-encode the
# remaining columns with the pd.get_dummies defaults.
# NOTE(review): `price_per_m2` is presumably derived from the `price` target
# and dropped to avoid leakage — confirm against the data dictionary.
# NOTE(review): the path looks like a Google Drive mount; assumes the drive is
# mounted before this cell runs — TODO confirm.
apartments = pd.get_dummies(
    pd.read_csv('/content/drive/MyDrive/apartments/apartments.csv')
    .drop(columns='price_per_m2')
)

In [4]:
# Separate the regression target (`price`) from the feature matrix
# (every other column of the encoded frame).
y = apartments['price']
X = apartments.drop(columns='price')

In [10]:
# Hold out 30% of the rows for the final evaluation; the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [26]:
# Base estimator handed to the grid search; only the seed is pinned here, the
# remaining hyper-parameters are supplied by the search grid.
rf_model = RandomForestRegressor(
    random_state=42,
)

In [27]:
# Hyper-parameter values to search: 3 values for each of 4 parameters,
# i.e. 3 * 3 * 3 * 3 = 81 candidate combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [29]:
# Exhaustive 5-fold cross-validated search over every combination in
# `param_grid`, ranking candidates by negated mean squared error (so a higher
# score is better).
# n_jobs=-1 runs the independent candidate/fold fits on all available CPU
# cores instead of one after another; because `rf_model` carries a fixed
# random_state, the selected parameters and scores are the same as in a
# serial run.
grid_search_rf = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search_rf.fit(X_train, y_train)

# Last expression of the cell: display the winning hyper-parameters.
grid_search_rf.best_params_

{'max_depth': 20,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 100}

In [40]:
# Look up the mean cross-validated score of the winning candidate.
# The search was scored with 'neg_mean_squared_error', so this value is the
# negated MSE averaged over the folds (closer to zero is better).
best_index = grid_search_rf.best_index_
cv_results = grid_search_rf.cv_results_

cv_results['mean_test_score'][best_index]

-25990482510.75818

In [30]:
# Predict the held-out rows with the estimator that the search refitted on the
# whole training split using the best hyper-parameters.
y_pred_rf = grid_search_rf.best_estimator_.predict(X_test)

In [31]:
# Score the held-out predictions with three complementary regression metrics,
# evaluated in the order MSE, MAE, R-squared.
mse_rf, mae_rf, r2_rf = (
    metric(y_test, y_pred_rf)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# One report line per metric.
print(
    f'Random Forest - Mean Squared Error: {mse_rf}',
    f'Random Forest - Mean Absolute Error: {mae_rf}',
    f'Random Forest - R-squared: {r2_rf}',
    sep='\n',
)

Random Forest - Mean Squared Error: 22409105492.724953
Random Forest - Mean Absolute Error: 92952.3142879391
Random Forest - R-squared: 0.7201632692924071


Final model

In [42]:
# Build the final estimator from the hyper-parameters selected by the grid
# search rather than from a hand-copied set of values, so it cannot silently
# drift out of sync when the grid or the data change. `best_params_` only
# holds the keys of the search grid, so the seed is still passed explicitly.
final_model = RandomForestRegressor(**grid_search_rf.best_params_, random_state=42)

In [43]:
# Train the final model on the complete dataset (both the train and the test
# rows), so the held-out metrics reported earlier do not describe this fit.
final_model.fit(X, y)

Save model

In [44]:
import joblib

In [46]:
# Persist the fitted model to the working directory.
joblib.dump(final_model, 'final_model.pkl')

# Persist the training column names (in order) alongside the model.
# NOTE(review): presumably used at inference time to rebuild the same
# dummy-encoded column layout — confirm with the consumer of these files.
joblib.dump([*X.columns], 'column_names.pkl')

['column_names.pkl']