In [1]:
# Mount Google Drive at /content/drive so files stored there can be read by path
# (the dataset is loaded from /content/drive/MyDrive later in this notebook).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [25]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import regularizers, layers
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [3]:
# Read the listings, discard `price_per_m2`, and one-hot encode the remaining
# non-numeric columns in a single expression.
# NOTE(review): `price_per_m2` is presumably dropped because it is derived from
# the target `price` and would leak it — confirm against the dataset schema.
apartments = pd.get_dummies(
    pd.read_csv('/content/drive/MyDrive/apartments/apartments.csv')
    .drop(columns='price_per_m2')
)

In [4]:
# Separate the regression target (`price`) from the feature matrix.
y = apartments['price']
X = apartments.drop(columns='price')

In [7]:
# Feed-forward regression network: three L2-regularised ReLU layers (dropout
# after the first two) followed by a single linear output unit.
model = tf.keras.Sequential([
    # Declare the input explicitly. Passing `input_shape` to the first Dense
    # layer is deprecated in Keras 3 and triggers a UserWarning.
    tf.keras.Input(shape=(X.shape[1],)),
    layers.Dense(128, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.3),
    layers.Dense(64, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.3),
    layers.Dense(32, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.Dense(1)
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss="mse",
              metrics=["mae"])

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen, so `model` is not left at a state 5 epochs past its optimum.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Constant learning rate for the first 10 epochs, then multiply it by exp(-0.1)
# every epoch. The result is cast to a Python float because Keras 3 rejects any
# non-float schedule output (such as a tf.Tensor) with a ValueError.
lr_scheduler = LearningRateScheduler(
    lambda epoch, lr: float(lr * tf.math.exp(-0.1)) if epoch >= 10 else lr
)

# validation_split=0.2 holds out the last 20% of the rows as validation data.
history = model.fit(
    X, y,
    epochs=30,
    validation_split=0.2,
    batch_size=32,
    callbacks=[early_stopping, lr_scheduler]
)


Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 429847740416.0000 - mae: 593333.3125 - val_loss: 258575433728.0000 - val_mae: 467218.5000 - learning_rate: 0.0010
Epoch 2/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 444317532160.0000 - mae: 598821.3125 - val_loss: 247298719744.0000 - val_mae: 455956.9062 - learning_rate: 0.0010
Epoch 3/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 416857554944.0000 - mae: 573779.6250 - val_loss: 185468846080.0000 - val_mae: 388285.1875 - learning_rate: 0.0010
Epoch 4/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 313576325120.0000 - mae: 483550.8438 - val_loss: 62427381760.0000 - val_mae: 191749.4219 - learning_rate: 0.0010
Epoch 5/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 125305593856.0000 - mae: 263463.6250 - val_loss: 31629649920.0000 - val_mae: 128104.2969 - learn

In [10]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [26]:
# Base estimator handed to the grid search below; the fixed seed makes the
# forests built for each parameter combination reproducible.
rf_model = RandomForestRegressor(random_state=42)

In [27]:
# Hyper-parameter grid for the random forest: three candidate values for each
# of four settings, i.e. 3**4 = 81 combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [29]:
# Exhaustive 5-fold cross-validated search over `param_grid`, scored by negative
# MSE (higher is better). The grid expands to 81 combinations x 5 folds = 405
# fits, so n_jobs=-1 spreads them over all available cores; the selected
# parameters are unaffected because the estimator uses a fixed random_state.
grid_search_rf = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search_rf.fit(X_train, y_train)
grid_search_rf.best_params_

{'max_depth': 20,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 100}

In [40]:
# Look up the mean cross-validated score (negative MSE, per the search's
# `scoring`) of the winning parameter combination.
best_index = grid_search_rf.best_index_
cv_results = grid_search_rf.cv_results_

cv_results['mean_test_score'][best_index]

-25990482510.75818

In [30]:
# Predict prices for the held-out test rows. `refit` was left at its default,
# so predict() uses the best estimator refit on the whole training split.
y_pred_rf = grid_search_rf.predict(X_test)

In [31]:
# Score the tuned forest on the held-out test split with MSE, MAE and R².
mse_rf, mae_rf, r2_rf = (
    score(y_test, y_pred_rf)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

print(f'Random Forest - Mean Squared Error: {mse_rf}')
print(f'Random Forest - Mean Absolute Error: {mae_rf}')
print(f'Random Forest - R-squared: {r2_rf}')

Random Forest - Mean Squared Error: 22409105492.724953
Random Forest - Mean Absolute Error: 92952.3142879391
Random Forest - R-squared: 0.7201632692924071


Final model

In [42]:
# Build the final forest from the hyper-parameters the grid search selected
# instead of a hand-copied set, so it cannot silently drift out of sync with the
# search results if the data or the grid changes and the notebook is re-run.
final_model = RandomForestRegressor(**grid_search_rf.best_params_, random_state=42)

In [43]:
# Train the final model on the complete dataset (X and y are the full,
# unsplit feature matrix and target).
final_model.fit(X,y)

Save model

In [44]:
import joblib

In [46]:
# Persist the fitted forest and the ordered list of feature column names to the
# current working directory.
# NOTE(review): the column list is presumably saved so inference code can
# rebuild the same one-hot layout — confirm with the consumer of these files.
joblib.dump(final_model, 'final_model.pkl')
joblib.dump(X.columns.tolist(), 'column_names.pkl')

['column_names.pkl']