In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

In [None]:
# Load the train and test CSVs of the Kaggle house-price dataset.
train_csv_path = '/kaggle/input/house-price-dataset/train.csv'
test_csv_path = '/kaggle/input/house-price-dataset/test.csv'

train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

In [None]:
# Display the (rows, columns) shape of the training frame.
train_df.shape

In [None]:
# Display the (rows, columns) shape of the test frame.
test_df.shape

In [None]:
# Name of the column to predict; it is split off from the feature columns
# of both the train and test frames in the cells that follow.
target_column = 'price'

In [None]:
# Split the training frame into the target series and the remaining feature columns.
y_train = train_df[target_column]
X_train = train_df.drop(target_column, axis=1)

In [None]:
# Split the test frame into the target series and the remaining feature columns.
y_test = test_df[target_column]
X_test = test_df.drop(target_column, axis=1)

In [None]:
# Hyperparameters that are searched whether or not the trees are bootstrapped.
common_grid = {
    'n_estimators': [100, 200, 400, 600, 800],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 3, 4, 5],
    'min_samples_leaf': [1, 2, 3, 4],
    'max_features': ['sqrt', 'log2'],
}

# scikit-learn raises a ValueError at fit time when max_samples is set while
# bootstrap=False, so a single crossed grid wastes a third of its candidates on
# fits that can only fail. GridSearchCV accepts a list of grids; splitting by
# bootstrap mode keeps every valid combination and drops the invalid ones.
param_grid = [
    {**common_grid, 'bootstrap': [True], 'max_samples': [0.5, 0.75, None]},
    {**common_grid, 'bootstrap': [False], 'max_samples': [None]},
]

In [None]:
# Exhaustive grid search over param_grid for a seeded random forest:
# 3-fold cross-validation, candidates ranked by R^2, n_jobs=-1 for parallel fits.
print("Training the model...")
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

In [None]:
# Show the hyperparameter combination selected by the grid search.
print(grid_search.best_params_)

In [None]:
# Score the best estimator found by the grid search on the test split.
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)

# Compute each metric once; RMSE is derived from the already-computed MSE.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\nModel Performance:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R^2 Score: {r2:.2f}")

In [None]:
# Persist the tuned model to disk. `pickle` is already imported in the
# notebook's import cell, so it is not re-imported here.
# The file name is defined once so the written file and the confirmation
# message cannot drift apart.
model_path = 'random_forest_model.pkl'

with open(model_path, 'wb') as f:
    pickle.dump(best_rf, f)

print(f"Model saved into file '{model_path}'")