### 1. Import Libraries:

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib

### 2. Load and Preprocess Data:

In [3]:
# Read the housing table from disk, discard the 'ocean_proximity' column,
# and remove every row that still contains a missing value.
data = (
    pd.read_csv('housing.csv')
    .drop(columns='ocean_proximity')
    .dropna()
)

# 'median_house_value' is the prediction target; every other remaining
# column is used as a feature.
y = data['median_house_value']
X = data.drop(columns='median_house_value')

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### 3. Train Random Forest Regression Model:

In [4]:
# Build a 100-tree random forest (seeded so repeated runs match) and fit it
# on the training split. fit() returns the estimator itself, so construction
# and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the test split.
y_pred = model.predict(X_test)

# Score the predictions with mean squared error (reported in squared units
# of the target) and print it.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 2408424402.3731775


### 4. Save the Trained Model:

In [5]:
# Persist the fitted estimator to disk with joblib. The call is kept as the
# cell's final expression so its return value is shown as the cell output.
joblib.dump(value=model, filename='house_price_model.joblib')

['house_price_model.joblib']