In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 1. Generate Synthetic Data
# Seed NumPy's legacy global RNG so every draw below is reproducible.
np.random.seed(42)
m = 1000  # Number of samples

# Feature matrix: m rows x 3 columns, drawn uniformly and scaled to [0, 10).
X = np.random.rand(m, 3) * 10
x0, x1, x2 = X[:, 0], X[:, 1], X[:, 2]

# Target: a fixed linear combination of the three features plus
# standard-normal noise (one noise draw per sample).
linear_signal = 2 * x0 + 3 * x1 - 1.5 * x2
noise = np.random.randn(m)
y = linear_signal + noise

# 2. Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3. Initialize Model
# Hyperparameters are collected in one place so they are easy to read and tune.
rf_params = {
    "n_estimators": 100,     # Number of trees
    "max_depth": 5,          # Maximum tree depth
    "max_features": "sqrt",  # Features per split
    "random_state": 42,      # Fixed seed for the forest's own randomness
}
rf_model = RandomForestRegressor(**rf_params)

# 4. Train Model (Bootstrap Sampling + Feature Subsetting)
rf_model.fit(X_train, y_train)

# 5. Evaluate
# Score the fitted forest on the held-out rows only.
y_pred = rf_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print(f"Test MSE: {test_mse:.4f}")
print(f"Test R²: {test_r2:.4f}")

# 6. Predict New Data
# predict() expects a 2-D array, so the single sample is wrapped as one row
# with three feature values.
new_data = np.array([[5, 3, 2]])
prediction = rf_model.predict(new_data)
print(f"Prediction: {prediction}")

Test MSE: 9.6383
Test R²: 0.9276
Prediction: [13.9369028]
