# Weighted ML internal

https://www.jmlr.org/papers/volume25/23-0607/23-0607.pdf

https://github.com/XinyuChen-hey/Optimal-Weighted-Random-Forests/tree/main
- only in certain conditions, so not worth it for now 

In [2]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy.optimize import minimize

# Create dummy regression data
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train standard Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Get predictions from individual trees on validation set
tree_preds = np.array([tree.predict(X_val) for tree in rf.estimators_]).T

# Define objective function for optimal weights
def objective_1step(weights):
    pred = np.dot(tree_preds, weights)
    return mean_squared_error(y_val, pred)

# Constraints: Weights sum to 1
constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1})

# Bounds: Weights between 0 and 1
bounds = [(0, 1)] * rf.n_estimators

# 1-Step Optimal Weighting
initial_weights = np.ones(rf.n_estimators) / rf.n_estimators
result_1step = minimize(objective_1step, initial_weights, bounds=bounds, constraints=constraints)
optimal_weights_1step = result_1step.x

# 2-Step Optimal Weighting (Refine based on 1-Step result)
def objective_2step(weights):
    pred = np.dot(tree_preds, weights)
    return mean_squared_error(y_val, pred)

result_2step = minimize(objective_2step, optimal_weights_1step, bounds=bounds, constraints=constraints)
optimal_weights_2step = result_2step.x

# Predictions with optimal weights
final_tree_preds = np.array([tree.predict(X_test) for tree in rf.estimators_]).T

# Evaluate standard RF
rf_pred = rf.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_pred)
rf_rmse = np.sqrt(rf_mse)
rf_r2 = r2_score(y_test, rf_pred)

# Evaluate 1-Step Weighted RF
weighted_pred_1step = np.dot(final_tree_preds, optimal_weights_1step)
weighted_mse_1step = mean_squared_error(y_test, weighted_pred_1step)
weighted_rmse_1step = np.sqrt(weighted_mse_1step)
weighted_r2_1step = r2_score(y_test, weighted_pred_1step)

# Evaluate 2-Step Weighted RF
weighted_pred_2step = np.dot(final_tree_preds, optimal_weights_2step)
weighted_mse_2step = mean_squared_error(y_test, weighted_pred_2step)
weighted_rmse_2step = np.sqrt(weighted_mse_2step)
weighted_r2_2step = r2_score(y_test, weighted_pred_2step)

# Print results
print(f"Standard RF MSE: {rf_mse:.4f}, RMSE: {rf_rmse:.4f}, R2: {rf_r2:.4f}")
print(f"1-Step Optimal Weighted RF MSE: {weighted_mse_1step:.4f}, RMSE: {weighted_rmse_1step:.4f}, R2: {weighted_r2_1step:.4f}")
print(f"2-Step Optimal Weighted RF MSE: {weighted_mse_2step:.4f}, RMSE: {weighted_rmse_2step:.4f}, R2: {weighted_r2_2step:.4f}")


Standard RF MSE: 7734.2107, RMSE: 87.9444, R2: 0.7927
1-Step Optimal Weighted RF MSE: 7927.0351, RMSE: 89.0339, R2: 0.7875
2-Step Optimal Weighted RF MSE: 7921.9549, RMSE: 89.0054, R2: 0.7876
