In [13]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [12]:
def gradient_boosting_mse(X, y, num_iter, max_depth=1, nu=0.1):
    """Fit a gradient-boosted ensemble of regression trees for squared error.

    The model starts from the mean of ``y``. Each round fits a
    ``DecisionTreeRegressor`` to the current residuals ``y - fm`` and then
    moves the running fit ``fm`` by ``nu`` times that tree's predictions on
    ``X``.

    Inputs:
        X: training features; handed unchanged to the trees' ``fit`` and
           ``predict`` methods.
        y: training targets; any array-like (list, tuple, ndarray, ...),
           converted with ``np.asarray``.
        num_iter: number of boosting rounds, i.e. number of trees fitted.
        max_depth: ``max_depth`` given to every tree.
        nu: shrinkage applied to each tree's contribution. Pass the same
            value to ``gradient_boosting_predict`` when predicting.

    Outputs:
        y_mean: mean of ``y`` (the initial constant prediction).
        trees: list of the fitted trees, in fitting order.
    """
    # Convert once so that `y - fm` is element-wise even when y is a plain list.
    y = np.asarray(y)
    y_mean = np.mean(y)
    fm = y_mean  # scalar at first; becomes an array after the first round
    trees = []
    for _ in range(num_iter):
        resid = y - fm
        tree = DecisionTreeRegressor(max_depth=max_depth)
        tree.fit(X, resid)
        trees.append(tree)
        fm = fm + nu * tree.predict(X)
    return y_mean, trees

def gradient_boosting_predict(X, trees, y_mean,  nu=0.1):
    """Predict targets by adding shrunken tree outputs to the base value.

    Every row of ``X`` starts at ``y_mean``; then, for each tree in ``trees``
    (in order), ``nu * tree.predict(X)`` is added. Returns the resulting array
    with one entry per row of ``X``.
    """
    n_rows = X.shape[0]
    prediction = np.full(n_rows, y_mean)
    for fitted_tree in trees:
        step = nu * fitted_tree.predict(X)
        # Rebind rather than add in place so the result dtype can widen.
        prediction = prediction + step
    return prediction

---
## Test on sample data


In [14]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Hyperparameters live in one place so that training and prediction are
# guaranteed to use the same shrinkage value.
TEST_SIZE = 0.2
RANDOM_STATE = 42
NUM_ITER = 100
MAX_DEPTH = 1
NU = 0.1

# Load diabetes dataset
X, y = load_diabetes(return_X_y=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Apply Gradient Boosting
y_mean, trees = gradient_boosting_mse(
    X_train, y_train, num_iter=NUM_ITER, max_depth=MAX_DEPTH, nu=NU
)

# Predict on test set (must use the same shrinkage as training)
y_pred = gradient_boosting_predict(X_test, trees, y_mean, nu=NU)

# Calculate MSE
mse = mean_squared_error(y_test, y_pred)
mse


2751.53094468078

**Note:** MSE measures the average squared difference between actual and predicted values in a regression model. An MSE of 2751.53 on its own does not tell us whether the model performs well or poorly. It should be compared to a baseline (for example, the variance of the test targets, which equals the MSE of a model that always predicts their mean) and judged against the specific goals of the modeling effort.