In [None]:
# Q1. What is Gradient Boosting Regression?
"""
  Gradient Boosting Regression (GBR) is a machine learning algorithm used for regression tasks, which involve
  predicting a continuous numerical output based on a set of input features.

GBR works by iteratively adding decision trees to the ensemble, with each new tree trained to correct the errors of the 
previous trees in the ensemble. GBR builds a sequence of regression models, where each model is trained to
 predict the residual errors of the previous model. The final prediction is obtained by adding up the predictions of all 
 the models in the sequence.

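The gradient in GBR refers to the use of gradient descent optimization to minimize the loss function, which measures the 
difference between the predicted and actual values. Each new tree is fit to the negative gradient of the loss with respect to 
the current predictions (for squared error, this is simply the residuals), and its contribution is scaled by a weight such as 
the learning rate, so that the ensemble minimizes the loss function on the training data.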

"""








In [11]:
# Q2. Implement a simple gradient boosting algorithm from scratch using Python and NumPy. Use a
# simple regression problem as an example and train the model on a small dataset. Evaluate the model's
# performance using metrics such as mean squared error and R-squared.



import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

class GradientBoostingRegressor:
    """Minimal gradient boosting regressor for squared-error loss.

    The ensemble starts from the mean of the training targets and then adds
    ``n_estimators`` decision trees one after another. Each tree is fitted to
    the residuals of the current ensemble, and its output is scaled by
    ``learning_rate`` before being added to the running prediction.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (one tree per round).
    learning_rate : float, default=0.1
        Factor applied to every tree's prediction.
    max_depth : int, default=3
        Depth limit passed to each ``DecisionTreeRegressor``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.weights = []

    def fit(self, X, y):
        """Fit the ensemble to ``X`` and ``y`` and return ``self``.

        Any trees learned by a previous call are discarded first, so calling
        ``fit`` repeatedly always yields exactly ``n_estimators`` trees.
        """
        # Work in floating point so the in-place residual updates below are
        # valid even when the caller passes integer targets.
        y = np.asarray(y, dtype=np.float64)

        # Start from a clean slate; otherwise predict() would sum the trees of
        # an earlier fit together with the new ones.
        self.trees = []
        self.weights = []

        self.mean = np.mean(y)
        y_pred = np.full_like(y, self.mean, dtype=np.float64)

        for _ in range(self.n_estimators):
            # For squared error the negative gradient is the plain residual.
            residuals = y - y_pred
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.trees.append(tree)

            self.weights.append(self.learning_rate)
            y_pred += self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Return the initial mean plus the weighted sum of all tree outputs."""
        y_pred = np.full(np.shape(X)[0], self.mean, dtype=np.float64)

        for weight, tree in zip(self.weights, self.trees):
            y_pred += weight * tree.predict(X)

        return y_pred

# Toy regression problem: a single feature with five observations
X = np.arange(1, 6).reshape(-1, 1)
y = np.asarray([2, 4, 5, 4, 5], dtype=np.float64)

# Build the boosted ensemble and fit it on the toy data
model = GradientBoostingRegressor(max_depth=3, learning_rate=0.1, n_estimators=100)
model.fit(X, y)

# Score the in-sample predictions (no hold-out set in this small demo)
y_pred = model.predict(X)
mse, r2 = mean_squared_error(y, y_pred), r2_score(y, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)


Mean Squared Error: 1.262823920870276e-09
R-squared: 0.9999999989476467


In [None]:
# Q3. Experiment with different hyperparameters such as learning rate, number of trees, and tree depth to
# optimise the performance of the model. Use grid search or random search to find the best
# hyperparameters



class GradientBoostingRegressor:
    """Minimal gradient boosting regressor usable with hyperparameter search.

    The ensemble starts from the mean of the training targets and adds
    ``n_estimators`` decision trees sequentially; each tree is fitted to the
    residuals of the current ensemble and scaled by ``learning_rate``.

    ``get_params`` and ``set_params`` are implemented by hand so that
    parameter-search utilities can copy and reconfigure the estimator.
    NOTE(review): recent scikit-learn releases prefer estimators that inherit
    from ``sklearn.base.BaseEstimator``; switch to that if tag-related
    warnings show up.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (one tree per round).
    learning_rate : float, default=0.1
        Factor applied to every tree's prediction.
    max_depth : int, default=3
        Depth limit passed to each ``DecisionTreeRegressor``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        # Hyperparameters are stored untouched so a copy built from
        # get_params() receives exactly the same objects.
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.weights = []

    def get_params(self, deep=True):
        """Return the constructor hyperparameters as a dict.

        ``deep`` is accepted for API compatibility; there are no nested
        estimators, so it has no effect.
        """
        return {
            "n_estimators": self.n_estimators,
            "learning_rate": self.learning_rate,
            "max_depth": self.max_depth,
        }

    def set_params(self, **params):
        """Update hyperparameters in place and return ``self``.

        Raises
        ------
        ValueError
            If a key is not one of the constructor hyperparameters.
        """
        valid = self.get_params(deep=False)
        for name, value in params.items():
            if name not in valid:
                raise ValueError(
                    f"Invalid parameter {name!r} for GradientBoostingRegressor; "
                    f"valid parameters are {sorted(valid)}."
                )
            setattr(self, name, value)
        return self

    def fit(self, X, y):
        """Fit the ensemble to ``X`` and ``y`` and return ``self``.

        Any trees learned by a previous call are discarded first.
        """
        # Integer targets would make the running prediction an integer array,
        # and the in-place float update below would then fail; force float64.
        y = np.asarray(y, dtype=np.float64)

        # Reset learned state so refitting never stacks on an old ensemble.
        self.trees = []
        self.weights = []

        self.mean = np.mean(y)
        y_pred = np.full_like(y, self.mean, dtype=np.float64)

        for _ in range(self.n_estimators):
            # For squared error the negative gradient is the plain residual.
            residuals = y - y_pred
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.trees.append(tree)

            self.weights.append(self.learning_rate)
            y_pred += self.learning_rate * tree.predict(X)

        return self

    def predict(self, X):
        """Return the initial mean plus the weighted sum of all tree outputs."""
        y_pred = np.full(np.shape(X)[0], self.mean, dtype=np.float64)

        for weight, tree in zip(self.weights, self.trees):
            y_pred += weight * tree.predict(X)

        return y_pred

    def score(self, X, y):
        """Return the R-squared of the predictions for ``X`` against ``y``."""
        y_pred = self.predict(X)
        return r2_score(y, y_pred)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Sample data for regression (float targets keep the residual updates in floating point)
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 5, 4, 5], dtype=np.float64)

# Create a GradientBoostingRegressor object
model = GradientBoostingRegressor()

# Define the hyperparameter search space
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}

# Create a GridSearchCV object to search for the best hyperparameters.
# With only five samples, cv=5 leaves a single sample in every validation fold,
# and R-squared is undefined for one sample, so rank candidates by negative
# mean squared error instead (closer to zero is better).
grid_search = GridSearchCV(model, param_grid, cv=5, scoring="neg_mean_squared_error")

# Fit the GridSearchCV object to the data
grid_search.fit(X, y)

# Print the best hyperparameters and score (the score is the negative MSE)
print("Best hyperparameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)


In [None]:
# Q4. What is a weak learner in Gradient Boosting?
"""In Gradient Boosting, a weak learner is a simple model that is not capable of making accurate predictions on its own
 but can contribute to the overall accuracy when combined with other weak learners. In the context of decision trees, 
 a weak learner can be a tree with low depth or limited number of leaf nodes. The idea behind using weak learners is
  to combine their predictions in an additive manner to obtain a strong learner that can achieve high accuracy.
   The boosting algorithm trains the weak learners sequentially, with each tree trying to correct the errors of the
    previous tree. The final prediction is the weighted sum of the predictions of all weak learners."""

In [None]:
# Q5. What is the intuition behind the Gradient Boosting algorithm?
"""The intuition behind the Gradient Boosting algorithm is to create a powerful ensemble model by combining several weak learners.
 The algorithm works by sequentially training a series of weak learners, such as decision trees, and adding them to the ensemble 
 model. Each tree tries to correct the errors of the previous tree by fitting to the residual errors of the current predictions.
  This process is repeated multiple times until the ensemble model can no longer improve.

The name "Gradient Boosting" comes from the fact that the algorithm uses gradient descent optimization to minimize the loss 
function of the ensemble model. The gradient of the loss function is computed with respect to the predictions of the current
 ensemble model and is used to update the weights of the weak learners. The updates are made in such a way that each subsequent
  weak learner focuses on the errors made by the previous learners, thereby reducing the overall error of the ensemble model.

The intuition behind Gradient Boosting is that by combining several weak learners, we can create a more powerful model that
 can generalize well to new data. The algorithm is particularly effective in handling complex and non-linear relationships 
 between the input and output variables. However, it can be sensitive to overfitting, so careful tuning of the hyperparameters
  and regularization techniques are necessary to achieve optimal performance."""







In [None]:
# Q6. How does Gradient Boosting algorithm build an ensemble of weak learners?
"""The Gradient Boosting algorithm builds an ensemble of weak learners in a sequential manner. At each iteration, it fits a new
 model to the negative gradient of the loss function with respect to the predicted output of the previous model. The new model
  is then added to the ensemble, and its prediction is combined with the predictions of the previous models to make a new
   prediction. The process is repeated iteratively, with each new model attempting to correct the errors of the previous models.
    The final prediction is a weighted sum of the predictions of all the models in the ensemble, where the weights are determined 
    by the learning rate and the performance of the individual models. By combining the predictions of many weak learners, 
    the Gradient Boosting algorithm can build a strong learner that can make accurate predictions on a variety of problems."""

In [None]:
# Q7. What are the steps involved in constructing the mathematical intuition of Gradient Boosting
# algorithm?
"""The mathematical intuition behind the Gradient Boosting algorithm involves the following steps:

(1)Define a loss function--- The first step is to define a loss function that measures the difference between the predicted values
 and the actual values of the target variable. The loss function should be differentiable so that the gradient can be computed.

(2)Fit a model to the data--- The second step is to fit a model to the data using the loss function as the objective function to 
minimize. The model can be a simple model such as a decision tree or a more complex model such as a neural network.

(3)Compute the residuals--- Once the model is trained, the residuals are computed by subtracting the predicted values from the
 actual values of the target variable.

(4)Fit a new model to the residuals--- The next step is to fit a new model to the residuals. This model should be a weak learner, 
i.e., a model that is only slightly better than random guessing.

(5)Add the new model to the ensemble--- Once the new model is trained, it is added to the ensemble of models. The ensemble is updated
 by adding the weighted sum of the predictions of all the models in the ensemble.

(6)Repeat steps 3 to 5--- Steps 3 to 5 are repeated until a stopping criterion is met. The stopping criterion can be a maximum number
 of iterations or a minimum improvement in the loss function."""






