In [4]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import pandas as pd

from sklearn.metrics import mean_squared_error

In [46]:
#dataset

df = pd.read_csv("Student_Marks.csv")

X = df.drop("Marks", axis=1)
y = df["Marks"]


X_train = X[:70].values
y_train = y[:70].values
X_test = X[70:].values
y_test = y[70:].values




In [117]:
class GBM():
    
    def __init__(self):
        
        self.X_train = None
        self.y_train = None
        
        self.alpha = None
        
        self.residuals = None
        self.mean = None
        self.trees = []
        
        self.n_trees = None
        
    def set_model(self, X_train, y_train, alpha=0.2, n_trees=100):
        
        self.X_train = X_train
        self.y_train = y_train
        
        self.mean = np.mean(y_train)
        
        self.alpha = alpha
        self.n_trees = n_trees
        
        self.residuals = np.zeros((n_trees, len(y_train)))
        
    
    
    
    def weighted_sum_of_prev_res(self, last_i):
        
        predictions = self.mean
        
        for i in range(last_i):
    
            predictions += alpha * self.trees[i].predict(self.X_train)
        
        return predictions
    
    
    
    def fit(self):
        
        self.residuals[0] = (y_train - self.mean)
        
        for i in range(1, self.n_trees):
            #build tree to predict previous residuals
            self.trees.append(DecisionTreeRegressor(max_depth=3))
            self.trees[i-1].fit(X_train, self.residuals[i-1])
            
            #identify new residuals
            self.residuals[i] = (self.y_train - self.weighted_sum_of_prev_res(i))
    
    def predict(self, X):
        
        predictions = self.mean
        for i in range(self.n_trees-1):
    
            predictions += alpha * self.trees[i].predict(X)
        
        return predictions
        
        
        
    
        
        
        
        

In [121]:
gbm = GBM()
gbm.set_model(X_train, y_train, alpha=0.1, n_trees = 50)
gbm.fit()

### Let's see  our GBM in action

In [153]:

train_compar = np.array([y_train, 
                         gbm.predict(X_train)]).T


train_compar = pd.DataFrame(train_compar, columns=["y_train", "y_train_predict"])

In [155]:
train_compar

Unnamed: 0,y_train,y_train_predict
0,19.202,19.221081
1,7.734,7.705965
2,13.811,13.620820
3,53.018,52.917366
4,55.299,55.274575
...,...,...
65,13.562,13.987086
66,27.569,27.133525
67,6.185,6.244582
68,8.920,8.755314


In [158]:
test_compar = np.array([y_test,
                        gbm.predict(X_test)]).T

test_compar = pd.DataFrame(test_compar, columns=["Y_test", "GBM prediction"])

In [159]:
test_compar

Unnamed: 0,Y_test,GBM prediction
0,16.606,16.916763
1,13.416,12.150776
2,20.398,20.774634
3,7.014,7.048086
4,39.952,35.660167
5,6.217,6.408673
6,36.746,35.805957
7,38.278,38.396378
8,49.544,49.770933
9,6.349,6.408673
