Creator - Sarvesh Joshi 

Date of creation - 14/7/2025

Last updated - 14/7/2025

In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
class Node():
    """A single node of a gradient-boosted regression tree.

    Parameters
    ----------
    feature : int or None
        Index of the column this node splits on. The value ``-1`` marks the
        node as a leaf.
    split_value : float or None
        Threshold compared against ``feature`` when routing a sample.
    depth : int
        Depth of the node inside its tree, as assigned by the caller.
    data : array-like or None
        Training rows that reached this node.
    Gradients, Hessians : array-like or None
        Per-row first and second order loss derivatives for ``data``.
    """

    def __init__(self,feature = None ,split_value = None,depth = 0,data=None,Gradients = None, Hessians = None):
        self.feature = feature
        self.split_value = split_value
        self.depth = depth
        self.data = data
        self.left = None
        self.right = None
        self.Gradients = Gradients
        self.Hessians = Hessians
        # A feature index of -1 is the "no split found" sentinel.
        self.is_leaf = bool(feature == -1)

    def out(self,lambd,alpha):
        """Return the leaf weight, or ``None`` when the node is not a leaf.

        The weight is ``-T(G) / (H + lambd)`` where ``G`` and ``H`` are the
        summed gradients and hessians and ``T`` soft-thresholds ``G`` by the
        L1 penalty ``alpha``.

        Parameters
        ----------
        lambd : float
            L2 regularisation added to the hessian sum.
        alpha : float
            L1 regularisation; gradient sums within ``[-alpha, alpha]`` give
            a weight of zero.
        """
        if not self.is_leaf:
            return None

        G = np.sum(self.Gradients)
        H = np.sum(self.Hessians)
        denominator = H + lambd

        # With no curvature and no L2 penalty the weight is undefined; the
        # unguarded division used to return +/-inf (or NaN) and corrupt every
        # prediction routed through this leaf. Contribute nothing instead.
        if denominator <= 0:
            return 0.0

        if G > alpha:
            return -(G - alpha) / denominator
        if G < -alpha:
            return -(G + alpha) / denominator
        return 0.0
    


class Tree():
    """Container for one fitted tree and the class (category) it scores.

    ``leaves`` and ``nodes`` start out empty and are meant to be filled in by
    whoever builds the tree.
    """

    def __init__(self, root, category):
        self.root, self.category = root, category
        self.leaves = list()
        self.nodes = list()
        
        

In [3]:
class XGBoostMultiClassifier():
    """From-scratch multi-class gradient boosting with second-order splits.

    Every boosting round grows one regression tree per class on the softmax
    gradients/hessians and adds ``learning_rate * leaf_weight`` to the logits.

    Parameters
    ----------
    max_depth : int
        Depth limit of each tree. The root has depth 1 and nodes at depth
        ``max_depth - 1`` are terminal (see ``Round.build_tree``).
    loss : str
        Loss name (case-insensitive); only ``"softmax"`` is supported.
    min_split_loss : float
        ``gamma`` - penalty subtracted from the gain of every split.
    learning_rate : float
        Shrinkage applied to each leaf weight.
    l2_regularization : float
        ``lambda`` - L2 penalty on leaf weights.
    max_iter : int
        Number of boosting rounds.
    l1_regularization : float
        ``alpha`` - L1 penalty on leaf weights.
    min_child_weight : float
        Minimum hessian sum required in each child of a split.
    subsample : float
        Fraction of rows (in ``(0, 1]``) drawn without replacement per round.
    """

    def __init__(
        self,
        max_depth=10,
        loss="softmax",
        min_split_loss=0,
        learning_rate=1,
        l2_regularization=0,
        max_iter=1,
        l1_regularization=0,
        min_child_weight=0,
        subsample=1,
    ):
        self.max_depth = max_depth
        self.loss = loss.lower()
        self.gamma = min_split_loss
        self.lambd = l2_regularization
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.alpha = l1_regularization
        self.min_child_weight = min_child_weight
        self.subsample = subsample
        self.rounds = []
        self.num_categories = 0
        # Sorted unique training labels; set by fit(). None means "not fitted".
        self.classes_ = None

    def label_to_onehot(self, y_train):
        """Return a ``(n_samples, n_classes)`` one-hot encoding of ``y_train``.

        After ``fit`` the columns follow the classes seen during training, so
        a subsample that happens to miss a class still gets a full-width
        matrix. Before ``fit`` the classes are the sorted unique labels of
        ``y_train`` itself.

        Raises
        ------
        ValueError
            If ``y_train`` contains a label that is not a known class.
        """
        if isinstance(y_train, (pd.DataFrame, pd.Series)):
            y_train = y_train.values
        labels = np.asarray(y_train).ravel()

        classes = getattr(self, "classes_", None)
        if classes is None:
            classes = np.unique(labels)

        n_samples, n_classes = labels.shape[0], len(classes)
        onehot = np.zeros((n_samples, n_classes))
        if n_samples == 0:
            return onehot
        if n_classes == 0:
            raise ValueError("y contains labels that were not seen during fit")

        # Map each label to its position in the sorted class array instead of
        # assuming the labels already are the integers 0..K-1.
        columns = np.clip(np.searchsorted(classes, labels), 0, n_classes - 1)
        if np.any(classes[columns] != labels):
            raise ValueError("y contains labels that were not seen during fit")
        onehot[np.arange(n_samples), columns] = 1
        return onehot

    def softmax(self, y):
        """Row-wise softmax of a 2-D array of logits.

        The row maximum is subtracted before exponentiating so large logits
        do not overflow to ``inf`` (which previously produced NaN rows).
        """
        logits = np.asarray(y, dtype=float)
        if logits.size == 0:
            return logits.copy()
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / np.sum(exponentials, axis=1, keepdims=True)

    class Round():
        """One boosting iteration: gradients plus one tree per class."""

        def __init__(self, xgboost_instance, round_num):
            self.xgboost = xgboost_instance
            self.round_num = round_num
            self.trees = []
            self.Gradients = None
            self.Hessians = None
            self.nodes = []

        def compute_gradients(self, y_train, y_guess):
            """Store the softmax gradients and hessians for the current logits.

            ``Gradients = p - onehot(y)`` and ``Hessians = p * (1 - p)`` where
            ``p = softmax(y_guess)``; both are ``(n_samples, n_classes)``.

            Raises
            ------
            ValueError
                If the configured loss is not ``"softmax"``.
            """
            if self.xgboost.loss != "softmax":
                raise ValueError("loss function can only be softmax")

            targets = self.xgboost.label_to_onehot(y_train)
            probabilities = self.xgboost.softmax(y_guess)
            self.Gradients = probabilities - targets
            self.Hessians = probabilities * (1 - probabilities)

        def find_split_value(self, X, Gradients, Hessians):
            """Search every feature for the split with the highest gain.

            Gain of a candidate split is
            ``0.5 * (G_L^2/(H_L+lambda) + G_R^2/(H_R+lambda) - G^2/(H+lambda)) - gamma``.
            A candidate is accepted only if its gain is non-negative and both
            children satisfy ``min_child_weight``.

            Parameters
            ----------
            X : ndarray of shape (n_samples, n_features)
            Gradients, Hessians : ndarray of shape (n_samples,)

            Returns
            -------
            (feature, split_value)
                ``feature`` is ``-1`` (and ``split_value`` is ``0``) when no
                acceptable split exists, which makes the node a leaf.
            """
            gamma = self.xgboost.gamma
            lambd = self.xgboost.lambd
            min_child_weight = self.xgboost.min_child_weight
            eps = 1e-12  # keeps the denominators away from exactly zero

            feature = -1
            split_value = 0
            if X.shape[0] < 2:
                return feature, split_value

            G = np.sum(Gradients)
            H = np.sum(Hessians)
            # The shared 0.5 factor is applied once, in the gain expression
            # below. It used to be applied here as well, which halved the
            # parent score twice and inflated the gain of every split.
            parent_score = (G ** 2) / (H + lambd + eps)
            max_gain = -np.inf

            for curr_feature in range(X.shape[1]):
                column = X[:, curr_feature]
                order = column.argsort()
                x_sorted = column[order]

                # Prefix sums give the left-child statistics for a split
                # placed after each of the first n-1 sorted rows.
                G_L = np.cumsum(Gradients[order])[:-1]
                H_L = np.cumsum(Hessians[order])[:-1]
                G_R = G - G_L
                H_R = H - H_L

                gain = 0.5 * (
                    (G_L ** 2) / (H_L + lambd + eps)
                    + (G_R ** 2) / (H_R + lambd + eps)
                    - parent_score
                ) - gamma

                # Equal neighbouring values cannot be separated by a threshold.
                valid = (
                    (x_sorted[:-1] != x_sorted[1:])
                    & (gain >= 0)
                    & (H_L >= min_child_weight)
                    & (H_R >= min_child_weight)
                )
                if not np.any(valid):
                    continue

                candidates = np.where(valid, gain, -np.inf)
                best = int(np.argmax(candidates))
                if candidates[best] > max_gain:
                    max_gain = candidates[best]
                    feature = curr_feature
                    split_value = (x_sorted[best] + x_sorted[best + 1]) / 2

            return feature, split_value

        def create_node(self, data, depth, Gradients, Hessians, force_leaf=False):
            """Create a node for ``data`` and register it in ``self.nodes``.

            When ``force_leaf`` is true the split search is skipped and the
            node is created directly as a leaf.
            """
            if force_leaf:
                feature, split_value = -1, 0
            else:
                feature, split_value = self.find_split_value(data, Gradients=Gradients, Hessians=Hessians)

            new_node = Node(feature=feature, split_value=split_value, depth=depth, data=data, Gradients=Gradients, Hessians=Hessians)
            self.nodes.append(new_node)
            return new_node

        def build_tree(self, X, num_categories):
            """Grow one tree per class, breadth first, from the stored gradients.

            Any trees and nodes from a previous call are discarded first, so
            refitting never leaves stale trees in front of the new ones.

            NOTE(review): the root has depth 1 and a node whose depth is
            ``max_depth - 1`` is always a leaf, i.e. a tree has at most
            ``max_depth - 2`` levels of splits. This keeps the existing
            behaviour for ``max_depth >= 2``; confirm whether XGBoost's
            convention (``max_depth`` levels of splits) was intended.
            """
            max_depth = self.xgboost.max_depth
            self.trees = []
            self.nodes = []

            for category in range(num_categories):
                root = self.create_node(
                    data=X,
                    depth=1,
                    Gradients=self.Gradients[:, category],
                    Hessians=self.Hessians[:, category],
                    # Also covers max_depth <= 1, which used to leave a
                    # child-less non-leaf root behind.
                    force_leaf=(1 + 1 >= max_depth),
                )

                leaves = []
                level = [root]
                while level:
                    next_level = []
                    for node in level:
                        if not node.is_leaf:
                            values = node.data[:, node.feature]
                            mask_left = values < node.split_value
                            mask_right = values >= node.split_value

                            # An empty child would produce NaN statistics.
                            if mask_left.any() and mask_right.any():
                                child_depth = node.depth + 1
                                children_terminal = child_depth + 1 >= max_depth
                                node.left = self.create_node(
                                    data=node.data[mask_left],
                                    depth=child_depth,
                                    Gradients=node.Gradients[mask_left],
                                    Hessians=node.Hessians[mask_left],
                                    force_leaf=children_terminal,
                                )
                                node.right = self.create_node(
                                    data=node.data[mask_right],
                                    depth=child_depth,
                                    Gradients=node.Gradients[mask_right],
                                    Hessians=node.Hessians[mask_right],
                                    force_leaf=children_terminal,
                                )
                                next_level.append(node.left)
                                next_level.append(node.right)
                                continue

                            node.is_leaf = True

                        # Each leaf is recorded exactly once.
                        leaves.append(node)
                    level = next_level

                tree = Tree(root=root, category=category)
                tree.leaves = leaves
                self.trees.append(tree)

        def evaluate_tree(self, X, y_guess):
            """Return ``y_guess`` plus this round's shrunken tree outputs.

            Rows are routed through each class tree in groups, so every leaf
            weight is computed once rather than once per sample.

            Parameters
            ----------
            X : ndarray of shape (n_samples, n_features)
            y_guess : ndarray of shape (n_samples, n_classes)
                Current logits; not modified.

            Raises
            ------
            ValueError
                If this round has no tree for one of the classes.
            """
            X = np.asarray(X)
            y_guess = np.asarray(y_guess, dtype=float)
            n_classes = y_guess.shape[1]

            if len(self.trees) < n_classes or any(
                tree.root is None for tree in self.trees[:n_classes]
            ):
                raise ValueError("Tree not built yet. Please call fit before predict")

            lambd = self.xgboost.lambd
            alpha = self.xgboost.alpha
            learning_rate = self.xgboost.learning_rate

            y_guess_new = y_guess.copy()
            for category in range(n_classes):
                pending = [(self.trees[category].root, np.arange(X.shape[0]))]
                while pending:
                    node, rows = pending.pop()
                    if rows.size == 0:
                        continue
                    if node.is_leaf:
                        out = node.out(lambd=lambd, alpha=alpha)
                        y_guess_new[rows, category] = y_guess[rows, category] + learning_rate * out
                        continue
                    goes_left = X[rows, node.feature] < node.split_value
                    pending.append((node.left, rows[goes_left]))
                    pending.append((node.right, rows[~goes_left]))

            return y_guess_new

    def create_round(self, round_num):
        """Return a new, empty ``Round`` bound to this classifier."""
        return self.Round(xgboost_instance=self, round_num=round_num)

    def fit(self, X_train, y_train):
        """Fit ``max_iter`` boosting rounds on the training data.

        Any previously fitted rounds are discarded. Labels may be any sortable
        values; they are mapped to columns via their sorted unique values.

        Raises
        ------
        ValueError
            If the inputs are empty or of different length, or if
            ``subsample`` is outside ``(0, 1]``.
        """
        if isinstance(X_train, pd.DataFrame):
            X_train = X_train.values
        if isinstance(y_train, (pd.DataFrame, pd.Series)):
            y_train = y_train.values
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train).ravel()

        n_samples = X_train.shape[0]
        if n_samples == 0 or n_samples != y_train.shape[0]:
            raise ValueError("X_train and y_train must be non-empty and have the same number of rows")
        if not 0 < self.subsample <= 1:
            raise ValueError("subsample must be in the interval (0, 1]")
        num_select = max(1, int(n_samples * self.subsample))

        self.classes_ = np.unique(y_train)
        self.num_categories = len(self.classes_)
        # Start from scratch so a second fit() does not stack onto the first.
        self.rounds = [self.create_round(i + 1) for i in range(self.max_iter)]

        y_guess = np.zeros((n_samples, self.num_categories), dtype=float)
        for boosting_round in self.rounds:
            rows = np.random.choice(n_samples, size=num_select, replace=False)
            boosting_round.compute_gradients(y_train[rows], y_guess[rows])
            boosting_round.build_tree(X_train[rows], self.num_categories)
            # Logits are updated for every training row, not just the sample.
            y_guess = boosting_round.evaluate_tree(X_train, y_guess)

    def predict_proba(self, X_test):
        """Return class probabilities of shape ``(n_samples, n_classes)``.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if getattr(self, "classes_", None) is None:
            raise ValueError("Tree not built yet. Please call fit before predict")
        if isinstance(X_test, pd.DataFrame):
            X_test = X_test.values
        X_test = np.asarray(X_test)

        y_guess = np.zeros((X_test.shape[0], self.num_categories))
        for boosting_round in self.rounds:
            y_guess = boosting_round.evaluate_tree(X_test, y_guess)

        return self.softmax(y_guess)

    def predict(self, X_test):
        """Return the most probable training label for every row of ``X_test``."""
        y_probs = self.predict_proba(X_test)
        return self.classes_[np.argmax(y_probs, axis=1)]



In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report


# Load the iris dataset and separate the feature matrix from the class labels.
data = load_iris()
X = data.data
y = data.target 
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [5]:
# Train the from-scratch booster on the training split and score it on the
# held-out rows.
manual_XGB = XGBoostMultiClassifier(subsample=1,max_iter=10,max_depth=4,learning_rate=0.67)
manual_XGB.fit(X_train,y_train)
y_pred = manual_XGB.predict(X_test)
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports the wrong support counts.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [6]:
import xgboost as xgb
# Reference run: the library XGBClassifier with default settings on the same
# split, for comparison with the manual implementation.
sklearn_XGB = xgb.XGBClassifier()
sklearn_XGB.fit(X_train,y_train)
y_pred_inbuilt = sklearn_XGB.predict(X_test)
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports the wrong support counts.
print(classification_report(y_test, y_pred_inbuilt))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

