In [1]:
from sklearn import datasets


In [2]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
iris = datasets.load_breast_cancer()
X = iris.data[:, :]  
y = enc.fit_transform(iris.target.reshape(-1,1)).todense()

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [5]:
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.metrics import accuracy_score
tree = DecisionTreeClassifier()
tree.fit(X_train,y_train)
preds = tree.predict(X_test)
print(accuracy_score(y_test,preds))


0.9438596491228071


In [155]:
import numpy as np
n_estimators=200
learning_rate=0.5
trees = []

#INIT
def bce_loss_gradient(y,y_pred):  
    # binary cross entropy loss
    # gradient with respect to y_pred
    # L =  -(y*log(y_pred)+(1-y)*log(1-y_pred))
    # dL/dy_pred =  -(y / y_pred) + (1 - y) / (1 - y_pred) 
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    return -(y / y_pred) + (1 - y) / (1 - y_pred)   # avoid div by zero

for _ in range(n_estimators):
    tree = DecisionTreeRegressor(max_depth=2,min_samples_split=2)
    trees.append(tree)

#FITTING
y_pred = np.full(np.shape(y_train), np.mean(y_train, axis=0))
for i in range(len(trees)):
    gradient = bce_loss_gradient(y_train, y_pred)
    print("======")
    print(f"y_train[0]: {y_train[0]}")
    print(f"y_pred[0]: {y_pred[0]}")
    print(f"gradient[0]: {gradient[0]}")
    trees[i].fit(X_train, -gradient)
    update = trees[i].predict(X_train)
    print(f"update[0]: {update[0]}")
    print("======")
    # Update y prediction
    y_pred += np.multiply(learning_rate, update)

    y_pred2 = np.argmax(y_pred, axis=1)
    print(accuracy_score(y_train,enc.fit_transform(y_pred2.reshape(-1,1)).todense()))

# print(y_pred[0])
# y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1)
# y_pred = np.argmax(y_pred, axis=1)
# print(accuracy_score(y_train,enc.fit_transform(y_pred.reshape(-1,1)).todense()))
# print(y_pred)
#PREDICT

y_pred = np.array([])
# Make predictions
for i in range(len(trees)):
    update = trees[i].predict(X_test)
    update = np.multiply(learning_rate, update)
    y_pred =  update if not y_pred.any() else y_pred + update

# # Turn into probability distribution
# y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1)
# Set label to the value that maximizes probability
y_pred = np.argmax(y_pred, axis=1)

print(accuracy_score(y_test,enc.fit_transform(y_pred.reshape(-1,1)).todense()))


y_train[0]: [[0. 1.]]
y_pred[0]: [0.40140845 0.59859155]
gradient[0]: [[ 1.67058824 -1.67058824]]
update[0]: [-1.45776808  1.45776808]
0.9612676056338029
y_train[0]: [[0. 1.]]
y_pred[0]: [-0.32747559  1.32747559]
gradient[0]: [[ 1. -1.]]
update[0]: [-4.95841502e+12  4.94653506e+12]
0.704225352112676
y_train[0]: [[0. 1.]]
y_pred[0]: [-2.47920751e+12  2.47326753e+12]
gradient[0]: [[ 1. -1.]]
update[0]: [ 5.36912752e+13 -5.37342237e+13]
0.43661971830985913
y_train[0]: [[0. 1.]]
y_pred[0]: [ 2.43664301e+13 -2.43938443e+13]
gradient[0]: [[ 1.00079992e+15 -1.00000000e+15]]
update[0]: [-9.41542027e+14  9.40789474e+14]
0.9330985915492958
y_train[0]: [[0. 1.]]
y_pred[0]: [-4.46404584e+14  4.46000893e+14]
gradient[0]: [[ 1. -1.]]
update[0]: [ 4.98192174e+13 -4.98647643e+13]
0.9471830985915493
y_train[0]: [[0. 1.]]
y_pred[0]: [-4.21494975e+14  4.21068510e+14]
gradient[0]: [[ 1. -1.]]
update[0]: [-1.45082110e+13  1.44888893e+13]
0.9612676056338029
y_train[0]: [[0. 1.]]
y_pred[0]: [-4.28749080e+14 

In [119]:
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier()
gbc.fit(X_train,enc.inverse_transform(y_train).reshape(-1))
preds = gbc.predict(X_test)
print(accuracy_score(enc.inverse_transform(y_test).reshape(-1),preds))


0.9473684210526315
