In [1]:
import bisect
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder


In [2]:
from ucimlrepo import fetch_ucirepo

# Download dataset #101 from the UCI Machine Learning Repository.
tic_tac_toe_endgame = fetch_ucirepo(id=101)

# Unpack the payload: feature frame and target frame (both pandas DataFrames).
X, y = tic_tac_toe_endgame.data.features, tic_tac_toe_endgame.data.targets

In [3]:

# Encode every categorical feature column as integer codes. Each column gets
# its own throwaway encoder, so `label_encoder` below is fitted on the target
# only and can be used to map predictions back to the original labels.
X = X.apply(lambda column: LabelEncoder().fit_transform(column))

# Encoder for the class label.
label_encoder = LabelEncoder()

# The targets arrive as a single-column frame; flatten to 1-D so scikit-learn
# does not warn about receiving a column vector. `np.ravel` also accepts an
# already-encoded array, which keeps this cell safe to re-run.
y = label_encoder.fit_transform(np.ravel(y))

  y = column_or_1d(y, warn=True)


In [4]:
# Display the feature frame after label encoding to sanity-check the integer codes.
X

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square
0,2,2,2,2,1,1,2,1,1
1,2,2,2,2,1,1,1,2,1
2,2,2,2,2,1,1,1,1,2
3,2,2,2,2,1,1,1,0,0
4,2,2,2,2,1,1,0,1,0
...,...,...,...,...,...,...,...,...,...
953,1,2,2,2,1,1,1,2,2
954,1,2,1,2,2,1,2,1,2
955,1,2,1,2,1,2,2,1,2
956,1,2,1,1,2,2,2,1,2


In [9]:
# Display the label-encoded target array.
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
def calculateError(weights, y, y_pred):
    """
    Weighted misclassification rate of a set of predictions.

    error = sum(w_i * [y_pred_i != y_i]) / sum(w_i)

    Parameters
    ----------
    weights : array-like
        One non-negative weight per sample.
    y : array-like
        True labels. A column vector of shape (n, 1) is accepted and flattened.
    y_pred : array-like
        Predicted labels, one per sample.

    Returns
    -------
    float
        Share of the total weight carried by misclassified samples.

    Raises
    ------
    ValueError
        If the three inputs do not contain the same number of elements.
    """
    # Flatten everything first: comparing an (n, 1) column against an (n,)
    # vector would otherwise broadcast to an (n, n) matrix and silently
    # produce a wrong error value.
    weights = np.ravel(weights)
    y = np.ravel(y)
    y_pred = np.ravel(y_pred)

    if not (weights.shape == y.shape == y_pred.shape):
        raise ValueError(
            "weights, y and y_pred must have the same length, got "
            f"{weights.shape[0]}, {y.shape[0]} and {y_pred.shape[0]}"
        )

    return np.sum(weights * (y_pred != y)) / np.sum(weights)

In [7]:
class AdaBoost:
    """
    Discrete AdaBoost (AdaBoost.M1) ensemble of depth-1 decision trees.

    Each boosting round fits a stump on the re-weighted training set, scores
    it by its weighted error, and up-weights the samples it got wrong so that
    the next stump concentrates on them. Predictions are an alpha-weighted
    vote over all fitted stumps.

    Attributes populated by ``fit``:
        stumps       fitted weak learners, in training order
        alphas       vote weight of each stump
        errors       weighted training error of each stump
        weights      normalised sample weights after the last round
        classes      sorted unique labels seen in ``y``
        nEstimators  number of boosting rounds requested
    """

    def __init__(self):
        self.weights = []
        self.alphas = []
        self.stumps = []
        self.errors = []
        self.classes = None
        self.nEstimators = 0

    def initializeWeights(self, X):
        """
        Build the initial, uniform sample weights.

        w_i = 1 / N, where N is the number of rows in ``X``.
        """
        nSamples = np.shape(X)[0]
        return np.ones(nSamples) / nSamples

    def updateWeights(self, weights, alpha, y, y_pred):
        """
        Re-weight the samples after a boosting round.

        Misclassified samples are multiplied by exp(alpha); correctly
        classified samples keep their weight. The result is not normalised.
        """
        missed = np.not_equal(np.ravel(y), np.ravel(y_pred))
        # np.where instead of exp(alpha * missed): identical for finite alpha,
        # and avoids inf * 0 = nan if alpha is ever infinite.
        return np.asarray(weights, dtype=float) * np.where(missed, np.exp(alpha), 1.0)

    def calculateAlpha(self, error):
        """
        Vote weight of a stump given its weighted error.

        alpha = log((1 - error) / error)

        The error is clipped away from 0 and 1 so that a perfect (or perfectly
        wrong) stump gets a large finite alpha instead of a division by zero.
        """
        eps = 1e-10
        error = np.clip(error, eps, 1 - eps)
        return np.log((1 - error) / error)

    def fit(self, X, y, nEstimators=100):
        """
        Fit the AdaBoost classifier to the data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like
            Training labels; a column vector is flattened to 1-D.
        nEstimators : int, default 100
            Maximum number of boosting rounds. Training stops early if a
            stump reaches zero weighted error.

        Returns
        -------
        AdaBoost
            The fitted instance.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the sample count.
        """
        y = np.ravel(y)
        nSamples = np.shape(X)[0]
        if nSamples == 0:
            raise ValueError("cannot fit AdaBoost on an empty dataset")
        if y.shape[0] != nSamples:
            raise ValueError(
                f"X has {nSamples} samples but y has {y.shape[0]} labels"
            )

        # Start from a clean slate so a refit never mixes in stumps from an
        # earlier call.
        self.stumps = []
        self.alphas = []
        self.errors = []
        self.nEstimators = nEstimators
        self.classes = np.unique(y)

        w_i = self.initializeWeights(X)

        for _ in range(nEstimators):
            h_i = DecisionTreeClassifier(max_depth=1)
            h_i.fit(X, y, sample_weight=w_i)
            y_pred = h_i.predict(X)

            error_i = calculateError(w_i, y, y_pred)
            alpha_i = self.calculateAlpha(error_i)

            self.stumps.append(h_i)
            self.errors.append(error_i)
            self.alphas.append(alpha_i)

            # A perfect stump leaves nothing to re-weight; further rounds
            # would only reproduce the same stump.
            if error_i <= 0:
                break

            w_i = self.updateWeights(w_i, alpha_i, y, y_pred)
            # Normalise each round so the weights stay in a stable range.
            w_i = w_i / np.sum(w_i)

        self.weights = w_i
        return self

    def predict(self, X):
        """
        Predict the class of the input data.

        Every stump votes for the label it predicts, with strength equal to
        its alpha; the label with the largest total wins. Ties go to the
        label that sorts first.

        Returns
        -------
        pandas.Series
            Predicted label for each row of ``X``, with a default integer index.

        Raises
        ------
        ValueError
            If the model has no fitted stumps.
        """
        if not self.stumps:
            raise ValueError("AdaBoost.predict called before fit")

        # stumpPredictions[i, j]: label that stump i predicts for sample j.
        stumpPredictions = np.array([stump.predict(X) for stump in self.stumps])
        alphas = np.asarray(self.alphas, dtype=float)

        if self.classes is not None:
            classes = np.asarray(self.classes)
        else:
            # Stumps were attached manually; fall back to the labels they emit.
            classes = np.unique(stumpPredictions)

        # votes[k, j]: total alpha of the stumps predicting classes[k] for sample j.
        votes = np.array([
            (alphas[:, None] * (stumpPredictions == label)).sum(axis=0)
            for label in classes
        ])

        return pd.Series(classes[np.argmax(votes, axis=0)])


