In [3]:
import pandas as pd
import numpy as np
import numpy.typing as npt
from scipy.special import softmax
from sklearn.model_selection import train_test_split, cross_val_score

In [4]:
# constants

TEST_SIZE: np.float16 = 0.3
CROSS_VALIDATION_K: np.int8 = 5
RANDOM_STATE: np.int8 = 42
SQUARE_VALUES: list[str] = ["x", "o", "b"]

In [5]:
COLUMNS: list[str] = ["tl", "tm", "tr", "ml", "mm", "mr", "bl", "bm", "br", "class"]

# ttt is a acronym for tic tac toe
ttt_df: pd.DataFrame = pd.read_csv(
    "../data/tic-tac-toe.data", header=None, names=COLUMNS
)
ttt_df["class"] = ttt_df["class"] == "positive"
ttt_df.rename(columns={"class": "positive"}, inplace=True)
ttt_df.head(5)

Unnamed: 0,tl,tm,tr,ml,mm,mr,bl,bm,br,positive
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [6]:
# splitting data

data: npt.NDArray[np.string_] = ttt_df.to_numpy()
ttt_X: npt.NDArray[np.string_] = data[:, 0 : data.shape[1] - 1]
ttt_y: npt.NDArray[np.string_] = data[:, data.shape[1] - 1]
X_train, X_test, y_train, y_test = train_test_split(
    ttt_X, ttt_y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

In [7]:
# definiton of boosting parameters

NUMBER_OF_EXAMPLES: np.int8 = 3

weights: npt.NDArray[np.float64] = (
    np.ones(shape=NUMBER_OF_EXAMPLES, dtype=np.float64) / NUMBER_OF_EXAMPLES
)
alphas: npt.NDArray[np.float64] = np.zeros(shape=NUMBER_OF_EXAMPLES, dtype=np.float64)
errors: npt.NDArray[np.float64] = np.zeros(shape=NUMBER_OF_EXAMPLES, dtype=np.float64)
stumps_predictions: npt.NDArray[np.int_] = np.zeros(
    shape=NUMBER_OF_EXAMPLES, dtype=np.int8
)
true_labels: npt.NDArray[np.int_] = np.zeros(shape=NUMBER_OF_EXAMPLES, dtype=np.int8)

In [8]:
misclassified_instances: np.int16 = ttt_df.size
best_stump_col: np.string_ = ""
best_stump_square_val: np.string_ = ""

for col in ttt_df.columns:
    for square_value in SQUARE_VALUES:
        stump_decisions: pd.Series = ttt_df[col] == square_value
        new_stump_misclassified_instances: np.int16 = (
            stump_decisions ^ ttt_df.positive
        ).sum()
        if new_stump_misclassified_instances < misclassified_instances:
            misclassified_instances = new_stump_misclassified_instances
            best_stump_col = col
            best_stump_square_val = square_value

errors[0] = misclassified_instances / ttt_df.size
alphas = np.where(errors != 0.0, np.log((1 - errors) / errors) / 2, errors)


stumps_predictions[0] = -1
true_labels[0] = 1
weights = softmax(weights * np.exp(-1 * alphas * stumps_predictions * true_labels))
weights

  alphas = np.where(errors != 0.0, np.log((1 - errors) / errors) / 2, errors)


array([0.66379121, 0.1681044 , 0.1681044 ])