In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

class TreeNode:
    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        self.feature_index = feature_index  # Index of the feature by which the node splits the data
        self.threshold = threshold  # Threshold for splitting
        self.left = left  # Left node
        self.right = right  # Right node
        self.value = value  # Class (for leaf nodes)
       

def decision_tree_classifier(X, y, depth=0, max_depth=None):
    
    X = np.array(X)
    y = np.array(y)
    classes=np.unique(y)
    # Создание узла дерева
    node = TreeNode()

    
    if depth == max_depth or len(set(y)) == 1:
        if len(y) == 0:
            node.value = None  
        else:
            node.value = list(set(y))[0]  
    else:
        num_features = X.shape[1]
        # Choosing the best division
        best_gini = 1.0
        for feature_index in range(num_features):
            for threshold in set(X[:, feature_index]):
                left_indices = X[:, feature_index] <= threshold
                right_indices = X[:, feature_index] > threshold
                gini = calculate_gini_impurity(y[left_indices], y[right_indices],classes)
                if gini < best_gini:
                    best_gini = gini
                    node.feature_index = feature_index
                    node.threshold = threshold
                    left_X, left_y = X[left_indices], y[left_indices]
                    right_X, right_y = X[right_indices], y[right_indices]
                   
                    node.left = decision_tree_classifier(left_X, left_y, depth + 1, max_depth)
                    node.right = decision_tree_classifier(right_X, right_y, depth + 1, max_depth)
    return node

def calculate_gini_impurity(y_left, y_right,classes):
    def gini(y, classes):

        y = y.reshape(-1, )     # flattens the 2D array into 1D array for simpler calculations
        if not y.shape[0]:
            return 0
    
        probs = []
        for cls in classes:
            probs.append((y == cls).sum() / y.shape[0]) # compute class probabilities
    
        p = np.array(probs)
        return 1 - ((p*p).sum())
    
    total_samples = len(y_left) + len(y_right)
    p_left = len(y_left) / total_samples
    p_right = len(y_right) / total_samples
    gini_left = gini(y_left,classes)
    gini_right = gini(y_right,classes)
    gini_impurity = p_left * gini_left + p_right* gini_right

    return gini_impurity

In [3]:
def predict(tree, x):

    if tree.value is not None:
        return tree.value
    if x[tree.feature_index] <= tree.threshold:
        return predict(tree.left, x)
    else:
        return predict(tree.right, x)

In [6]:
#Max depth=3
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

df = pd.read_csv('/Users/asik/Downloads/glass.csv')
X, y = df.drop("Type", axis = 1), df["Type"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

tree = decision_tree_classifier(X_train, y_train, 0, 3)

X_test=X_test.to_numpy()
y_pred = [predict(tree, x) for x in X_test]

for true_class, predicted_class in zip(y_test, y_pred):
    print("True Class: ", true_class)
    print("Predicted Class: ", predicted_class);
accuracy = accuracy_score( y_pred, y_test)
print(accuracy)

True Class:  6
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  7
Predicted Class:  7
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  7
Predicted Class:  7
True Class:  2
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  1
True Class:  5
Predicted Class:  1
True Class:  5
Predicted Class:  1
True Class:  5
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  5
Predicted Class:  1
True Class:  1
Predi

In [5]:
#max depth=5
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

df = pd.read_csv('/Users/asik/Downloads/glass.csv')
X, y = df.drop("Type", axis = 1), df["Type"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

tree = decision_tree_classifier(X_train, y_train, 0, 5)

X_test=X_test.to_numpy()
y_pred = [predict(tree, x) for x in X_test]

for true_class, predicted_class in zip(y_test, y_pred):
    print("True Class: ", true_class)
    print("Predicted Class: ", predicted_class);
accuracy = accuracy_score( y_pred, y_test)
print(accuracy)

True Class:  5
Predicted Class:  5
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  2
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  1
True Class:  7
Predicted Class:  7
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  1
True Class:  6
Predicted Class:  6
True Class:  3
Predicted Class:  1
True Class:  3
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  2
Predicted Class:  2
True Class:  1
Predicted Class:  1
True Class:  5
Predicted Class:  5
True Class:  3
Predicted Class:  2
True Class:  7
Predicted Class:  7
True Class:  7
Predicted Class:  7
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  2
Predicted Class:  2
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  2
Predi

In [6]:
#Max depth=6
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

df = pd.read_csv('/Users/asik/Downloads/glass.csv')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

tree = decision_tree_classifier(X_train, y_train, 0, 6)

X_test=X_test.to_numpy()
y_pred = [predict(tree, x) for x in X_test]

for true_class, predicted_class in zip(y_test, y_pred):
    print("True Class: ", true_class)
    print("Predicted Class: ", predicted_class);
accuracy = accuracy_score( y_pred, y_test)
print(accuracy)

  return bound(*args, **kwds)


True Class:  2
Predicted Class:  2
True Class:  6
Predicted Class:  1
True Class:  7
Predicted Class:  7
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  2
True Class:  1
Predicted Class:  2
True Class:  5
Predicted Class:  5
True Class:  7
Predicted Class:  7
True Class:  5
Predicted Class:  5
True Class:  1
Predicted Class:  2
True Class:  1
Predicted Class:  1
True Class:  2
Predicted Class:  2
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  3
True Class:  7
Predicted Class:  7
True Class:  7
Predicted Class:  1
True Class:  2
Predicted Class:  2
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  1
True Class:  7
Predicted Class:  5
True Class:  1
Predicted Class:  1
True Class:  1
Predicted Class:  1
True Class:  3
Predicted Class:  2
True Class:  2
Predicted Class:  2
True Class:  3
Predicted Class:  1
True Class:  2
Predicted Class:  3
True Class:  2
Predi