<a href="https://colab.research.google.com/github/lmalanczuk/Drzewo-Decyzyjne/blob/main/SUSProjekt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importy i ładowanie danych

In [None]:
import math

# Path of the dataset file passed to load_data (one comma-separated row of integers per line).
FILENAME = 'tic_tac_toe.txt'

def load_data(filename):
    """Read a comma-separated text file of integers into a list of rows.

    Blank lines are skipped. The file is decoded as UTF-8; a leading
    byte-order mark, if present, is discarded so that the first field of
    the first row still parses as an integer.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list containing one list of ints per non-empty line, or an empty
        list when the file does not exist (a message is printed in that
        case).

    Raises:
        ValueError: If a field cannot be converted with ``int``.
    """
    dataset = []
    try:
        # 'utf-8-sig' decodes plain UTF-8 as well as UTF-8 that starts with
        # a BOM, which some editors and spreadsheet exports prepend.
        with open(filename, 'r', encoding='utf-8-sig') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                dataset.append([int(x) for x in line.split(',')])
    except FileNotFoundError:
        print(f"Błąd: Nie znaleziono pliku {filename}")
        return []
    return dataset

# Load the rows once for the rest of the notebook; load_data returns [] when the file is missing.
dataset = load_data(FILENAME)
print(f"Wczytano {len(dataset)} wierszy.")

Wczytano 958 wierszy.


Entropia i operacje na zbiorach

In [None]:
def calculate_entropy(rows):
    """Return the base-2 Shannon entropy of the class labels in *rows*.

    The label of a row is its last element. An empty collection of rows
    has entropy 0.0.
    """
    if not rows:
        return 0.0

    # Tally how often each label occurs, keeping first-seen order.
    tally = {}
    for record in rows:
        key = record[-1]
        tally[key] = tally.get(key, 0) + 1

    size = len(rows)
    result = 0.0
    for occurrences in tally.values():
        share = occurrences / size
        if share > 0:
            result -= share * math.log(share, 2)
    return result

def get_unique_values(rows, col_index):
    """Return the distinct values found in column *col_index* of *rows* as a list."""
    return list({record[col_index] for record in rows})

def split_dataset(rows, col_index, value):
    """Return the rows whose entry at *col_index* equals *value*, in their original order."""
    return [record for record in rows if record[col_index] == value]

Gain Ratio

In [None]:
def calculate_gain_ratio(rows, col_index, current_entropy):
    """Return the gain ratio of splitting *rows* on column *col_index*.

    The information gain is *current_entropy* minus the size-weighted
    entropy of the partitions induced by the column's distinct values; it
    is divided by the split information (the entropy of the partition
    sizes). When the split information is zero, 0.0 is returned.
    """
    distinct = get_unique_values(rows, col_index)
    row_count = len(rows)

    weighted_entropy = 0.0
    split_information = 0.0
    for candidate in distinct:
        partition = split_dataset(rows, col_index, candidate)
        weight = len(partition) / row_count

        weighted_entropy += weight * calculate_entropy(partition)
        if weight > 0:
            split_information -= weight * math.log(weight, 2)

    # A column with a single distinct value carries no split information.
    if split_information == 0:
        return 0.0
    return (current_entropy - weighted_entropy) / split_information

Struktura węzła (Node) i wybór klasy większościowej

In [None]:
class Node:
    """A decision-tree node.

    A node created with a ``prediction`` acts as a leaf; otherwise
    ``attribute`` holds the column index tested at this node and
    ``children`` maps each attribute value to the child node for it.
    """

    def __init__(self, attribute=None, prediction=None):
        """Store the tested attribute and/or leaf prediction; start with no children."""
        self.attribute, self.prediction = attribute, prediction
        # A fresh dict per instance so nodes never share their children.
        self.children = dict()

def get_majority_class(rows):
    """Return the most frequent class label among *rows*.

    The label of a row is its last element. When several labels share the
    highest count, the one encountered first in *rows* is returned.

    Args:
        rows: Iterable of rows.

    Returns:
        The majority label, or ``None`` when *rows* is empty.
    """
    counts = {}
    for row in rows:
        lbl = row[-1]
        counts[lbl] = counts.get(lbl, 0) + 1

    if not counts:
        return None
    # A single pass with max() replaces sorting every label; max() returns
    # the first key with the largest count, so ties still go to the label
    # that was seen first.
    return max(counts, key=counts.get)

Rekurencyjna budowa struktury drzewa

In [None]:
def build_tree(rows, available_attributes):
    """Recursively build a decision tree and return its root ``Node``.

    *rows* must be non-empty (the first row's label is read immediately);
    the label of a row is its last element. *available_attributes* is the
    collection of column indices that may still be used for splitting.

    A leaf is produced when all rows share one label, when no attributes
    remain, or when no attribute yields a positive gain ratio; in the last
    two cases the leaf predicts the majority label. Otherwise the node
    splits on the attribute with the highest gain ratio (the earliest one
    wins ties) and gets one child per distinct value of that attribute.
    """
    reference_label = rows[0][-1]
    if all(record[-1] == reference_label for record in rows):
        return Node(prediction=reference_label)

    if not available_attributes:
        return Node(prediction=get_majority_class(rows))

    base_entropy = calculate_entropy(rows)

    # Strict comparison keeps the first attribute among equally good ones.
    top_ratio, chosen = 0.0, None
    for candidate in available_attributes:
        ratio = calculate_gain_ratio(rows, candidate, base_entropy)
        if ratio > top_ratio:
            top_ratio, chosen = ratio, candidate

    if chosen is None or top_ratio == 0.0:
        return Node(prediction=get_majority_class(rows))

    # The chosen attribute is not reused further down this branch.
    remaining = [attr for attr in available_attributes if attr != chosen]

    branch_node = Node(attribute=chosen)
    for branch_value in get_unique_values(rows, chosen):
        subset = split_dataset(rows, chosen, branch_value)
        if subset:
            branch_node.children[branch_value] = build_tree(subset, remaining)
        else:
            branch_node.children[branch_value] = Node(prediction=get_majority_class(rows))
    return branch_node

Predykcja i wyświetlanie

In [None]:
def predict(node, row):
    """Return the label the tree rooted at *node* assigns to *row*.

    Starting at *node*, follow the child matching the row's value for each
    tested attribute until a node with a prediction is reached. Returns
    ``None`` when the row holds a value for which the current node has no
    child.
    """
    current = node
    while current.prediction is None:
        branch_key = row[current.attribute]
        if branch_key not in current.children:
            return None
        current = current.children[branch_key]
    return current.prediction

def print_tree_structure(node, spacing=""):
    """Print the tree rooted at *node* as an indented outline.

    Leaves are printed as their prediction; inner nodes print the tested
    attribute followed by one labelled, further-indented subtree per child
    value. *spacing* is the indentation prefix for the root line.
    """
    # Explicit depth-first traversal; each entry carries the branch header
    # that must be printed just before the node itself (None for the root).
    pending = [(node, spacing, None)]
    while pending:
        current, indent, header = pending.pop()
        if header is not None:
            print(header)

        if current.prediction is not None:
            print(f"{indent}Wynik (Liść): {current.prediction}")
            continue

        print(f"{indent}Atrybut [{current.attribute}] ?")
        branches = [
            (child, indent + "    ", f"{indent}  --> Gdy wartość == {value}:")
            for value, child in current.children.items()
        ]
        # Reverse so that popping from the end visits children in dict order.
        pending.extend(reversed(branches))

Uruchomienie

In [None]:
# Build the tree on the whole dataset, print it, and sanity-check the
# prediction for the first row (which was also part of the training data).
if not dataset:
    print("Nie wczytano danych. Sprawdź plik CSV.")
else:
    # Every column except the last (the class label) is a candidate attribute.
    attributes = [*range(len(dataset[0]) - 1)]

    print("--- Rozpoczynam budowę drzewa ---")
    tree = build_tree(dataset, attributes)
    print("--- Budowa zakończona ---\n")

    print_tree_structure(tree)

    test_sample = dataset[0]
    features, true_label = test_sample[:-1], test_sample[-1]

    prediction = predict(tree, features)

    print(f"Dane wejściowe: {features}")
    print(f"Prawdziwa klasa: {true_label}")
    print(f"Predykcja modelu: {prediction}")

    print(
        "\nSUKCES: Model poprawnie sklasyfikował przykład."
        if prediction == true_label
        else "\nPORAŻKA: Błędna klasyfikacja."
    )

--- Rozpoczynam budowę drzewa ---
--- Budowa zakończona ---

Atrybut [4] ?
  --> Gdy wartość == 0:
    Atrybut [0] ?
      --> Gdy wartość == 0:
        Atrybut [8] ?
          --> Gdy wartość == 0:
            Wynik (Liść): 1
          --> Gdy wartość == 1:
            Atrybut [2] ?
              --> Gdy wartość == 0:
                Atrybut [6] ?
                  --> Gdy wartość == 0:
                    Wynik (Liść): 1
                  --> Gdy wartość == 1:
                    Atrybut [1] ?
                      --> Gdy wartość == 0:
                        Wynik (Liść): 1
                      --> Gdy wartość == 1:
                        Wynik (Liść): 0
                      --> Gdy wartość == 2:
                        Wynik (Liść): 0
                  --> Gdy wartość == 2:
                    Wynik (Liść): 1
              --> Gdy wartość == 1:
                Atrybut [5] ?
                  --> Gdy wartość == 0:
                    Atrybut [3] ?
                      --> Gdy w