# Drzewo decyzyjne

In [20]:
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.tree import DecisionTreeClassifier

In [6]:
import csv
import numpy as np

class IrisData:
    """Plain container that keeps a dataset's pieces together.

    Attributes:
        data: Feature rows, stored exactly as passed in.
        target: Per-row class indices, stored exactly as passed in.
        labels: Class names, stored exactly as passed in.
    """

    def __init__(self, data, target, labels):
        # The arguments are kept as-is: no copying, conversion or validation.
        self.data, self.target, self.labels = data, target, labels

def load_iris_data(path='./iris/iris.data'):
    """Read an Iris-style CSV file into an ``IrisData`` container.

    Every non-empty row is treated as feature values followed by the class
    name in the last column. Class names are mapped to integer indices in
    order of first appearance.

    Args:
        path: Location of the comma-separated data file. Defaults to the
            path that used to be hard-coded, so existing calls keep working.

    Returns:
        IrisData whose ``data`` is an array of the float feature rows,
        ``target`` is an array with one class index per row, and ``labels``
        is an array of class names where ``labels[i]`` names index ``i``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature cell cannot be converted to ``float``.
    """
    data = []
    target = []
    # Class name -> integer index. Dicts keep insertion order, so the keys
    # double as the ordered label list and no linear ``list.index`` scan is needed.
    label_to_index = {}

    # newline='' lets the csv module handle line endings itself.
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # Blank lines come back from csv.reader as empty lists; skip them.
                continue
            *features, label = row
            # setdefault assigns the next free index the first time a label is seen.
            target.append(label_to_index.setdefault(label, len(label_to_index)))
            data.append([float(x) for x in features])

    return IrisData(np.array(data), np.array(target), np.array(list(label_to_index)))

In [8]:
# Load the dataset once; later cells reuse the notebook-level name `iris_data`.
iris_data = load_iris_data()
# Show the class names; their position in this array is the integer used in `target`.
print(iris_data.labels)

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [9]:
# Baseline: one decision tree with a fixed seed, evaluated with 10-fold cross-validation.
clf = DecisionTreeClassifier(random_state=0)
# Print a single sample's feature values as a quick sanity check of the loaded data.
print(iris_data.data[10])
# Kept as the cell's last expression so the notebook displays the per-fold scores.
cross_val_score(clf, X=iris_data.data, y=iris_data.target, cv=10)

[5.4 3.7 1.5 0.2]


array([1.        , 0.93333333, 1.        , 0.93333333, 0.93333333,
       0.86666667, 0.93333333, 1.        , 1.        , 1.        ])

In [48]:
# Seeds handed to the classifier; each seed gets its own cross-validation run.
RANDOM_STATES = [6, 5, 2024]
# Number of cross-validation folds per run.
FOLDS_N = 5
# Short display name -> scikit-learn scorer string.
SCORES_DICT = {'accuracy': 'accuracy',
               'precision': 'precision_macro',
               'recall': 'recall_macro',
               'f1': 'f1_macro'}

# Per-metric list of per-fold score arrays, one array appended per random state.
res = {score_name: [] for score_name in SCORES_DICT}

# NOTE(review): `cv` is a plain integer and `random_state` is only passed to the
# classifier - presumably the fold assignment is identical for every seed, so the
# runs differ only through the tree itself; confirm against the scikit-learn docs.
for random_state in RANDOM_STATES:
    clf = DecisionTreeClassifier(random_state=random_state)
    results = cross_validate(clf, iris_data.data, iris_data.target, scoring=SCORES_DICT,
                             cv=FOLDS_N)
    # Read each metric by its exact result key rather than substring-matching
    # 'test_' and slicing off five characters, which would mis-handle any other
    # key that merely contains 'test_'.
    for score_name in SCORES_DICT:
        res[score_name].append(results[f'test_{score_name}'])

avg_scores = {}
std_dev_scores = {}

# Pool the fold scores of all seeds per metric, then summarise them.
for score_name, score_values in res.items():
    scores_array = np.concatenate(score_values)
    avg_scores[score_name] = np.mean(scores_array)
    std_dev_scores[score_name] = np.std(scores_array)

for score_name in SCORES_DICT:
    print(f"Average {score_name.capitalize()}: {avg_scores[score_name]}")
    print(f"Standard Deviation of {score_name.capitalize()}: {std_dev_scores[score_name]}")

{'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
Average Accuracy: 0.9577777777777778
Standard Deviation of Accuracy: 0.033259176771323916
Average Precision: 0.9598653198653199
Standard Deviation of Precision: 0.032895054256567316
Average Recall: 0.9577777777777778
Standard Deviation of Recall: 0.03325917677132392
Average F1: 0.9576830966304649
Standard Deviation of F1: 0.033334371371323786
