In [187]:
from sklearn.datasets import load_iris
from ID3 import ID3
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from pprint import pprint
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

In [188]:
data = load_iris()
X , y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
X_val, _, y_val, _ = train_test_split(X, y, test_size=0.4)
X_train, X_test, y_train, y_test = np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)

In [189]:
%%time
tree_entropy = ID3(gain_type = 'entropy', min_samples_leaf = 4, max_depth=10)
tree_entropy.fit(X_train, y_train)
y_pred = tree_entropy.predict(X_test)
print(f"Точность entropy: {accuracy_score(y_test, y_pred)}")

new_one = tree_entropy.pruning(X_val, y_val)
tree_entropy.dec_tree = new_one
y_pred = tree_entropy.predict(X_test)
print(f"Accuracy (Entropy Criterion): {accuracy_score(y_test, y_pred)}")

Точность entropy: 0.9473684210526315
Accuracy (Entropy Criterion): 0.9736842105263158
CPU times: user 15.9 ms, sys: 1.12 ms, total: 17 ms
Wall time: 16.3 ms


# Classification

In [190]:
star = pd.read_csv ('https://github.com/YBIFoundation/Dataset/raw/main/Stars.csv')

In [191]:
star.replace({'Spectral Class':{'M':0, 'A':1, 'B':1, 'F':1, 'O':1, 'K':1, 'G':1 }}, inplace=True)
star.replace({'Star color':{ 'Red':0, 'Yellow':1, 'White':2, 'White ': 2, 'Blue ':3, 'Blue':3 }}, inplace=True)
y = star['Spectral Class']
X = star[['Temperature (K)', 'Luminosity (L/Lo)', 'Radius (R/Ro)',
       'Absolute magnitude (Mv)']]

In [192]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 200)
X_val, _, y_val, _ = train_test_split(X, y, test_size=0.5, random_state=200)

# Создание пропусков

In [193]:
X_train, X_test, y_train, y_test  = np.array(X_train),  np.array(X_test),  np.array(y_train),  np.array(y_test )
X_val, y_val = np.array(X_val), np.array(y_val)

In [194]:
num_none = 2  
shape_1 = np.random.choice(len(X_train), size=num_none, replace=True)
shape_2 = np.random.choice(len(X_train[1]), size=num_none, replace=True)

In [195]:
X_train[shape_1, shape_2] = None

## Обучение 

### Entropy

In [196]:
%%time
tree_entropy = ID3(gain_type = 'entropy', min_samples_leaf = 4, max_depth=10)
tree_entropy.fit(X_train, y_train)
y_pred = tree_entropy.predict(X_test)
print(f"Точность entropy: {accuracy_score(y_test, y_pred)}")

Точность entropy: 1.0
CPU times: user 33.1 ms, sys: 1.21 ms, total: 34.3 ms
Wall time: 33.8 ms


In [197]:
%%time
id_tree = DecisionTreeClassifier(criterion='entropy', random_state=42)
id_tree.fit(X_train, y_train)

y_pred = id_tree.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Точность entropy: {accuracy:.2f}")

Точность entropy: 1.00
CPU times: user 1.22 ms, sys: 771 µs, total: 2 ms
Wall time: 1.37 ms


### Прунинг

In [198]:
%%time
new_one = tree_entropy.pruning(X_val, y_val)
tree_entropy.dec_tree = new_one
y_pred = tree_entropy.predict(X_test)
print(f"Accuracy (Entropy Criterion): {accuracy_score(y_test, y_pred)}")

Accuracy (Entropy Criterion): 1.0
CPU times: user 776 µs, sys: 492 µs, total: 1.27 ms
Wall time: 921 µs


# Regression

In [199]:
star = pd.read_csv ('boston.csv')
y = star['TAX']
X = star[star.columns[~(star.columns == 'TAX')]]

In [200]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 200)
X_val, _ , y_val, _ = train_test_split(X, y, test_size=0.5, random_state=200)
X_train, X_test, y_train, y_test  = np.array(X_train),  np.array(X_test),  np.array(y_train),  np.array(y_test )
X_val, y_val = np.array(X_val), np.array(y_val)
num_none = 2
shape_1 = np.random.choice(len(X_train), size=num_none, replace=True)
shape_2 = np.random.choice(len(X_train[1]), size=num_none, replace=True)
X_train[shape_1, shape_2] = None

In [201]:
%%time
regressor = DecisionTreeRegressor(random_state=42, max_depth=300,  criterion='squared_error')
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Mean Squared Error (MSE): 1967.08
Mean Absolute Error (MAE): 21.11
CPU times: user 4.47 ms, sys: 2.09 ms, total: 6.56 ms
Wall time: 6.42 ms


In [202]:
%%time
tree_entropy = ID3(ID3_type='regression', max_depth=300)
tree_entropy.fit(X_train, y_train)
y_pred = tree_entropy.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Mean Squared Error (MSE): 550.44
Mean Absolute Error (MAE): 5.13
CPU times: user 277 ms, sys: 2.97 ms, total: 280 ms
Wall time: 284 ms


In [203]:
%%time
new_tree = tree_entropy.pruning(X_val, y_val)
tree_entropy.dec_tree = new_tree
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Mean Squared Error (MSE): 550.44
Mean Absolute Error (MAE): 5.13
CPU times: user 12.6 ms, sys: 1.66 ms, total: 14.3 ms
Wall time: 13.2 ms
