In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from metrics import *
import time

In [8]:


np.random.seed(42)
# Real Input and Real Output
times_fit=[]
times_predict=[]
N = [7,30,45]
P = [2,5,35]
for i in N:
    for j in P:
        X = pd.DataFrame(np.random.randn(i, j))
        y = pd.Series(np.random.randn(i))
        for criteria in ["entropy", "gini_index"]:
            tf1=time.time()
            tree = DecisionTree(criterion=criteria) # Split based on Inf. Gain
            tree.fit(X, y)  
            tf2=time.time()
            times_fit.append(tf2-tf1)
            tp1=time.time()
            y_hat = tree.predict(X)
            tp2=time.time()
            times_predict.append(tp2-tp1)
            tree.plot()
            print("Criteria :", criteria)
            print("RMSE: ", rmse(y_hat, y))
            print("MAE: ", mae(y_hat, y))

def plot_times(times, title, xlabel, ylabel):
    plt.figure(figsize=(10, 6))
    
    for criteria in ["entropy", "gini_index"]:
        subset = [t for t in times if t[2] == criteria]
        ns = [t[0] for t in subset]
        ps = [t[1] for t in subset]
        times_ = [t[3] for t in subset]
        
        plt.plot(ns, times_, label=f'{criteria} (varying N)')
        plt.plot(ps, times_, label=f'{criteria} (varying P)', linestyle='--')
    
    
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot the results
plot_times(times_fit, "Decision Tree Training Time", "Number of Samples (N) / Features (P)", "Time (seconds)")
plot_times(times_predict, "Decision Tree Prediction Time", "Number of Samples (N) / Features (P)", "Time (seconds)")







1_ 0
1 1_2_ 1_1_
1_1_ 1
2 1_1_2_ 1_1_1_
1_1_2_ 2
3 1_1_2_2_ 1_1_2_1_
1_2_ 3
4 1_2_2_ 1_2_1_
1_2_2_ 4
5 1_2_2_2_ 1_2_2_1_
Criteria : entropy
RMSE:  1.2523943495841927
MAE:  1.1028104511505268
1_ 0
1 1_2_ 1_1_
1_1_ 1
2 1_1_2_ 1_1_1_
1_1_2_ 2
3 1_1_2_2_ 1_1_2_1_
1_2_ 3
4 1_2_2_ 1_2_1_
1_2_2_ 4
5 1_2_2_2_ 1_2_2_1_
Criteria : gini_index
RMSE:  1.2523943495841927
MAE:  1.1028104511505268
1_ 0
1 1_2_ 1_1_
1_1_ 1
2 1_1_2_ 1_1_1_
1_1_1_ 2
3 1_1_1_2_ 1_1_1_1_
1_1_2_ 3
4 1_1_2_2_ 1_1_2_1_
1_1_2_1_ 4
5 1_1_2_1_2_ 1_1_2_1_1_
Criteria : entropy
RMSE:  1.2129542009632865
MAE:  1.1588361134474499
1_ 0
1 1_2_ 1_1_
1_1_ 1
2 1_1_2_ 1_1_1_
1_1_1_ 2
3 1_1_1_2_ 1_1_1_1_
1_1_2_ 3
4 1_1_2_2_ 1_1_2_1_
1_1_2_1_ 4
5 1_1_2_1_2_ 1_1_2_1_1_
Criteria : gini_index
RMSE:  1.2129542009632865
MAE:  1.1588361134474499
1_ 0
1 1_2_ 1_1_
1_1_ 1
2 1_1_2_ 1_1_1_
1_1_1_ 2
3 1_1_1_2_ 1_1_1_1_
1_2_ 3
4 1_2_2_ 1_2_1_
1_2_2_ 4
5 1_2_2_2_ 1_2_2_1_
Criteria : entropy
RMSE:  0.6267492329345382
MAE:  0.5855071660457322
1_ 0
1 1_2_ 1_1

TypeError: 'float' object is not subscriptable

<Figure size 1000x600 with 0 Axes>

In [None]:
# Real Input and Discrete Output


times_fit=[]
times_predict=[]
N = [7,30,45]
P = [2,5,35]
for i in N:
    for j in P:
        X = pd.DataFrame(np.random.randn(i, j))
        y = pd.Series(np.random.randint(j, size=N), dtype="category")

        for criteria in ["information_gain", "gini_index"]:
            tf1=time.time()
            tree = DecisionTree(criterion=criteria)  # Split based on Inf. Gain
            tree.fit(X, y)
            tf2=time.time()
            times_fit.append(tf2-tf1)
            tp1=time.time()
            y_hat = tree.predict(X)
            tp2=time.time()
            times_predict.append(tp2-tp1)
            tree.plot()
            print("Criteria :", criteria)
            print("Accuracy: ", accuracy(y_hat, y))
            for cls in y.unique():
                print("Precision: ", precision(y_hat, y, cls))
                print("Recall: ", recall(y_hat, y, cls))

## plot func.

In [None]:
# Discrete Input and Discrete Output

times_fit=[]
times_predict=[]
N = [7,30,45]
P = [2,5,35]
for i in N:
    for j in P:
        X = pd.DataFrame({k: pd.Series(np.random.randint(j, size=N), dtype="category") for k in range(5)})
        y = pd.Series(np.random.randint(j, size=i), dtype="category")

        for criteria in ["information_gain", "gini_index"]:
            tf1=time.time()
            tree = DecisionTree(criterion=criteria)  # Split based on Inf. Gain
            tree.fit(X, y)
            tf2=time.time()
            times_fit.append(tf2-tf1)
            tp1=time.time()
            y_hat = tree.predict(X)
            tp2=time.time()
            times_predict.append(tp2-tp1)
            tree.plot()
            print("Criteria :", criteria)
            print("Accuracy: ", accuracy(y_hat, y))
            for cls in y.unique():
                print("Precision: ", precision(y_hat, y, cls))
                print("Recall: ", recall(y_hat, y, cls))

## Graph func.

In [None]:
# Discrete Input and Real Output

times_fit=[]
times_predict=[]
N = [7,30,45]
P = [2,5,35]
for i in N:
    for j in P:
        X = pd.DataFrame({k: pd.Series(np.random.randint(j, size=i), dtype="category") for k in range(5)})
        y = pd.Series(np.random.randn(i))

        for criteria in ["information_gain", "gini_index"]:
            tf1=time.time()
            tree = DecisionTree(criterion=criteria)  # Split based on Inf. Gain
            tree.fit(X, y)
            tf2=time.time()
            times_fit.append(tf2-tf1)
            tp1=time.time()
            y_hat = tree.predict(X)
            tp2=time.time()
            times_predict.append(tp2-tp1)
            tree.plot()
            print("Criteria :", criteria)
            print("RMSE: ", rmse(y_hat, y))
            print("MAE: ", mae(y_hat, y))


## Graph Func.