In [7]:
class DecisionNode:
    """One node of a binary decision tree.

    A node built with only ``value`` acts as a leaf: prediction stops there
    and returns ``value``. A node built with ``feature_index``, ``threshold``,
    ``left`` and ``right`` is an internal node that routes a sample to one of
    its two children.

    Attributes:
        feature_index: Column index of the feature tested at this node.
        threshold: Cut point; samples with feature value ``<= threshold`` go left.
        left: Child node for samples at or below the threshold.
        right: Child node for samples above the threshold.
        value: Predicted class stored on a leaf (``None`` on internal nodes).
    """

    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        # Split rule (meaningful for internal nodes only).
        self.feature_index, self.threshold = feature_index, threshold
        # Child subtrees (meaningful for internal nodes only).
        self.left, self.right = left, right
        # Prediction payload (meaningful for leaves only).
        self.value = value

def build_tree(X, y, depth=0, max_depth=10):
    """Recursively grow a binary classification tree.

    Args:
        X: 2-D array of samples (rows) by features (columns).
        y: 1-D array of class labels aligned with the rows of ``X``.
        depth: Depth of the node being built; callers normally leave this at 0.
        max_depth: Depth at which growth stops and a leaf is emitted.

    Returns:
        DecisionNode: Root of the (sub)tree fitted to ``X`` and ``y``.

    Raises:
        ValueError: If ``X`` contains no samples.
    """
    n_samples, n_features = X.shape
    if n_samples == 0:
        # There is no sensible prediction for a node that saw no data.
        raise ValueError("Cannot build a decision tree from an empty dataset")

    unique_classes, class_counts = np.unique(y, return_counts=True)
    # Most frequent label at this node; ties resolve to the smallest label
    # because np.unique returns labels sorted and argmax keeps the first maximum.
    majority_class = unique_classes[class_counts.argmax()]

    # Stopping criteria: the node is pure, or the depth budget is exhausted.
    # In the second case the node may still be mixed, so the leaf must carry
    # the majority label rather than an arbitrary one.
    if len(unique_classes) == 1 or depth >= max_depth:
        return DecisionNode(value=majority_class)

    best_feature, best_threshold = find_best_split(X, y, n_features)
    if best_feature is None:
        return DecisionNode(value=majority_class)

    left_indices, right_indices = split_dataset(X[:, best_feature], best_threshold)
    if len(left_indices) == 0 or len(right_indices) == 0:
        # A split that sends every sample to one side separates nothing;
        # recursing on it would only hand an empty array to the other child.
        return DecisionNode(value=majority_class)

    left_subtree = build_tree(X[left_indices, :], y[left_indices], depth + 1, max_depth)
    right_subtree = build_tree(X[right_indices, :], y[right_indices], depth + 1, max_depth)

    return DecisionNode(best_feature, best_threshold, left_subtree, right_subtree)

def find_best_split(X, y, n_features):
    """Search every feature for the threshold with the lowest split score.

    Args:
        X: 2-D array of samples (rows) by features (columns).
        y: 1-D array of class labels aligned with the rows of ``X``.
        n_features: Number of leading columns of ``X`` to consider.

    Returns:
        tuple: ``(feature_index, threshold)`` of the best split found, or
        ``(None, None)`` when no feature offers a threshold that separates
        the samples (for example, every column is constant).
    """
    best_feature, best_threshold = None, None
    best_gini = float('inf')

    for feature_index in range(n_features):
        feature_values = X[:, feature_index]
        # np.unique returns the values sorted. The largest one is dropped
        # because "value <= max" sends every sample left and leaves the right
        # side empty, which is not a split at all.
        thresholds = np.unique(feature_values)[:-1]
        for threshold in thresholds:
            gini = calculate_gini_index(y, feature_values, threshold)
            # Strict comparison: on ties the earliest candidate is kept.
            if gini < best_gini:
                best_gini = gini
                best_feature = feature_index
                best_threshold = threshold

    return best_feature, best_threshold

def calculate_gini_index(y, feature_values, threshold):
    """Score a candidate split by the size-weighted Gini impurity of its sides.

    Samples with ``feature_values <= threshold`` form the left side and those
    with ``feature_values > threshold`` form the right side.

    Args:
        y: 1-D array of class labels.
        feature_values: 1-D array of one feature's values, aligned with ``y``.
        threshold: Cut point being evaluated.

    Returns:
        float: Weighted impurity of the two sides (lower is better), or
        ``inf`` when either side is empty, so that a split which separates
        nothing can never be selected as the best one.
    """
    left_labels = y[feature_values <= threshold]
    right_labels = y[feature_values > threshold]

    n_left, n_right = len(left_labels), len(right_labels)
    if n_left == 0 or n_right == 0:
        return float('inf')

    # Weight each side by the number of samples it actually holds. The length
    # of a boolean mask is always len(y), so it cannot be used as the count.
    n_total = len(y)
    left_weight = n_left / n_total
    right_weight = n_right / n_total
    return left_weight * gini(left_labels) + right_weight * gini(right_labels)

def gini(y):
    """Return the Gini impurity of a label array.

    Computed as one minus the sum of squared class proportions: 0 for a
    single-class array, larger for more evenly mixed labels. For an empty
    array the sum is empty, so the result is 1.0.
    """
    class_counts = np.unique(y, return_counts=True)[1]
    proportions = class_counts / len(y)
    return 1 - (proportions ** 2).sum()

def split_dataset(feature_values, threshold):
    """Partition sample positions by comparing one feature to a threshold.

    Args:
        feature_values: Array holding one feature's value for every sample.
        threshold: Cut point.

    Returns:
        tuple: ``(left_indices, right_indices)``, the positions whose value
        is ``<= threshold`` and the positions whose value is ``> threshold``.
    """
    goes_left = feature_values <= threshold
    goes_right = feature_values > threshold
    return np.where(goes_left)[0], np.where(goes_right)[0]

def predict_tree(node, X):
    """Predict the class of a single sample by walking down the tree.

    Args:
        node: Root of the (sub)tree to evaluate.
        X: One sample, indexable by feature position.

    Returns:
        The ``value`` stored on the leaf the sample ends up in.
    """
    current = node
    # A node whose value is set is a leaf; keep descending until one is reached.
    while current.value is None:
        goes_left = X[current.feature_index] <= current.threshold
        current = current.left if goes_left else current.right
    return current.value


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the raw table from disk
df = pd.read_csv('dataset.csv')

# Collapse the three class_* columns into one integer label:
# for each row, the position of the column holding the largest value
class_columns = ['class_0', 'class_1', 'class_2']
df['class'] = df[class_columns].values.argmax(axis=1)

# Feature matrix: every column that is not label-related
X = df.drop(columns=class_columns + ['class']).values

# Target vector
y = df['class'].values

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the decision tree on the training portion
tree = build_tree(X_train, y_train, max_depth=10)

# Batch prediction helper
def predict_tree_set(tree, X):
    """Return a list with the predicted class of every sample in ``X``."""
    predictions = []
    for sample in X:
        predictions.append(predict_tree(tree, sample))
    return predictions

# Run the fitted tree over the held-out samples
y_pred = predict_tree_set(tree, X_test)

# Share of held-out samples whose predicted class matches the true class
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 100.00%


In [12]:
# Classify one new sample described by its three feature values
def predict_tree_class(narati, ratio, angel, tree):
    """Predict the class of a single sample with the given decision tree.

    The three feature values are packed into an array in the order
    ``narati``, ``ratio``, ``angel`` and passed to ``predict_tree``.
    """
    return predict_tree(tree, np.array([narati, ratio, angel]))

# Demo: classify one hand-picked sample with the trained decision tree
narati, ratio, angel = 30, 4, 20
predicted_class = predict_tree_class(narati=narati, ratio=ratio, angel=angel, tree=tree)
print(f'The predicted class for the tree with narati={narati}, ratio={ratio}, angel={angel} is: class_{predicted_class}')


The predicted class for the tree with narati=30, ratio=4, angel=20 is: class_1
