In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import classification_report,accuracy_score

In [2]:
import numpy as np

class Node:
    def __init__(self, feature=None, threshold=None, value=None, true_branch=None, false_branch=None):
        self.feature = feature          
        self.threshold = threshold      
        self.value = value              
        self.true_branch = true_branch  
        self.false_branch = false_branch  

class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth      
        self.root = None                

    def fit(self, X, y, depth=0):
        
        if depth == self.max_depth or len(np.unique(y)) == 1:
            return Node(value=np.bincount(y).argmax())

        num_samples, num_features = X.shape
        best_gini = float('inf')
        best_feature = None
        best_threshold = None

        for feature in range(num_features):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                y_true = y[X[:, feature] <= threshold]
                y_false = y[X[:, feature] > threshold]

                if len(y_true) == 0 or len(y_false) == 0:
                    continue

                gini = self.calculate_gini(y_true, y_false)
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        if best_feature is None:
            return Node(value=np.bincount(y).argmax())

        true_indices = X[:, best_feature] <= best_threshold
        false_indices = ~true_indices

        true_branch = self.fit(X[true_indices], y[true_indices], depth + 1)
        false_branch = self.fit(X[false_indices], y[false_indices], depth + 1)

        return Node(best_feature, best_threshold, true_branch=true_branch, false_branch=false_branch)

    def calculate_gini(self, y_true, y_false):
        gini_true = 1.0 - sum((np.bincount(y_true) / len(y_true)) ** 2)
        gini_false = 1.0 - sum((np.bincount(y_false) / len(y_false)) ** 2)
        weighted_gini = (len(y_true) * gini_true + len(y_false) * gini_false) / (len(y_true) + len(y_false))
        return weighted_gini

    def predict(self, X):
        predictions = [self._predict(x, self.root) for x in X]
        return np.array(predictions)

    def _predict(self, x, node):
        if node.value is not None:
            return node.value

        if x[node.feature] <= node.threshold:
            return self._predict(x, node.true_branch)
        else:
            return self._predict(x, node.false_branch)

In [3]:
data = np.genfromtxt('Date_Fruit_Datasets.csv', delimiter=',', skip_header=1)

X = data[:, :-1]
y = data[:, -1].astype(int)

  y = data[:, -1].astype(int)


In [4]:
dt = DecisionTree(max_depth=2)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

dt.root = dt.fit(X_train, y_train)

y_pred = dt.predict(X_test)

In [5]:
score = accuracy_score(y_test, y_pred)
print("Accuracy:", score * 100)

Accuracy: 100.0


In [6]:
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       180

    accuracy                           1.00       180
   macro avg       1.00      1.00      1.00       180
weighted avg       1.00      1.00      1.00       180



In [2]:
import pandas as pd

# Load your dataset
df = pd.read_csv("Date_Fruit_Datasets.csv")

# Define a class to represent a node in the Decision Tree
class Node:
    def __init__(self, feature=None, value=None, result=None):
        self.feature, self.value, self.result = feature, value, result
        self.left, self.right = None, None

# Calculate Gini Impurity
def gini_impurity(labels):
    total = len(labels)
    return 1.0 - sum((labels.value_counts() / total) ** 2)

# Split dataset based on a feature and value
def split_dataset(X, y, feature, value):
    return X[X[feature] <= value], y[X[feature] <= value], X[X[feature] > value], y[X[feature] > value]

# Build the Decision Tree recursively
def build_tree(X, y, depth=0, max_depth=None):
    if len(set(y)) == 1:
        return Node(result=y.iloc[0])
    if len(y) == 0:
        return Node(result=y.value_counts().idxmax())
    if max_depth is not None and depth >= max_depth:
        return Node(result=y.value_counts().idxmax())
    
    best_gini, best_feature, best_value, left_X, left_y, right_X, right_y = 1.0, None, None, None, None, None, None
    
    for feature in X.columns:
        for value in X[feature].unique():
            l_X, l_y, r_X, r_y = split_dataset(X, y, feature, value)
            impurity = (len(l_y) * gini_impurity(l_y) + len(r_y) * gini_impurity(r_y)) / len(y)
            if impurity < best_gini:
                best_gini, best_feature, best_value, left_X, left_y, right_X, right_y = impurity, feature, value, l_X, l_y, r_X, r_y
    
    if best_gini == 1.0:
        return Node(result=y.value_counts().idxmax())
    
    node = Node(feature=best_feature, value=best_value)
    node.left = build_tree(left_X, left_y, depth + 1, max_depth)
    node.right = build_tree(right_X, right_y, depth + 1, max_depth)
    return node

# Make predictions using the Decision Tree
def predict_tree(node, sample):
    if node.result is not None:
        return node.result
    return predict_tree(node.left, sample) if sample[node.feature] <= node.value else predict_tree(node.right, sample)

# Split the dataset into training and testing sets
X_train, y_train = df.iloc[:80, :-1], df.iloc[:80, -1]
X_test, y_test = df.iloc[80:, :-1], df.iloc[80:, -1]

# Build the Decision Tree with a maximum depth of 5
tree = build_tree(X_train, y_train, max_depth=5)

# Make predictions for test data
y_pred = [predict_tree(tree, sample) for _, sample in X_test.iterrows()]

# Calculate accuracy
accuracy = (y_pred == y_test).mean()
print("Accuracy:", accuracy)


Accuracy: 0.08679706601466992
