<a href="https://colab.research.google.com/github/mrasifimran/Pythoncode/blob/main/DecisionTree21_06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset from the hard-coded /content path into a DataFrame.
# NOTE(review): the path only exists when the CSV has been uploaded to the
# notebook's /content directory — confirm before running elsewhere.
df = pd.read_csv("/content/creditcard.csv")

In [None]:
# Show, for every column, whether it contains at least one missing value.
df.isna().any()

In [None]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(axis=0, how="any", inplace=True)

In [None]:
# Features are every column except the last; the last column is assumed to
# hold the target variable. Both are pulled out as NumPy arrays.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Building block of the decision tree: one node, either a split or a leaf.
class Node:
    """A single node of a binary decision tree.

    A node built with ``feature_index``, ``threshold``, ``left`` and ``right``
    describes a split; a node built with only ``value`` acts as a leaf that
    stores a prediction. Every field defaults to ``None``.

    Args:
        feature_index: Column index of the feature tested at this node.
        threshold: Value the feature is compared against.
        value: Prediction stored at the node (set on leaves).
        left: Child node for the left branch.
        right: Child node for the right branch.
    """

    def __init__(self, feature_index=None, threshold=None, value=None, left=None, right=None):
        # Split description.
        self.feature_index, self.threshold = feature_index, threshold
        # Leaf prediction.
        self.value = value
        # Child subtrees.
        self.left, self.right = left, right

In [None]:
class DecisionTreeClassifier:
    """Binary decision tree classifier that splits on Gini impurity.

    Every internal node tests ``x[feature_index] < threshold``: samples for
    which the test is true go to the left child, all others to the right
    child. Leaves store the most common training label that reached them.

    Args:
        max_depth: Maximum depth of the tree (the root is at depth 0).
            ``None`` grows the tree until every leaf is pure or cannot be
            split any further.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: 1-D array-like of ``n_samples`` class labels (any sortable
                values, not only ``0..k-1``).

        Raises:
            ValueError: If ``X`` is not 2-D, the data is empty, or ``X`` and
                ``y`` disagree on the number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset")
        if X.shape[0] != len(y):
            raise ValueError("X and y must contain the same number of samples")

        self.n_classes = len(np.unique(y))
        self.n_features = X.shape[1]
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        """Return a list with the predicted label for each row of ``X``."""
        return [self._predict(x, self.tree) for x in X]

    def _build_tree(self, X, y, depth=0):
        """Recursively grow the subtree for the samples ``X``/``y``."""
        # Stop when the depth budget is used up or the node is already pure.
        if depth == self.max_depth or len(np.unique(y)) == 1:
            return Node(value=self._most_common_label(y))

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples (e.g. identical feature rows
            # with different labels), so this node has to be a leaf.
            return Node(value=self._most_common_label(y))

        left_mask = X[:, best_feature] < best_threshold
        right_mask = ~left_mask

        # _best_split only returns splits with two non-empty sides, so the
        # recursion always works on strictly smaller, non-empty subsets.
        left_node = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_node = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return Node(feature_index=best_feature, threshold=best_threshold,
                    left=left_node, right=right_node)

    def _best_split(self, X, y):
        """Find the split with the lowest weighted Gini impurity.

        Returns:
            ``(feature_index, threshold)`` of the best split, or
            ``(None, None)`` when no threshold yields two non-empty children.
        """
        best_gini = float('inf')
        best_feature = None
        best_threshold = None
        n_samples = len(y)

        for feature in range(X.shape[1]):
            values = X[:, feature]

            # The smallest unique value is skipped: nothing is strictly below
            # it, so it would always produce an empty left child.
            for threshold in np.unique(values)[1:]:
                left_mask = values < threshold
                n_left = np.sum(left_mask)
                n_right = n_samples - n_left
                if n_left == 0 or n_right == 0:
                    # Degenerate split (e.g. a NaN threshold); never useful.
                    continue

                gini = self._gini_index(y[left_mask]) * n_left / n_samples + \
                       self._gini_index(y[~left_mask]) * n_right / n_samples

                # Strict comparison keeps the first of several equally good splits.
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _gini_index(self, y):
        """Return the Gini impurity of the labels ``y`` (0 for an empty set)."""
        if len(y) == 0:
            return 0

        # Count the labels actually present instead of assuming they are the
        # integers 0..n_classes-1.
        _, counts = np.unique(y, return_counts=True)
        proportions = counts / len(y)
        return 1 - np.sum(proportions ** 2)

    def _most_common_label(self, y):
        """Return the most frequent label in ``y`` (smallest label wins ties)."""
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _predict(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x`` and return its label."""
        while node.value is None:
            if x[node.feature_index] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

In [None]:
# Instantiate the DecisionTreeClassifier defined in this notebook, capped at
# depth 5, and fit it on the training split
tree = DecisionTreeClassifier(max_depth=5)
tree.fit(X_train, y_train)

# Predict on the testing set; predict() returns a Python list with one label
# per row of X_test
y_pred = tree.predict(X_test)