In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read with ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/

/content/drive/MyDrive


In [None]:
import pandas as pd

# Read the play-tennis CSV from the current working directory; the first
# column of the file is used as the row index.
data = pd.read_csv(
    "play_tennis_data.csv",
    index_col=0,
)

In [None]:
!pip install MultiColumnLabelEncoder
from MultiColumnLabelEncoder import MultiColumnLabelEncoder
Mcle = MultiColumnLabelEncoder()
data = Mcle.fit_transform(data)


Collecting MultiColumnLabelEncoder
  Downloading MultiColumnLabelEncoder-1.1.3-py3-none-any.whl (14 kB)
Installing collected packages: MultiColumnLabelEncoder
Successfully installed MultiColumnLabelEncoder-1.1.3


In [None]:
# Separate the "play" label column from the remaining predictor columns and
# convert both to plain (nested) Python lists.
target = data["play"].values.tolist()
features = data.drop(columns="play").values.tolist()
print(features)


[[0, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [2, 1, 0, 0], [2, 2, 1, 0], [2, 2, 1, 1], [1, 2, 1, 1], [0, 1, 0, 0], [0, 2, 1, 0], [2, 1, 1, 0], [0, 1, 1, 1], [1, 1, 0, 1], [1, 0, 1, 0], [2, 1, 0, 1]]


In [None]:
# Show the first rows of the encoded table as the cell's output.
data.head()

Unnamed: 0,outlook,temparature,humidity,wind,play
1,0,0,0,0,0
2,0,0,0,1,0
3,1,0,0,0,1
4,2,1,0,0,1
5,2,2,1,0,1


In [None]:
import numpy as np

class tree_node:
    """One node of a decision tree.

    A node stores statistics about the training samples that reached it and,
    once it has been split, the test used to route a sample to a child.

    Attributes set from the constructor arguments:
        gini: impurity value passed in by the caller.
        number_of_samples: number of samples passed in by the caller.
        number_of_samples_per_class: per-class sample counts.
        predicted_class: class reported when this node is used as a leaf.
    """

    def __init__(self, gini, number_of_samples, number_of_samples_per_class, predicted_class):
        # Split description; a freshly created node has no children.
        self.left = None
        self.right = None
        self.condition = None
        self.feature_index = 0

        # Sample statistics supplied by the caller.
        self.predicted_class = predicted_class
        self.number_of_samples_per_class = number_of_samples_per_class
        self.number_of_samples = number_of_samples
        self.gini = gini

class DecisionTree:
    """Classification tree that splits on equality with a feature value.

    Every internal node tests ``x[feature_index] == condition``: samples for
    which the test holds go to the left child, all others to the right child.
    Splits are chosen by minimising the weighted Gini impurity of the children.

    Labels must be non-negative integers because class counts are obtained
    with ``np.bincount``.

    Args:
        max_depth: maximum number of split levels. ``None`` means the tree
            grows until no split lowers the impurity.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def bootstraping_start(self, X, y):
        """Fit the tree on samples ``X`` (2-D) and integer labels ``y`` (1-D).

        Raises:
            ValueError: if ``y`` is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("cannot fit a DecisionTree on an empty sample")

        # Size the class axis from the largest label rather than from the
        # number of distinct labels: a bootstrap sample may miss a class
        # (e.g. contain only label 1), and counting distinct labels would then
        # make the per-class counts skip the labels that are actually present.
        self.num_classes = int(np.max(y)) + 1
        self.num_features = X.shape[1]  # number of columns
        self.tree = self.construct_tree(X, y, depth=0)

    def construct_tree(self, X, y, depth):
        """Recursively build and return the node for the samples ``(X, y)``."""
        number_of_samples_per_class = np.bincount(y, minlength=self.num_classes).tolist()
        # Index of the largest count, i.e. the majority label at this node.
        predicted_class = np.argmax(number_of_samples_per_class)

        node = tree_node(
            gini=self._gini(y),
            number_of_samples=len(y),
            number_of_samples_per_class=number_of_samples_per_class,
            predicted_class=predicted_class,
        )

        # max_depth=None means "no depth limit"; comparing an int with None
        # would raise TypeError.
        if self.max_depth is None or depth < self.max_depth:
            idx, thr = self.do_split(X, y)
            if idx is not None:
                indices_left = X[:, idx] == thr
                indices_right = ~indices_left
                node.feature_index = idx
                node.condition = thr
                node.left = self.construct_tree(X[indices_left], y[indices_left], depth + 1)
                node.right = self.construct_tree(X[indices_right], y[indices_right], depth + 1)
        return node

    def _gini(self, y):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of the labels ``y``.

        An empty label set is reported as ``0.0`` so that it never
        propagates NaN into a weighted average.
        """
        m = len(y)
        if m == 0:
            return 0.0
        class_frequencies = np.bincount(y, minlength=self.num_classes)
        return 1.0 - np.sum((class_frequencies / m) ** 2)

    def do_split(self, X, y):
        """Find the equality test that lowers the weighted Gini impurity most.

        Returns:
            ``(feature_index, value)`` of the best split, or ``(None, None)``
            when there are fewer than two samples or no split is strictly
            better than leaving the node unsplit.
        """
        m, n = X.shape  # rows, columns
        if m <= 1:
            return None, None

        # A candidate must beat the impurity of the unsplit node.
        best_gini = self._gini(y)
        best_index, best_condition = None, None

        for idx in range(n):
            column = X[:, idx]
            unique_values = np.unique(column)
            if len(unique_values) < 2:
                # A constant feature would put every sample on one side.
                continue
            for thr in unique_values:
                indices_left = column == thr
                n_left = np.count_nonzero(indices_left)
                n_right = m - n_left

                gini_left = self._gini(y[indices_left])
                gini_right = self._gini(y[~indices_left])
                gini = (n_left * gini_left + n_right * gini_right) / m
                if gini < best_gini:
                    best_gini = gini
                    best_index = idx
                    best_condition = thr
        return best_index, best_condition

    def predict(self, X):
        """Return a list with the predicted class for each row of ``X``."""
        return [self._predict_tree(x, self.tree) for x in X]

    def _predict_tree(self, x, node):
        """Route sample ``x`` down from ``node`` and return the leaf's class."""
        while node.left is not None or node.right is not None:
            if x[node.feature_index] == node.condition:
                node = node.left
            else:
                node = node.right
        return node.predicted_class

class RandomForest:
    """Bagged ensemble of ``DecisionTree`` classifiers with majority voting.

    Args:
        num_trees: number of trees to train.
        max_depth: depth limit forwarded to every tree.
        random_state: optional seed for the bootstrap sampling. When ``None``
            the global NumPy random state is used, so ``np.random.seed``
            still controls the result.
    """

    def __init__(self, num_trees=10, max_depth=None, random_state=None):
        self.num_trees = num_trees
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []

    def bootstraping_start(self, X, y):
        """Train ``num_trees`` trees, each on a bootstrap sample of ``(X, y)``.

        Each bootstrap sample draws ``len(X)`` row indices with replacement.
        Calling this method again replaces the previously trained trees.
        """
        # Accept lists as well as arrays; index-array selection needs ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Start from an empty forest so that refitting does not accumulate
        # trees from earlier calls.
        self.trees = []
        n_samples = len(X)
        for _ in range(self.num_trees):
            indices = rng.choice(n_samples, n_samples, replace=True)
            tree = DecisionTree(max_depth=self.max_depth)
            tree.bootstraping_start(X[indices], y[indices])
            self.trees.append(tree)

    def predict(self, X):
        """Return, for each row of ``X``, the class predicted by most trees.

        Ties are resolved in favour of the smallest class label (the behaviour
        of ``argmax`` on the vote counts).
        """
        tree_predictions = [tree.predict(X) for tree in self.trees]
        # Transpose so that each row holds the votes of all trees for one sample.
        all_predictions = np.array(tree_predictions).T
        return [np.bincount(votes).argmax() for votes in all_predictions]

    def accuracy(self, X_test, y_test):
        """Return the fraction of rows in ``X_test`` predicted as ``y_test``.

        Raises:
            ValueError: if the number of labels differs from the number of
                predictions.
        """
        # Convert both sides to arrays: comparing two plain lists with `==`
        # yields a single bool rather than an element-wise comparison.
        y_pred = np.asarray(self.predict(X_test))
        y_true = np.asarray(y_test).ravel()
        if y_pred.shape != y_true.shape:
            raise ValueError(
                "y_test has %d labels but %d predictions were made"
                % (y_true.size, y_pred.size)
            )
        return np.mean(y_pred == y_true)




In [None]:
# Show the first rows of the encoded table again, for reference before
# building the training arrays in the next cell.
data.head()

Unnamed: 0,outlook,temparature,humidity,wind,play
1,0,0,0,0,0
2,0,0,0,1,0
3,1,0,0,0,1
4,2,1,0,0,1
5,2,2,1,0,1


In [None]:
# Seed NumPy's global random state: the forest draws its bootstrap samples
# with np.random, so without a seed the printed result can change between runs.
np.random.seed(42)

X = np.array(features)
y = np.array(target)

# Number of trees is derived from the table width (column count minus two).
decision_tree_size = len(data.columns) - 2
forest = RandomForest(num_trees=decision_tree_size, max_depth=100)
forest.bootstraping_start(X, y)

# Single query sample; [2, 1, 0, 0] is also the fourth row of the training
# features, whose label in the table is 1.
my_predict = np.array([[2, 1, 0, 0]])
# Despite its name, x_test holds the expected label for `my_predict`.
x_test = np.array([1])
predictions = forest.predict(my_predict)
# With one sample this is either 0.0 or 1.0, and it is measured on a training
# row, so it is a sanity check rather than a generalisation estimate.
accuracy = forest.accuracy(my_predict, x_test)
print("Accuracy:", accuracy)

print("YES" if predictions[0] == 1 else "NO")

Accuracy: 1.0
YES
