In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [None]:
# Location of the golf dataset; the file name (including the space before
# the extension) is kept exactly as it was given.
DATA_FILE = 'DecisionTreeDataset-1 .csv'

# Load the CSV into a DataFrame and show it as the cell's output.
df = pd.read_csv(DATA_FILE)
df

Unnamed: 0,Day,Temperature,Outlook,Humidity,Windy,Play Golf?
0,07-May,hot,sunny,high,0,0
1,07-Jun,hot,sunny,high,1,0
2,07-Jul,hot,overcast,high,0,1
3,07-Sep,cool,rain,normal,0,1
4,07-Oct,cool,overcast,normal,1,1
5,07-Dec,mild,sunny,high,0,0
6,Jul-14,cool,sunny,normal,0,1
7,Jul-15,mild,rain,normal,0,1
8,Jul-20,mild,sunny,normal,1,1
9,Jul-21,mild,overcast,high,1,1


In [None]:
# Store the Windy flag with an integer dtype.
df = df.astype({'Windy': int})

# Split the frame into the label series and the remaining predictor columns.
y = df['Play Golf?']
X = df.drop('Play Golf?', axis=1)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
class DecisionTree:
    """Decision tree for categorical features, grown by information gain.

    A tree is represented as nested dicts of the form
    ``{feature: {value: subtree_or_label}}``; a leaf is the bare class label.
    """

    def __init__(self):
        # build_tree() returns the tree rather than storing it, so this stays
        # empty until the caller assigns the result.
        self.tree = {}

    def entropy(self, target_col):
        """Return the Shannon entropy (base 2) of the labels in ``target_col``."""
        _, counts = np.unique(target_col, return_counts=True)
        probabilities = counts / counts.sum()
        return -np.sum(probabilities * np.log2(probabilities))

    def information_gain(self, data, feature, target):
        """Return the entropy reduction obtained by splitting ``data`` on ``feature``.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows to evaluate; must contain the ``feature`` and ``target`` columns.
        feature : str
            Column whose distinct values define the candidate split.
        target : str
            Column holding the class labels.
        """
        total_entropy = self.entropy(data[target])
        values, counts = np.unique(data[feature], return_counts=True)
        weights = counts / counts.sum()
        # Entropy of each partition, weighted by the share of rows it holds.
        weighted_entropy = sum(
            weight * self.entropy(data.loc[data[feature] == value, target])
            for value, weight in zip(values, weights)
        )
        return total_entropy - weighted_entropy

    def build_tree(self, data, features, target):
        """Recursively grow a tree and return it (a nested dict or a leaf label).

        Parameters
        ----------
        data : pandas.DataFrame
            Training rows reaching this node.
        features : sequence of str
            Columns still available for splitting.
        target : str
            Column holding the class labels.

        Returns
        -------
        dict or label
            ``{best_feature: {value: subtree}}`` for an internal node, or a
            single class label for a leaf.
        """
        labels, counts = np.unique(data[target], return_counts=True)

        # Pure node: every remaining row carries the same label.
        if len(labels) == 1:
            return labels[0]

        # No features left to split on: fall back to the majority label.
        # np.unique returns labels sorted, so ties go to the smallest label.
        if len(features) == 0:
            return labels[np.argmax(counts)]

        best_feature = max(features, key=lambda x: self.information_gain(data, x, target))
        tree = {best_feature: {}}

        remaining = [f for f in features if f != best_feature]

        for val in pd.unique(data[best_feature]):
            # Boolean-mask selection keeps column dtypes intact (integer labels
            # stay integers) and does not discard rows because of missing
            # values in unrelated columns.
            sub_data = data[data[best_feature] == val]
            tree[best_feature][val] = self.build_tree(sub_data, remaining, target)

        return tree


In [None]:
dt = DecisionTree()
target = 'Play Golf?'

# Candidate split columns: everything except the first column and the final
# (label) column.
# NOTE(review): this assumes the first column is a non-predictive identifier
# and the label is the last column -- confirm against the CSV layout.
features = list(df.columns[1:-1])

# Fit on the training rows only. Building the tree from the full frame would
# let it see the very rows that are later scored as X_test, which inflates the
# reported accuracy.
train_data = df.loc[X_train.index]
dt.tree = dt.build_tree(train_data, features, target)

In [None]:
# A single hand-written observation used to try out the fitted tree.
sample_input = {
    'Temperature': 'hot',
    'Outlook': 'sunny',
    'Humidity': 'high',
    'Windy': 0,
}


In [None]:
def predict(input, tree):
    """Walk a nested-dict decision tree and return the label for ``input``.

    Parameters
    ----------
    input : dict
        Maps feature names to the observed values. Keys that do not appear in
        the tree are ignored.
    tree : dict or label
        Tree of the form ``{feature: {value: subtree_or_label}}``, or a bare
        label when the tree is a single leaf.

    Returns
    -------
    label or str
        The predicted label, or the string ``"Unable to make a prediction."``
        when the input lacks the feature the tree splits on or carries a value
        the tree has no branch for.
    """
    unable = "Unable to make a prediction."

    # A tree built from single-class data is just the label itself.
    if not isinstance(tree, dict):
        return tree

    for key in input:
        if key in tree:
            try:
                subtree = tree[key][input[key]]
            except (KeyError, TypeError):
                # No branch for this value (or the value is unhashable).
                # Other exceptions are deliberately left to propagate.
                return unable
            if isinstance(subtree, dict):
                return predict(input, subtree)
            return subtree

    # None of the supplied features matches the feature tested at this node.
    return unable

In [None]:
# Echo the hand-written observation, then the label the tree assigns to it.
print("Sample input:", sample_input)
sample_prediction = predict(sample_input, dt.tree)
print("Prediction:", sample_prediction)

Sample input: {'Temperature': 'hot', 'Outlook': 'sunny', 'Humidity': 'high', 'Windy': 0}
Prediction: 0.0


In [None]:
def calculate_accuracy(X_test, y_test, tree):
    """Return the fraction of rows in ``X_test`` whose prediction equals ``y_test``.

    Parameters
    ----------
    X_test : pandas.DataFrame
        Feature rows to score; each row is converted to a dict and passed to
        ``predict``.
    y_test : pandas.Series
        True labels, positionally aligned with ``X_test``.
    tree : dict
        Decision tree handed to ``predict``.

    Returns
    -------
    float
        Share of correct predictions, or ``0.0`` when ``X_test`` has no rows
        (instead of raising ``ZeroDivisionError``).
    """
    n_samples = len(X_test)
    if n_samples == 0:
        return 0.0

    correct_predictions = sum(
        1
        for i in range(n_samples)
        if y_test.iloc[i] == predict(X_test.iloc[i].to_dict(), tree)
    )
    return correct_predictions / n_samples


In [None]:
# Score the tree on the held-out rows and report the fraction predicted correctly.
accuracy = calculate_accuracy(X_test=X_test, y_test=y_test, tree=dt.tree)
print("Accuracy:", accuracy)

Accuracy: 1.0
