In [15]:
# Each record is a list of seven categorical strings. Only three positions
# are read below: index 1 (compared with 'golf'), index 3 (compared with
# 'single') and index 6 (compared with 'medRisk').
train_data = [
    ['medium', 'skiing', 'design', 'single', 'twenties', 'no', 'highRisk'],
    ['high', 'golf', 'trading', 'married', 'forties', 'yes', 'lowRisk'],
    ['low', 'speedway', 'transport', 'married', 'thirties', 'yes', 'medRisk'],
    ['medium', 'football', 'banking', 'single', 'thirties', 'yes', 'lowRisk'],
    ['high', 'flying', 'media', 'married', 'fifties', 'yes', 'highRisk'],
    ['low', 'football', 'security', 'single', 'twenties', 'no', 'medRisk'],
    ['medium', 'golf', 'media', 'single', 'thirties', 'yes', 'medRisk'],
    ['medium', 'golf', 'transport', 'married', 'forties', 'yes', 'lowRisk'],
    ['high', 'skiing', 'banking', 'single', 'thirties', 'yes', 'highRisk'],
    ['low', 'golf', 'unemployed', 'married', 'forties', 'yes', 'highRisk'],
]

# Unconditional probability: fraction of all records whose index-1 value
# is 'golf'.
num_golf = len([record for record in train_data if record[1] == 'golf'])
ucond_golf = num_golf / len(train_data)
print("Unconditional probability of golf is ", ucond_golf)

# Conditional probability: among the records labelled 'medRisk' (index 6),
# the fraction whose index-3 value is 'single'.
num_mid = len([record for record in train_data if record[6] == 'medRisk'])
num_s_mid = len([
    record for record in train_data
    if record[6] == 'medRisk' and record[3] == 'single'
])
cond_p = num_s_mid / num_mid
print("conditional probability of single given midRisk is ", cond_p)

Unconditional probability of golf is  0.4
conditional probability of single given midRisk is  0.6666666666666666


In [18]:
# Node class for Decision Tree
class Node:
    """Plain container for one node of a binary decision tree.

    Attributes:
        feature: value passed as ``feature`` (defaults to None).
        threshold: value passed as ``threshold`` (defaults to None).
        label: value passed as ``label`` (defaults to None).
        left: always initialised to None.
        right: always initialised to None.

    NOTE(review): the tree-building functions visible in this cell represent
    nodes as dicts and never instantiate this class.
    """

    def __init__(self, feature=None, threshold=None, label=None):
        self.feature, self.threshold, self.label = feature, threshold, label
        # Children start out unset.
        self.left = self.right = None
# Function to calculate Gini Index
def gini_index(groups, classes):
    """Return the size-weighted Gini impurity of a set of row groups.

    Args:
        groups: iterable of groups; each group is a list of rows and the
            class label of a row is its last element (``row[-1]``).
        classes: iterable of the class labels to account for.

    Returns:
        A float: the sum over the non-empty groups of
        ``(1 - sum(p_c ** 2)) * (len(group) / total_rows)``, where ``p_c`` is
        the share of rows in the group labelled ``c``. Returns 0.0 when every
        group is empty.
    """
    total_samples = sum(len(group) for group in groups)
    gini = 0.0
    for group in groups:
        group_size = float(len(group))
        if group_size == 0:
            # An empty group adds no impurity and would divide by zero below.
            continue
        # Extract the labels once per group instead of once per class.
        labels = [row[-1] for row in group]
        score = 0.0
        for class_val in classes:
            p = labels.count(class_val) / group_size
            score += p * p
        # Weight the group's impurity by its share of all rows.
        gini += (1.0 - score) * (group_size / total_samples)
    return gini
# Function to split the dataset based on a feature and threshold
def split_dataset(dataset, feature, threshold):
    """Partition rows by comparing one column against a threshold.

    Args:
        dataset: iterable of indexable rows.
        feature: index of the column to compare.
        threshold: value each ``row[feature]`` is compared with using ``<``.

    Returns:
        A ``(left, right)`` tuple of lists: ``left`` holds the rows with
        ``row[feature] < threshold``, ``right`` holds all other rows. Row
        order is preserved and the row objects are not copied.
    """
    below, at_or_above = [], []
    for row in dataset:
        bucket = below if row[feature] < threshold else at_or_above
        bucket.append(row)
    return below, at_or_above
# Function to find the best split point for a dataset
def find_best_split(dataset):
    """Search every (feature, value) pair for the split with the lowest Gini index.

    Every column except the last is treated as a feature, and every value
    occurring in that column is tried as a threshold. A candidate only
    replaces the current best when its Gini index is strictly lower, so the
    earliest candidate wins ties.

    Args:
        dataset: non-empty list of rows whose last element is the class label.

    Returns:
        A dict with keys ``'feature'`` (column index), ``'threshold'`` (the
        chosen value) and ``'groups'`` (the ``(left, right)`` pair produced
        by ``split_dataset`` for that choice).
    """
    class_values = list(set(row[-1] for row in dataset))
    best = {'feature': None, 'threshold': None, 'groups': None}
    lowest_gini = float('inf')
    n_features = len(dataset[0]) - 1  # the last column is the label
    for feature in range(n_features):
        for row in dataset:
            candidate = row[feature]
            groups = split_dataset(dataset, feature, candidate)
            gini = gini_index(groups, class_values)
            if gini < lowest_gini:
                lowest_gini = gini
                best['feature'] = feature
                best['threshold'] = candidate
                best['groups'] = groups
    return best
# Function to create a terminal node with the most common class label
def create_terminal_node(group):
    """Return the most frequent class label among the rows of ``group``.

    Args:
        group: non-empty list of rows whose last element (``row[-1]``) is a
            hashable class label.

    Returns:
        The label with the highest count. When several labels share the
        highest count, the one that appears first in ``group`` is returned,
        so the result does not depend on set/hash iteration order.

    Raises:
        ValueError: if ``group`` is empty.
    """
    # Tally labels in a single pass; dicts keep insertion order, and max()
    # returns the first maximal key, which gives the first-seen tie-break.
    counts = {}
    for row in group:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    return max(counts, key=counts.get)
# Recursive function to build the Decision Tree
def build_tree(node, max_depth, min_size, depth):
    """Grow the subtree rooted at ``node`` in place.

    Args:
        node: dict as returned by ``find_best_split``; its ``'groups'`` entry
            is removed and ``'left'`` / ``'right'`` entries are added. Each
            child is either a class label (leaf) or another node dict.
        max_depth: once ``depth`` reaches this value both children become leaves.
        min_size: a side with at most this many rows becomes a leaf.
        depth: depth of ``node``; children are built with ``depth + 1``.

    Returns:
        None. The tree is built by mutating ``node``.
    """
    left, right = node['groups']
    del node['groups']

    # One side is empty: no real split happened, so both children share a
    # single leaf computed from all the rows.
    if not (left and right):
        leaf = create_terminal_node(left + right)
        node['left'] = leaf
        node['right'] = leaf
        return

    # Depth limit reached: stop splitting and turn both sides into leaves.
    if depth >= max_depth:
        left_leaf = create_terminal_node(left)
        right_leaf = create_terminal_node(right)
        node['left'] = left_leaf
        node['right'] = right_leaf
        return

    # Handle the left side completely (including its subtree) before the right.
    for side, rows in (('left', left), ('right', right)):
        if len(rows) > min_size:
            child = find_best_split(rows)
            node[side] = child
            build_tree(child, max_depth, min_size, depth + 1)
        else:
            node[side] = create_terminal_node(rows)
# Function to build the Decision Tree
def decision_tree(dataset, max_depth, min_size):
    """Build a decision tree for ``dataset`` and return its root node.

    Args:
        dataset: list of rows whose last element is the class label.
        max_depth: forwarded to ``build_tree`` as the depth limit.
        min_size: forwarded to ``build_tree`` as the leaf-size limit.

    Returns:
        The root node dict; the root is built at depth 1.
    """
    tree = find_best_split(dataset)
    build_tree(tree, max_depth, min_size, depth=1)
    return tree
# Function to make a prediction with the Decision Tree
def predict(node, row):
    """Walk the tree from ``node`` and return the leaf value reached by ``row``.

    Args:
        node: node dict with keys ``'feature'``, ``'threshold'``, ``'left'``
            and ``'right'``; a child that is a dict is another node, any
            other value is treated as a leaf.
        row: indexable sequence of feature values.

    Returns:
        The first non-dict child reached when descending left whenever
        ``row[feature] < threshold`` and right otherwise.
    """
    current = node
    while True:
        goes_left = row[current['feature']] < current['threshold']
        child = current['left'] if goes_left else current['right']
        if not isinstance(child, dict):
            return child
        current = child
# Example usage: each training row is [feature_0, feature_1, class_label].
dataset = [
    [2.771244718, 1.784783929, 0],
    [1.728571309, 1.169761413, 0],
    [3.678319846, 2.81281357, 0],
    [3.961043357, 2.61995032, 0],
    [2.999208922, 2.209014212, 0],
    [7.497545867, 3.162953546, 1],
    [9.00220326, 3.339047188, 1],
    [7.444542326, 0.476683375, 1],
    [10.12493903, 3.234550982, 1],
    [6.642287351, 3.319983761, 1],
]
tree = decision_tree(dataset, max_depth=3, min_size=1)

# Classify unlabelled rows (feature values only) with the fitted tree.
test_data = [
    [3.095607236, 1.783283623],
    [8.675418651, 0.242820951],
    [7.673756466, 3.508563011],
]
print("Test Results:")
for data in test_data:
    prediction = predict(tree, data)
    print(f"Input: {data}, Prediction: {prediction}")

Test Results:
Input: [3.095607236, 1.783283623], Prediction: 0
Input: [8.675418651, 0.242820951], Prediction: 1
Input: [7.673756466, 3.508563011], Prediction: 1
