# In [None]:
import math
import csv

def load_csv(filename):
    """Read a CSV file and split it into data rows and the header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(dataset, headers)``: ``dataset`` is the list of rows after
        the first one (each row a list of strings) and ``headers`` is the
        first row of the file.

    Raises:
        IndexError: If the file contains no rows at all.
    """
    # The context manager guarantees the handle is closed even if parsing
    # fails; newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as handle:
        dataset = list(csv.reader(handle))
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    """A single vertex of the decision tree.

    Attributes are plain instance fields:
      * ``attribute`` - the value passed to the constructor (the name of the
        attribute tested at this vertex; leaves are created with ``""``).
      * ``children``  - list that the tree builder fills with
        ``(value, child_node)`` pairs.
      * ``answer``    - the class label for a leaf; ``""`` means "not a leaf".
    """

    def __init__(self, attribute):
        # Every instance gets its own fresh list, so branches are never
        # shared between nodes.
        self.answer = ""
        self.children = []
        self.attribute = attribute

def sub_tables(data, col, delete):
    """Partition the rows of ``data`` by the value stored in column ``col``.

    Args:
        data: List of rows, each row a list of values. May be empty.
        col: Index of the column whose values define the groups.
        delete: If true, every grouped row is a copy of the original with
            column ``col`` removed; otherwise the original row object is
            stored unchanged.

    Returns:
        A tuple ``(attr, dic)``. ``attr`` lists the distinct values of column
        ``col`` in order of first appearance, and ``dic`` maps each of those
        values to the list of its rows (in their original order). For empty
        ``data`` the result is ``([], {})``.
    """
    dic = {}
    for row in data:
        grouped_row = row[:col] + row[col + 1:] if delete else row
        dic.setdefault(row[col], []).append(grouped_row)

    # Dicts keep insertion order, so the keys are already in first-seen
    # order; this makes the result reproducible from run to run, unlike
    # iterating over a set of strings.
    attr = list(dic)
    return attr, dic

def entropy(S):
    """Return the Shannon entropy, in bits, of the labels in ``S``.

    Args:
        S: List of hashable class labels.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of every
        distinct label. The result is ``0.0`` when ``S`` is empty or holds
        only one distinct label.
    """
    total = len(S)
    frequencies = {}
    for label in S:
        frequencies[label] = frequencies.get(label, 0) + 1

    if len(frequencies) <= 1:
        return 0.0

    # Sum over *all* distinct labels so that targets with three or more
    # classes are handled, not just binary ones.
    return -sum(
        (count / total) * math.log2(count / total)
        for count in frequencies.values()
    )

def compute_gain(data, col):
    """Return the information gain of splitting ``data`` on column ``col``.

    The gain is the entropy of the label column (the last element of every
    row) minus the size-weighted entropy of the label column within each
    group produced by ``sub_tables(data, col, delete=False)``.

    Args:
        data: List of rows whose last element is the class label.
        col: Index of the candidate split column.

    Returns:
        The information gain as computed above.
    """
    values, partitions = sub_tables(data, col, delete=False)
    row_count = float(len(data))

    # Start from the entropy of the whole table and subtract each group's
    # weighted contribution in turn.
    gain = entropy([record[-1] for record in data])
    for value in values:
        partition = partitions[value]
        weight = len(partition) / row_count
        gain -= weight * entropy([record[-1] for record in partition])

    return gain

def build_tree(data, features):
    """Recursively build an ID3 decision tree.

    Args:
        data: Non-empty list of rows. The last element of each row is the
            class label; the preceding elements are attribute values.
        features: Column names aligned with the rows, i.e. ``features[i]``
            names column ``i``.

    Returns:
        The root ``Node``. Internal nodes carry the name of the attribute
        they split on and a list of ``(value, child)`` pairs; leaves carry
        the predicted label in ``answer``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if not data:
        raise ValueError("build_tree() requires at least one training row")

    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        # Pure subset: every row has the same label.
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    if n <= 0:
        # Every attribute has been used but the labels still disagree
        # (contradictory rows). Fall back to the most frequent label;
        # ties go to the label seen first, which keeps the result stable.
        node = Node("")
        node.answer = max(dict.fromkeys(lastcol), key=lastcol.count)
        return node

    gains = [compute_gain(data, col) for col in range(n)]

    # Split on the attribute with the highest information gain (the first
    # such attribute when several tie).
    split = gains.index(max(gains))
    node = Node(features[split])

    # The chosen column is removed from both the header list and the rows
    # handed to the recursive calls.
    fea = features[:split] + features[split + 1:]
    attr, dic = sub_tables(data, split, delete=True)

    for value in attr:
        child = build_tree(dic[value], fea)
        node.children.append((value, child))

    return node

def print_tree(node, level=0):
    """Print the subtree rooted at ``node`` as an indented outline.

    A leaf (a node whose ``answer`` is not ``""``) is printed as
    ``=> <answer>``. Any other node prints its attribute name, followed by
    each branch value in parentheses one step deeper and the corresponding
    child subtree two steps deeper.

    Args:
        node: Tree node exposing ``answer``, ``attribute`` and ``children``.
        level: Current depth; each level adds two spaces of indentation.
    """
    indent = "  " * level

    if node.answer != "":
        print(indent + "=>", node.answer)
        return

    print(indent + node.attribute)
    branch_indent = "  " * (level + 1)
    for branch_value, subtree in node.children:
        print(branch_indent + f"({branch_value})")
        print_tree(subtree, level + 2)

def classify(node, x_test, features):
    """Walk the tree from ``node`` and return the label for ``x_test``.

    Args:
        node: Root of the (sub)tree to evaluate.
        x_test: One instance, indexable by column position.
        features: Column names for ``x_test``; the position of a node's
            attribute name in this list selects the value to test.

    Returns:
        The ``answer`` of the leaf that is reached, or the string
        ``"Unknown"`` if some node has no branch for the instance's value.
    """
    while node.answer == "":
        column = features.index(node.attribute)
        observed = x_test[column]
        for branch_value, subtree in node.children:
            if branch_value == observed:
                node = subtree
                break
        else:
            # The loop finished without a match: this value has no branch.
            return "Unknown"
    return node.answer

# ---------- Main Program ----------

# Training phase: load_csv returns the data rows and the header row
# separately; the header row is passed to build_tree as the column names.
dataset, features = load_csv("D:\\MLT_LAB\\Decision_Tree_Algorithm\\Training_Dataset.csv")
node1 = build_tree(dataset, features)

print("The decision tree for the dataset using ID3 algorithm is:")
print_tree(node1, 0)

# Classification phase.
# NOTE: `features` is rebound here to the header row of the test file.
# classify() looks up each node's attribute name in these headers to find
# the matching column of a test row.
testdata, features = load_csv("D:\\MLT_LAB\\Decision_Tree_Algorithm\\TestDataset.csv")
for xtest in testdata:
    print("\nThe test instance:", xtest)
    print("The label for test instance:", classify(node1, xtest, features))
