In [27]:
from math import log2

def most_common(list:list): 
    """Finds the most frequent element in a given list

    Args:
        list (list)

    Returns:
        _type_: The most common element
    """
    num = 0
    most_common = list[0]

    for element in list: 
        if list.count(element) > num: 
            most_common = element
            num = list.count(element)
    
    return most_common

def entropy(y:list) -> float: 
    """calculates entropy of the given list

    Args:
        y (list): 

    Returns:
        float: entropy
    """
    unique_values = list(set(y))
    length = len(y)
    entropy = 0

    for uv in unique_values: 
        entropy -= (unique_values.count(uv)/length) * log2(unique_values.count(uv)/length)

    return entropy


def find_best_feature_value(X : list, y : list) -> (tuple) : 
    """Are given two lists, one with features and one with values, and finds
    the best feature value for that set of data. 

    Args:
        X (list): 2d list. 
        y (list): 1d list

    Returns: 
        tuple: (the feature(column), the best value(split)) 
    """
    parent_entropy = entropy(y)
    best_information_gain = 0
    best_feature_value = None
    best_value = None

    num_features = len(X[1])


    for feature in range(num_features):         #Each column feature 
        for value in list(set(X[feature])):     #Each unique feature in that column
            list_evaluates_true = []
            list_evaluates_false = []

            for i in range(len(y)):             #Devides into two sets. 
                if X[i][feature] >= value: 
                    list_evaluates_true.append(y[i])
                else: 
                    list_evaluates_false.append(y[i])  
        
        avrage_entropy = ((len(list_evaluates_true)/len(y)) * entropy(list_evaluates_true)) 
        + ((len(list_evaluates_false)/len(y)) * entropy(list_evaluates_false))

        information_gain = parent_entropy - avrage_entropy


        if information_gain >= best_information_gain: 
            best_information_gain = information_gain
            best_feature_value = feature
            best_value = value


    return (best_feature_value, best_value)


In [28]:
class Node: 
    def __init__(self, feature_value = None, X = None, y = None, value = None):
        self.feature_value = feature_value
        self.X = X
        self.y = y
        self.left = None
        self.right = None
        self.value = value

    def add_decision(self, featureValue, value, trueNode, falseNode): 
        self.right = trueNode
        self.left = falseNode
        self.feature_value = featureValue
        self.value = value
    



In [29]:
def learn(X: list, y : list, impurity_measure='entropy') -> Tree: 
    if y == [y[0]]*len(y): 
        return Node(X = X, y = y, value=y[0]) 
            
    elif X== X[0]*len(y):
        return Node(X = X, y = y, value=most_common(y))       
    
    else: 
        feature_value, value = find_best_feature_value(X, y)
        X_true = []
        X_false = []
        y_true = []
        y_false = []
        
        for i in range(len(y)): 
            if X[i][feature_value] >= value: 
                X_true.append(X[i])
                y_true.append(y[i])
            else: 
                X_false.append(X[i])
                y_false.append(y[i])

        #Recursivly adding nodes
        true_node = learn(X_true, y_true, impurity_measure='entropy')
        false_node = learn(X_false, y_false, impurity_measure='entropy')

        decision_node = Node(X = X, y = y)
        decision_node.add_decision(feature_value, value, true_node, false_node)   

        return decision_node




In [31]:
import pandas as pd

file = open("magic04.data", "r")
data = [line.strip().split(',') for line in file.readlines()]
file.close()



df = pd.DataFrame(data)
dfX = df.iloc[:, :10]


y = list(df[10])
newX = []

X = dfX.values.tolist()
for line in X: 
    line = [eval(el) for el in line]
    newX.append(line)
   


learn(newX, y, impurity_measure='entropy')

IndexError: list index out of range