In [16]:
import numpy as np

def train_threshold_function(T):
    T.sort(key=lambda x: x[0])
    best_threshold = None
    min_misclassification = len(T)
    
    for i in range(len(T) - 1):
        threshold = (T[i][0] + T[i + 1][0]) / 2
        misclassification = sum([1 for x, y in T if (x >= threshold) != y])
        
        if misclassification < min_misclassification:
            min_misclassification = misclassification
            best_threshold = threshold
            
    def classify(x):
        return int(x >= best_threshold)
    return classify

In [17]:

def projection(x):
    return np.dot(x, np.ones(len(x)))/np.sqrt(len(x))
    # return np.dot(x, np.ones(len(x)))/np.linalg.norm(x)

def train_multidimension_threshold_function(T):
    print(T)
    T.sort(key=lambda x: projection(x[0]))
    T_projections = [projection(x) for x,_ in T]
    best_threshold = None
    min_misclassification = len(T)
    
    for i in range(len(T) - 1):
        threshold = (T_projections[i][0] + T_projections[i + 1][0]) / 2
        misclassification = sum([1 for x,y in T if all(x >= threshold != y)])
        
        if misclassification < min_misclassification:
            min_misclassification = misclassification
            best_threshold = threshold
        
    def classify(x):
        return int(all(x >= best_threshold))
    return classify

In [18]:
def train_multiclass_threshold_function(T):
    # number of classes = number of dimensions according to the problem statement
    thresholds = [0]*len(T[0][0])
    
    for i in range(len(T[0][0])):
        thresholds[i] = train_multidimension_threshold_function([(x, y[i]) for x, y in T])
        
    def classify(x):
        return [int(all(x > threshold)) for threshold in thresholds]    
    return classify

In [19]:
import pandas as pd
data = pd.read_csv('iris.data', header=None)
data.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
data['class'] = data['class'].astype('category').cat.codes
def func(x):
    y = [0]*3
    y[x] = 1
    return y
data['class'] = data['class'].apply(func)
data.drop(['sepal_length'], inplace=True, axis=1)
data

Unnamed: 0,sepal_width,petal_length,petal_width,class
0,3.5,1.4,0.2,"[1, 0, 0]"
1,3.0,1.4,0.2,"[1, 0, 0]"
2,3.2,1.3,0.2,"[1, 0, 0]"
3,3.1,1.5,0.2,"[1, 0, 0]"
4,3.6,1.4,0.2,"[1, 0, 0]"
...,...,...,...,...
145,3.0,5.2,2.3,"[0, 0, 1]"
146,2.5,5.0,1.9,"[0, 0, 1]"
147,3.0,5.2,2.0,"[0, 0, 1]"
148,3.4,5.4,2.3,"[0, 0, 1]"


In [20]:
classify = train_multiclass_threshold_function([(list(x[:-1]), list(x[-1])) for x in data.values])
data['predicted class'] = [classify(x[:-1]) for x in data.values]
data

[([3.5, 1.4, 0.2], 1), ([3.0, 1.4, 0.2], 1), ([3.2, 1.3, 0.2], 1), ([3.1, 1.5, 0.2], 1), ([3.6, 1.4, 0.2], 1), ([3.9, 1.7, 0.4], 1), ([3.4, 1.4, 0.3], 1), ([3.4, 1.5, 0.2], 1), ([2.9, 1.4, 0.2], 1), ([3.1, 1.5, 0.1], 1), ([3.7, 1.5, 0.2], 1), ([3.4, 1.6, 0.2], 1), ([3.0, 1.4, 0.1], 1), ([3.0, 1.1, 0.1], 1), ([4.0, 1.2, 0.2], 1), ([4.4, 1.5, 0.4], 1), ([3.9, 1.3, 0.4], 1), ([3.5, 1.4, 0.3], 1), ([3.8, 1.7, 0.3], 1), ([3.8, 1.5, 0.3], 1), ([3.4, 1.7, 0.2], 1), ([3.7, 1.5, 0.4], 1), ([3.6, 1.0, 0.2], 1), ([3.3, 1.7, 0.5], 1), ([3.4, 1.9, 0.2], 1), ([3.0, 1.6, 0.2], 1), ([3.4, 1.6, 0.4], 1), ([3.5, 1.5, 0.2], 1), ([3.4, 1.4, 0.2], 1), ([3.2, 1.6, 0.2], 1), ([3.1, 1.6, 0.2], 1), ([3.4, 1.5, 0.4], 1), ([4.1, 1.5, 0.1], 1), ([4.2, 1.4, 0.2], 1), ([3.1, 1.5, 0.1], 1), ([3.2, 1.2, 0.2], 1), ([3.5, 1.3, 0.2], 1), ([3.1, 1.5, 0.1], 1), ([3.0, 1.3, 0.2], 1), ([3.4, 1.5, 0.2], 1), ([3.5, 1.3, 0.3], 1), ([2.3, 1.3, 0.3], 1), ([3.2, 1.3, 0.2], 1), ([3.5, 1.6, 0.6], 1), ([3.8, 1.9, 0.4], 1), ([3.0, 1.

IndexError: invalid index to scalar variable.