In [204]:
import numpy as np

def train_threshold_function(T):
    T.sort(key=lambda x: x[0])
    best_threshold = None
    min_misclassification = len(T)
    
    for i in range(len(T) - 1):
        threshold = (T[i][0] + T[i + 1][0]) / 2
        misclassification = sum([1 for x, y in T if (x >= threshold) != y])
        
        if misclassification < min_misclassification:
            min_misclassification = misclassification
            best_threshold = threshold
            
    def classify(x):
        return int(x >= best_threshold)
    return classify

In [205]:

def train_multidimension_threshold_function(T):
    T.sort(key=lambda x: min(x[0]))
    T_projections = [min(x) for x,_ in T]
    best_threshold = None
    min_misclassification = len(T)
    
    for i in range(len(T) - 1):
        threshold = (T_projections[i] + T_projections[i + 1]) / 2
        
        misclassification = sum([1 for x,y in T if int(all(x >= threshold)) != y])
        
        if misclassification < min_misclassification:
            min_misclassification = misclassification
            best_threshold = threshold
    
    def classify(x):
        return int(all(np.array(x) >= best_threshold))
    return classify

In [206]:
def train_multiclass_threshold_function(T):
    # number of classes = number of dimensions according to the problem statement
    thresholds = [0]*len(T[0][0])
    
    for i in range(len(T[0][0])):
        thresholds[i] = train_multidimension_threshold_function([(x, y[i]) for x, y in T])

    def classify(x):
        # return [int(all(np.array(x) > threshold)) for threshold in thresholds]
        return [classify(x) for classify in thresholds]
    return classify

In [218]:
import pandas as pd
from sklearn.model_selection import train_test_split
data = pd.read_csv('iris.data', header=None)
data.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
data['class no.'] = data['species'].astype('category').cat.codes
def func(x):
    y = [0]*3
    y[x] = 1
    return y
data.drop(['sepal_length'], inplace=True, axis=1)
X = data.values[:, :3]
Y = data['class no.'].apply(func)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
data

Unnamed: 0,sepal_width,petal_length,petal_width,species,class no.
0,3.5,1.4,0.2,Iris-setosa,0
1,3.0,1.4,0.2,Iris-setosa,0
2,3.2,1.3,0.2,Iris-setosa,0
3,3.1,1.5,0.2,Iris-setosa,0
4,3.6,1.4,0.2,Iris-setosa,0
...,...,...,...,...,...
145,3.0,5.2,2.3,Iris-virginica,2
146,2.5,5.0,1.9,Iris-virginica,2
147,3.0,5.2,2.0,Iris-virginica,2
148,3.4,5.4,2.3,Iris-virginica,2


In [230]:
classify = train_multiclass_threshold_function(list(zip(X_train,Y_train)))

data['predicted class'] = [classify(x) for x in X]
Y_pred = data['predicted class'][len(X_train):]
data['all correct'] = [int(x == y) for x, y in zip(data['predicted class'],Y)]
data['somewhat correct'] = [int(x[y]) for x,y in zip(data['predicted class'],data['class no.'])]
print(f"{sum(data['all correct'][:len(X_train)])} all correct classifications out of {len(X_train)} samples of training data: {round(sum(data['all correct'][:len(X_train)])*100/len(X_train),2)}%")
print(f"{sum(data['somewhat correct'][:len(X_train)])} somewhat correct classifications out of {len(X_train)} samples of training data: {round(sum(data['somewhat correct'][:len(X_train)])*100/len(X_train),2)}%")
print(f"{sum(data['all correct'][len(X_train):])} all correct classifications out of {len(X_test)} samples of validation data: {round(sum(data['all correct'][len(X_train):])*100/len(X_test),2)}%")
print(f"{sum(data['somewhat correct'][len(X_train):])} somewhat correct classifications out of {len(X_test)} samples of validation data: {round(sum(data['somewhat correct'][len(X_train):])*100/len(X_test),2)}%")
data

49 all correct classifications out of 120 samples of training data: 40.83%
68 somewhat correct classifications out of 120 samples of training data: 56.67%
0 all correct classifications out of 30 samples of validation data: 0.0%
27 somewhat correct classifications out of 30 samples of validation data: 90.0%


Unnamed: 0,sepal_width,petal_length,petal_width,species,class no.,predicted class,all correct,somewhat correct
0,3.5,1.4,0.2,Iris-setosa,0,"[0, 0, 0]",0,0
1,3.0,1.4,0.2,Iris-setosa,0,"[0, 0, 0]",0,0
2,3.2,1.3,0.2,Iris-setosa,0,"[0, 0, 0]",0,0
3,3.1,1.5,0.2,Iris-setosa,0,"[0, 0, 0]",0,0
4,3.6,1.4,0.2,Iris-setosa,0,"[0, 0, 0]",0,0
...,...,...,...,...,...,...,...,...
145,3.0,5.2,2.3,Iris-virginica,2,"[0, 1, 1]",0,1
146,2.5,5.0,1.9,Iris-virginica,2,"[0, 1, 1]",0,1
147,3.0,5.2,2.0,Iris-virginica,2,"[0, 1, 1]",0,1
148,3.4,5.4,2.3,Iris-virginica,2,"[0, 1, 1]",0,1
