In [6]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

In [7]:
class WeakLearner:
    def __init__(self, model, i):
        self.__class = i
        self.__model = model
        self.miss_data = None
        self.error_rate = None
    
    def __sign(self, val):
        return 1 if val > 0 else -1
    
    def name(self):
        return self.__name
    
    def model(self):
        return self.__model
    
    def miss_classify(self, data, eval_data):
        self.miss_data = []
        y_pred = self.__model.predict(data)
        for i in range(len(y_pred)):
            if self.__sign(y_pred[i]) != self.__sign(eval_data[i]):
                self.miss_data.append(i)
        
    def calc_error_rate(self, w):
        self.error_rate = np.sum(w[self.miss_data])
    
    def calc_voting_power(self):
        self.__alpha = 1/2*np.log((1-self.error_rate)/self.error_rate)
        

In [8]:
def ShallowTree():
    return DecisionTreeClassifier(max_depth=2)

In [9]:
def classify(data, classification):
    return [1 if np.where(d == 1)[0][0] == classification else -1 for d in data]

# Local tests

In [10]:
import glob
from PIL import Image
import os
from sklearn.model_selection import train_test_split

IMAGE_DIR = "./data/data/data"

def load():
    file_list = glob.glob(IMAGE_DIR + "/*.jpg")
    X = []
    Y = []

    for fname in file_list:
        with Image.open(fname) as img:
            np_img = np.array(img).flatten()
        label = int(os.path.split(fname)[-1].split('.')[0].split('_')[3])-1

        X.append(np_img)
        tempy = np.zeros(15)
        tempy[label] = 1
        Y.append(tempy)
    X, Y = np.array(X), np.array(Y)
    return X, Y


In [11]:
# create test, train split
X, Y = load()
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, train_size=0.8, random_state=2021)
Ytrain_classes = []
for i in range(Ytrain.shape[1]):
    Ytrain_0 = classify(Ytrain, i)
    Ytrain_classes.append(Ytrain_0)
    print(f"{Ytrain_0.count(1)} datapoints have classification {i}")

794 datapoints have classification 0
807 datapoints have classification 1
790 datapoints have classification 2
821 datapoints have classification 3
770 datapoints have classification 4
811 datapoints have classification 5
784 datapoints have classification 6
824 datapoints have classification 7
799 datapoints have classification 8
790 datapoints have classification 9
788 datapoints have classification 10
802 datapoints have classification 11
815 datapoints have classification 12
796 datapoints have classification 13
809 datapoints have classification 14


In [12]:
model = ShallowTree()
model.fit(Xtrain,Ytrain_classes[0])

DecisionTreeClassifier(max_depth=2)

In [13]:
WL = WeakLearner(model, 0)
WL.miss_classify(Xtrain, Ytrain_classes[0])
WL.calc_error_rate(np.array([1 for d in range(Xtrain.shape[0])]))
print(WL.error_rate)

679
