# Boost check

In [17]:
import numpy as np
from sklearn.metrics import accuracy_score

import data_loader
#import decision_stump_sol as decision_stump
#import boosting_sol as boosting
import decision_stump
import boosting


#################
# Toy example 1 #
#################
'''
Toy example of XOR

          ┃
    ○     ╋     ×
          ┃
          ┃
━━━━╋━━━━━╋━━━━━╋━━━
          ┃
          ┃
    ×     ╋     ○
          ┃

Given a simple set of decision stumps:
s in {1, -1}
b in {-2, -0.5, 0.5, 2}
d in {0, 1}

'''
# data
features_1, labels_1 = data_loader.toy_data_1()
# clfs
h_set = set()
s_set = {1, -1}
b_set = {-2, -0.5, 0.5, 2}
d_set = {0, 1}
for s in s_set:
    for b in b_set:
        for d in d_set:
            h_set.add(decision_stump.DecisionStump(s,b,d))
# boost
Ada_1 = boosting.AdaBoost(h_set, T=1)
Ada_1.train(features_1, labels_1)

# check
print('━━━━━━━━━━ Toy example 1 ━━━━━━━━━━')
print('This toy example checks the format. Any of the stump is correct.')
print('(Can you explain why?)')
print('Ada_1: s = {:01d}, b = {:.1f}, d = {:01d}'.format(
    Ada_1.clfs_picked[0].s, Ada_1.clfs_picked[0].b, Ada_1.clfs_picked[0].d))

if Ada_1.betas[0] == 0:
    print('Betas are correct')
else:
    print('▁▂▃▄▅▆▇█ Betas are not correct █▇▆▅▄▃▂▁')


#################
# Toy example 2 #
#################
'''
Toy example of another XOR (linearly transformed from toy example 1)
          
          ┃
          ×     
          ┃
          ┃
━━━━○━━━━━╋━━━━━○━━━
          ┃
          ┃
          ×     
          ┃

Given a simple set of decision stumps:
s in {1, -1}
b in {-2, -0.5, 0.5, 2}
d in {0, 1}

'''
# data
features_2, labels_2 = data_loader.toy_data_2()
# clfs
h_set = set()
s_set = {1, -1}
b_set = {-2, -0.5, 0.5, 2}
d_set = {0, 1}
for s in s_set:
    for b in b_set:
        for d in d_set:
            h_set.add(decision_stump.DecisionStump(s,b,d))
# boost
Ada_2_2 = boosting.AdaBoost(h_set, T=2)
Ada_2_2.train(features_2, labels_2)
Ada_2_3 = boosting.AdaBoost(h_set, T=3)
Ada_2_3.train(features_2, labels_2)

# check
print('━━━━━━━━━━ Toy example 2 ━━━━━━━━━━')
Ada_2_2_acc = accuracy_score(Ada_2_2.predict(features_2), labels_2)
Ada_2_3_acc = accuracy_score(Ada_2_3.predict(features_2), labels_2)
print('Ada:', Ada_2_2_acc, Ada_2_3_acc)

if Ada_2_2_acc == 0.75 and Ada_2_3_acc == 1:
    print('Correct training accuracies')
else:
    print('▁▂▃▄▅▆▇█ Incorrect training accuracies █▇▆▅▄▃▂▁')

━━━━━━━━━━ Toy example 1 ━━━━━━━━━━
This toy example checks the format. Any of the stump is correct.
(Can you explain why?)


IndexError: list index out of range

In [3]:
features_1

[[1.0, 1.0], [-1.0, 1.0], [-1.0, -1.0], [1.0, -1.0]]

In [4]:
labels_1

[1, -1, 1, -1]

In [5]:
features_2, labels_2 = data_loader.toy_data_2()

In [6]:
features_2

[[0.0, 1.414], [-1.414, 0.0], [0.0, -1.414], [1.414, 0.0]]

In [8]:
features_2[0]

[0.0, 1.414]

In [15]:
features_2[0][1]

1.414

In [9]:
len(features_2)

4

In [16]:
np.zeros(len(features_2))

array([0., 0., 0., 0.])

In [21]:
type(h_set)

set

In [22]:
print(h_set)

{<decision_stump.DecisionStump object at 0x7fd3282d0e10>, <decision_stump.DecisionStump object at 0x7fd3282d0e48>, <decision_stump.DecisionStump object at 0x7fd3282e1048>, <decision_stump.DecisionStump object at 0x7fd3282d0e80>, <decision_stump.DecisionStump object at 0x7fd3282e1080>, <decision_stump.DecisionStump object at 0x7fd3282d0eb8>, <decision_stump.DecisionStump object at 0x7fd3282e10b8>, <decision_stump.DecisionStump object at 0x7fd3282d0ef0>, <decision_stump.DecisionStump object at 0x7fd3282e10f0>, <decision_stump.DecisionStump object at 0x7fd3282d0f28>, <decision_stump.DecisionStump object at 0x7fd3282d0fd0>, <decision_stump.DecisionStump object at 0x7fd3282e1128>, <decision_stump.DecisionStump object at 0x7fd3282d0f60>, <decision_stump.DecisionStump object at 0x7fd3282d0f98>, <decision_stump.DecisionStump object at 0x7fd3282d0da0>, <decision_stump.DecisionStump object at 0x7fd3282d0dd8>}


In [55]:
for hs in h_set:
    print(hs)

<decision_stump.DecisionStump object at 0x7fd3282d0e10>
<decision_stump.DecisionStump object at 0x7fd3282d0e48>
<decision_stump.DecisionStump object at 0x7fd3282e1048>
<decision_stump.DecisionStump object at 0x7fd3282d0e80>
<decision_stump.DecisionStump object at 0x7fd3282e1080>
<decision_stump.DecisionStump object at 0x7fd3282d0eb8>
<decision_stump.DecisionStump object at 0x7fd3282e10b8>
<decision_stump.DecisionStump object at 0x7fd3282d0ef0>
<decision_stump.DecisionStump object at 0x7fd3282e10f0>
<decision_stump.DecisionStump object at 0x7fd3282d0f28>
<decision_stump.DecisionStump object at 0x7fd3282d0fd0>
<decision_stump.DecisionStump object at 0x7fd3282e1128>
<decision_stump.DecisionStump object at 0x7fd3282d0f60>
<decision_stump.DecisionStump object at 0x7fd3282d0f98>
<decision_stump.DecisionStump object at 0x7fd3282d0da0>
<decision_stump.DecisionStump object at 0x7fd3282d0dd8>


In [14]:
np.sign(-11)

-1

In [29]:
a = [1,2,3]
print(np.array(a) * a[0])
print(type(np.array(a) * a[0]))
print(sum(np.array(a) * a[0]))


[1 2 3]
<class 'numpy.ndarray'>
6


In [37]:
b = np.zeros((2,3))
b[1,:] = np.array(a)
b[0,:] = 2 * np.array(a)
print(b)
(np.sum(b,axis=0)).astype(int).tolist()

[[2. 4. 6.]
 [1. 2. 3.]]


[3, 6, 9]

In [41]:
print(features_2)
print(np.array(features_2)[:,0])

[[0.0, 1.414], [-1.414, 0.0], [0.0, -1.414], [1.414, 0.0]]
[ 0.    -1.414  0.     1.414]


In [44]:
np.array(features_2)[:,0] > -0.1

array([ True, False,  True,  True])

In [47]:
aboo = np.array(features_2)[:,0] > -0.1
b = aboo *2 -1
print(b.tolist())

[1, -1, 1, 1]


In [49]:
d = [1,4,4]
print(d[0])

1


In [None]:

class Boosting(Classifier):
  # Boosting from pre-defined classifiers
    def __init__(self, clfs: Set[Classifier], T=0):
        self.clfs = clfs      # set of weak classifiers to be considered
        self.num_clf = len(clfs)
        if T < 1:
            self.T = self.num_clf
        else:
            self.T = T
    
        self.clfs_picked = [] # list of classifiers h_t for t=0,...,T-1
        self.betas = []       # list of weights beta_t for t=0,...,T-1
        return

    @abstractmethod
    def train(self, features: List[List[float]], labels: List[int]):
        return

    def predict(self, features: List[List[float]]) -> List[int]:
        '''
        Inputs:
        - features: the features of all test examples
   
        Returns:
        - the prediction (-1 or +1) for each example (in a list)
        '''
        ########################################################
        # TODO: implement "predict"
        ########################################################
        for t in range(self.T):
            
        self.cl
        features