In [73]:
import numpy as np
import pandas as pd

# Input data

In [74]:
df = pd.DataFrame({'pearson':[1, 2, 3, 4, 5, 6],
                   'age':[18, 20, 35, 50, 55, 57],
                   'buy':['no', 'no', 'yes','yes', 'yes', 'yes']
                   })
df

Unnamed: 0,pearson,age,buy
0,1,18,no
1,2,20,no
2,3,35,yes
3,4,50,yes
4,5,55,yes
5,6,57,yes


# Calculate intial weights

In [75]:
df['weights'] = 1/len(df)
df

Unnamed: 0,pearson,age,buy,weights
0,1,18,no,0.166667
1,2,20,no,0.166667
2,3,35,yes,0.166667
3,4,50,yes,0.166667
4,5,55,yes,0.166667
5,6,57,yes,0.166667


# First tree

### Find the best split point

In [76]:
''' 
Normaly the ideal split point is done for all possibilities using Gini impurity.
We want to demostrate the AdaBoost algorithm so we choose a split point manually.

'''

' \nNormaly the ideal split point is done for all possibilities using Gini impurity.\nWe want to demostrate the AdaBoost algorithm so we choose a split point manually.\n\n'

In [77]:
# Let's say we skip Gini--impurity to calculate the best split point and we choose that the best split point is
split_point1 = 52

### Predict with the best split point

In [78]:
df['pred-1'] = np.where(df['age'] > split_point1, 'yes', 'no')
df

Unnamed: 0,pearson,age,buy,weights,pred-1
0,1,18,no,0.166667,no
1,2,20,no,0.166667,no
2,3,35,yes,0.166667,no
3,4,50,yes,0.166667,no
4,5,55,yes,0.166667,yes
5,6,57,yes,0.166667,yes


### Calculate error

<img src="pics/error.png" style="width: 50%;"/>

In [79]:
# Calculate error - where the prediction is diffrent from the reality
error = df['weights'].where(df['buy'] != df['pred-1']).sum()
error

0.3333333333333333

### Calculate importance of the tree

<img src="pics/importance-new.png" style="width: 30%;"/>

In [80]:
lr=1

alpha1 = lr*0.5*np.log((1-error)/error)
alpha1

0.34657359027997275

### Calculate new weights for wrong guesses

In [81]:
''' 
Just weights' of the wrong guesses are corrected - more weight is put on them

'''

" \nJust weights' of the wrong guesses are corrected - more weight is put on them\n\n"

In [82]:
# Calculate new weights
df['weights_new_1'] = np.where(df['buy'] != df['pred-1'], 
                                  df['weights'] * np.exp(alpha1),
                                  df['weights']
                                  )
df

Unnamed: 0,pearson,age,buy,weights,pred-1,weights_new_1
0,1,18,no,0.166667,no,0.166667
1,2,20,no,0.166667,no,0.166667
2,3,35,yes,0.166667,no,0.235702
3,4,50,yes,0.166667,no,0.235702
4,5,55,yes,0.166667,yes,0.166667
5,6,57,yes,0.166667,yes,0.166667


### Calculate normalized weights

<img src="pics/w_norm.png" style="width: 30%;"/>

In [83]:
df['weights_norm_1'] = df['weights_new_1']/df['weights_new_1'].sum()
df

Unnamed: 0,pearson,age,buy,weights,pred-1,weights_new_1,weights_norm_1
0,1,18,no,0.166667,no,0.166667,0.146447
1,2,20,no,0.166667,no,0.166667,0.146447
2,3,35,yes,0.166667,no,0.235702,0.207107
3,4,50,yes,0.166667,no,0.235702,0.207107
4,5,55,yes,0.166667,yes,0.166667,0.146447
5,6,57,yes,0.166667,yes,0.166667,0.146447


# Second tree

### Best split point

In [84]:
''' 
This time let's assume the correct best split point 

'''

" \nThis time let's assume the correct best split point \n\n"

In [85]:
split_point2 = 40

### Predict with the best split point

In [86]:
df['pred-2'] = np.where(df['age'] > split_point2, 'yes', 'no')
df

Unnamed: 0,pearson,age,buy,weights,pred-1,weights_new_1,weights_norm_1,pred-2
0,1,18,no,0.166667,no,0.166667,0.146447,no
1,2,20,no,0.166667,no,0.166667,0.146447,no
2,3,35,yes,0.166667,no,0.235702,0.207107,no
3,4,50,yes,0.166667,no,0.235702,0.207107,yes
4,5,55,yes,0.166667,yes,0.166667,0.146447,yes
5,6,57,yes,0.166667,yes,0.166667,0.146447,yes


### Calculate error

In [87]:
# Calculate error - where the prediction is diffrent from the reality
error = df['weights_norm_1'].where(df['buy'] != df['pred-2']).sum()
error

0.20710678118654752

### Calculate importance of the tree

In [88]:
lr=1

alpha2 = lr*0.5*np.log((1-error)/error)
alpha2

0.6712270232267631

### Calculate new weights for wrong guesses

In [89]:
# Calculate new weights
df['weights_new_2'] = np.where(df['buy'] != df['pred-2'], 
                                  df['weights'] * np.exp(alpha2),
                                  df['weights']
                                  )
df

Unnamed: 0,pearson,age,buy,weights,pred-1,weights_new_1,weights_norm_1,pred-2,weights_new_2
0,1,18,no,0.166667,no,0.166667,0.146447,no,0.166667
1,2,20,no,0.166667,no,0.166667,0.146447,no,0.166667
2,3,35,yes,0.166667,no,0.235702,0.207107,no,0.326106
3,4,50,yes,0.166667,no,0.235702,0.207107,yes,0.166667
4,5,55,yes,0.166667,yes,0.166667,0.146447,yes,0.166667
5,6,57,yes,0.166667,yes,0.166667,0.146447,yes,0.166667


# Predict on unseen data

### Unseen data

In [90]:
X_unseen = {'age': 70}  

### Predict on unseen data

In [92]:
# Tree 1 Prediction
h1 = 1 if X_unseen['age'] > split_point1 else -1  

# Tree 2 Prediction
h2 = 1 if X_unseen['age'] > split_point2 else -1  

print(h1)
print(h2)

1
1


### Weighted sum of predictions

In [100]:
print(f'alpha1={round(alpha1,2)}')
print(f'alpha2={round(alpha2,2)}')

alpha1=0.35
alpha2=0.67


In [93]:
# Compute weighted sum of predictions
F_x = alpha1 * h1 + alpha2 * h2
F_x

1.017800613506736

### Final classification

<img src="pics/sign.png" style="width: 60%;"/>

<img src="pics/final_final.png" style="width: 60%;"/>

In [101]:
# Final classification
y_pred = np.sign(F_x)

print(f"Final Prediction: {y_pred}")

Final Prediction: 1.0
