 **ADA Boosting**

In [235]:
import numpy as np
import pandas as pd

#Weak Classifier
class DecisionStump:
  def __init__(self):
    self.polarity = 1
    self.featureindex = None
    self.best_value = None
    self.learning_rate = None
  
  def predict_classification(self, feature_data):
    n_samples = feature_data.shape[0]
    column_data = feature_data[:, self.featureindex]
    # print(column_data)
    predictions = np.ones(n_samples)
    if self.polarity == 1:
      predictions[column_data <= self.best_value] = -1
    else:
      predictions[column_data > self.best_value] = -1
    return predictions


In [236]:
class AdaboostClassifer:


  def __init__(self, ntimes):
    self.ntimes = ntimes
    self.classifier_list = []


  def train_classifier(self, X, labels):
    n_samples, n_features = X.shape
    #weight intialisation
    weights = np.full(n_samples, (1 / n_samples))
    for _ in range(self.ntimes):
      classifyer = DecisionStump()
      minimum_error = float("inf")
      for feature_i in range(n_features):
        X_column = X[:, feature_i]
        best_values = np.unique(X_column)
        for best_value in best_values:
          polarity = 1
          predictions = np.ones(n_samples)
          predictions[X_column < best_value] = -1
          #get misclassified labels
          misclassified = weights[labels != predictions]
          error = sum(misclassified)

          if error > 0.5:
            error = 1 - error
            polarity = -1

          if error < minimum_error:
            classifyer.polarity = polarity
            classifyer.best_value = best_value
            classifyer.featureindex = feature_i
            minimum_error = error
      classifyer.learning_rate = 1/2 * np.log((1.0 - minimum_error + 1e-10) / (minimum_error + 1e-10))
      predictions = classifyer.predict_classification(X)
      #updating weights
      weights *= np.exp(-classifyer.learning_rate * labels * predictions)
      weights /= np.sum(weights)
      self.classifier_list.append(classifyer)


  def predict_classification(self, feature_data):
    classifier_pred = [classify.learning_rate * classify.predict_classification(feature_data) for classify in self.classifier_list]
    label_pred = np.sum(classifier_pred, axis=0)
    label_pred = np.sign(label_pred)
    return label_pred

In [237]:
def accuracy(labels, pred_labels):
  correct=0
  for i in range(len(labels)):
    if(labels[i]==pred_labels[i]):
      correct+=1
  return (correct/len(labels))*100

In [238]:
df=pd.read_csv('Assignement1-2b.csv')
feature_data = np.array(df.iloc[:, 0:3].values)
labels=np.array(df.iloc[:,-1]) 
# print(X)
labels[labels=='W']=0
labels[labels=='M']=1
labels[labels == 0] = -1
labels=labels.astype(dtype='float64')
# print(len(feature_data[0:90]))
for i in [10,25,50]:
  classifier = AdaboostClassifer(i)
  classifier.train_classifier(feature_data[0:90], labels[0:90])  
  labels_pred = classifier.predict_classification(feature_data[90:120])
  # print(y[90:120])
  # print(y_pred)
  accura = accuracy(labels[90:120], labels_pred) 
  error_rate=100-accura
  print('--------------------------------------------------')
  print(f"Accuracy for boosting {i} times is {accura}")
  print(f"Error rate for boosting {i} times is {error_rate}")
  print('---------------------------------------------------')

--------------------------------------------------
Accuracy for boosting 10 times is 70.0
Error rate for boosting 10 times is 30.0
---------------------------------------------------
--------------------------------------------------
Accuracy for boosting 25 times is 63.33333333333333
Error rate for boosting 25 times is 36.66666666666667
---------------------------------------------------
--------------------------------------------------
Accuracy for boosting 50 times is 70.0
Error rate for boosting 50 times is 30.0
---------------------------------------------------
