In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from astropy.table import Table, Column

In [2]:
# Load the UCI Iris data set. The file has no header row, so pandas assigns
# integer column labels; columns 0-3 are used as features and column 4 holds
# the species name.
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header = None)

#samples
X = iris.iloc[:,0:4].values

#classification
# Integer-encode the species column in one vectorized step instead of looping
# over a hard-coded row count, so the cell works for any number of rows:
#   'Iris-setosa' -> 0, 'Iris-versicolor' -> 1, every other label -> 2
species = iris.iloc[:,4].values
y = np.where(species == 'Iris-setosa', 0,
             np.where(species == 'Iris-versicolor', 1, 2))



In [3]:
# Hold out half of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=0)

# Number of tree depths to evaluate (depths 1 through maxDepth).
maxDepth = 10

# depth[k] is filled in later with the tree depth used for run k.
depth = np.zeros(shape=maxDepth)

# One row per evaluated depth: column 0 is training accuracy, column 1 is test accuracy.
accuracyTree = np.zeros(shape=(maxDepth, 2))
accuracyAda = np.zeros(shape=(maxDepth, 2))

In [4]:
def AdaBoost(depthIndex, random_state=0):
    """Fit a decision tree and an AdaBoost ensemble at one depth and record their accuracy.

    The tree depth used is ``depthIndex + 1``. Results are written into the
    module-level arrays rather than returned:

    * ``depth[depthIndex]`` receives the tree depth,
    * ``accuracyTree[depthIndex]`` receives (training, test) accuracy of the
      single decision tree,
    * ``accuracyAda[depthIndex]`` receives (training, test) accuracy of the
      AdaBoost classifier built on a tree of the same depth.

    Parameters
    ----------
    depthIndex : int
        Zero-based row index into ``depth``, ``accuracyTree`` and ``accuracyAda``.
    random_state : int or None, optional
        Seed handed to both estimators so repeated runs give the same table.
        Pass ``None`` to get the unseeded behaviour.
    """
    treeDepth = depthIndex + 1
    depth[depthIndex] = treeDepth

    decisionTree = DecisionTreeClassifier(max_depth=treeDepth, random_state=random_state)
    # The base learner is passed positionally on purpose: its keyword was
    # renamed from ``base_estimator`` to ``estimator`` in scikit-learn 1.2 and
    # the old name was removed in 1.4, but it is the first positional
    # parameter in both old and new releases.
    adaBoost = AdaBoostClassifier(decisionTree, random_state=random_state)

    # Same fit / predict / score procedure for both models; only the
    # destination array differs. The stand-alone tree is fitted first, as before.
    for model, accuracy in ((decisionTree, accuracyTree), (adaBoost, accuracyAda)):
        model.fit(X_train, y_train)
        accuracy[depthIndex, 0] = accuracy_score(y_train, model.predict(X_train))
        accuracy[depthIndex, 1] = accuracy_score(y_test, model.predict(X_test))


In [5]:
# Evaluate every depth the result arrays were sized for, so the loop bound
# follows maxDepth instead of a separately hard-coded count.
for i in range(maxDepth):
    AdaBoost(i)

In [6]:
# Both result tables share the same three column headings.
_column_names = ('Depth','Training Accuracy','Test Accuracy')

# Single decision tree: depth, training accuracy, test accuracy.
tree = Table([depth, accuracyTree[:, 0], accuracyTree[:, 1]], names=_column_names)

# AdaBoost classifier: depth, training accuracy, test accuracy.
ada = Table([depth, accuracyAda[:, 0], accuracyAda[:, 1]], names=_column_names)

In [7]:
# Print a heading, then leave the table as the cell's final expression so the
# notebook renders it as the cell output.
print("Decision Tree:")
tree


Decision Tree:


Depth,Training Accuracy,Test Accuracy
float64,float64,float64
1.0,0.733333333333,0.6
2.0,0.973333333333,0.893333333333
3.0,0.986666666667,0.96
4.0,1.0,0.96
5.0,1.0,0.96
6.0,1.0,0.96
7.0,1.0,0.96
8.0,1.0,0.96
9.0,1.0,0.96
10.0,1.0,0.96


In [8]:
# Print a heading, then leave the table as the cell's final expression so the
# notebook renders it as the cell output.
print("Adaboost Classifier:")
ada

Adaboost Classifier:


Depth,Training Accuracy,Test Accuracy
float64,float64,float64
1.0,0.986666666667,0.96
2.0,1.0,0.96
3.0,1.0,0.96
4.0,1.0,0.96
5.0,1.0,0.96
6.0,1.0,0.96
7.0,1.0,0.96
8.0,1.0,0.96
9.0,1.0,0.96
10.0,1.0,0.96


In [9]:
'''

For the decision tree, I have depths varying from 1-10.

Comparing Results:

From Decision Tree:
    As the depth of the tree increases, both the training accuracy and the test accuracy increase at first and then
level off. The training accuracy converges to 1.0 at a depth of 4. The test accuracy starts at 0.6 and the highest it goes
is 0.96, which it first reaches at a depth of 3.

For Adaboost Classifier:
    As the depth of the tree increases, the training accuracy increases as well. The training accuracy converges to 1.0
at a depth of 2. The test accuracy in this case remains the same at 0.96 for all depths.

'''

'\n\nFor the decision tree, I have depths varying from 1-10.\n\nComparing Results:\n\nFrom Decision Tree:\n    As the depth of the tree increases, both the training accuracy and the test accuracy increase at first and then\nlevel off. The training accuracy converges to 1.0 at a depth of 4. The test accuracy starts at 0.6 and the highest it goes\nis 0.96, which it first reaches at a depth of 3.\n\nFor Adaboost Classifier:\n    As the depth of the tree increases, the training accuracy increases as well. The training accuracy converges to 1.0\nat a depth of 2. The test accuracy in this case remains the same at 0.96 for all depths.\n\n'