### Trying different simple algorithms on the iris dataset

In [1]:
#load the iris dataset through scikit-learn's datasets module
from sklearn import datasets
iris = datasets.load_iris()

In [2]:
#feature array and labels vector, unpacked from the loaded dataset in one step
X, Y = iris.data, iris.target

In [3]:
#report how many examples (rows) the feature array holds
print("Number of examples: ", len(X))

Number of examples:  150


<h4><center>Below you can see what each column means</center></h4>
![iris dataset](iris2.png)

### Defining a couple of algorithms used for predictions

In [5]:
#import a function dividing our data into training and testing sets
#(sklearn.cross_validation was removed in scikit-learn 0.20; model_selection replaces it)
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    #fallback for scikit-learn releases older than 0.18
    from sklearn.cross_validation import train_test_split
#import decision tree classifier, kNN and SVM
from sklearn import tree, neighbors, svm
#importing metrics to assess the algorithm's accuracy
from sklearn.metrics import accuracy_score

In [6]:
def decision_tree(X, Y, test_size = .5, random_state = None):
    """
    Train a decision tree on a random split of the data and score it
    on the held-out part.

    Inputs:
    -> X - feature array
    -> Y - labels vector
    -> test_size - fraction of the examples held out for testing (default .5)
    -> random_state - seed forwarded to the split and to the tree; None (default)
       keeps every call random, an int makes the call reproducible
    Returns:
    -> accuracy - accuracy on testing set
    """
    #divide data
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size = test_size, random_state = random_state)
    #build Decision Tree Classifier and fit it on our training data
    my_classifier = tree.DecisionTreeClassifier(random_state = random_state)
    my_classifier.fit(X_train, y_train)
    #predict the labels of testing data
    predictions = my_classifier.predict(X_test)
    #work out the accuracy; accuracy_score expects (y_true, y_pred)
    accuracy = accuracy_score(y_test, predictions)

    return accuracy

In [7]:
def kNN(X, Y, test_size = .5, random_state = None, n_neighbors = 4):
    """
    Train a k-nearest-neighbours classifier on a random split of the data
    and score it on the held-out part.

    Inputs:
    -> X - feature array
    -> Y - labels vector
    -> test_size - fraction of the examples held out for testing (default .5)
    -> random_state - seed forwarded to the split; None (default) keeps
       every call random, an int makes the split reproducible
    -> n_neighbors - number of neighbours consulted per prediction (default 4)
    Returns:
    -> accuracy - accuracy on testing set
    """
    #divide data
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size = test_size, random_state = random_state)
    #build kNN Classifier and fit it on our training data
    my_classifier = neighbors.KNeighborsClassifier(n_neighbors = n_neighbors)
    my_classifier.fit(X_train, y_train)
    #predict the labels of testing data
    predictions = my_classifier.predict(X_test)
    #work out the accuracy; accuracy_score expects (y_true, y_pred)
    accuracy = accuracy_score(y_test, predictions)

    return accuracy

In [8]:
def SVM(X, Y, test_size = .5, random_state = None):
    """
    Train a support vector classifier on a random split of the data
    and score it on the held-out part.

    Inputs:
    -> X - feature array
    -> Y - labels vector
    -> test_size - fraction of the examples held out for testing (default .5)
    -> random_state - seed forwarded to the split; None (default) keeps
       every call random, an int makes the split reproducible
    Returns:
    -> accuracy - accuracy on testing set
    """
    #divide data
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size = test_size, random_state = random_state)
    #build SVM Classifier and fit it on our training data
    my_classifier = svm.SVC()
    my_classifier.fit(X_train, y_train)
    #predict the labels of testing data
    predictions = my_classifier.predict(X_test)
    #work out the accuracy; accuracy_score expects (y_true, y_pred)
    accuracy = accuracy_score(y_test, predictions)

    return accuracy

### Building the main function

In [9]:
def model(X, Y, num_of_iter, algorithm):
    """
    Run one of the supported algorithms repeatedly and average its accuracy.

    Inputs:
    -> X - feature array
    -> Y - labels vector
    -> num_of_iter - specifies how many times to perform learning (must be >= 1)
    -> algorithm - specifies what algorithm to use (among "decision_tree", "kNN" and "SVM")
    Returns:
    -> performance - mean accuracy over every training cycle, rounded to 4 decimals;
       for an unsupported algorithm name a message string is returned instead
    Raises:
    -> ValueError - if num_of_iter is smaller than 1 for a supported algorithm
    """
    #one entry per supported algorithm; the lambdas defer looking up each
    #function until it is actually run
    runners = {
        "decision_tree": lambda: decision_tree(X, Y),
        "kNN": lambda: kNN(X, Y),
        "SVM": lambda: SVM(X, Y),
    }
    #non-string names can never match a key, so treat them as unsupported
    run_once = runners.get(algorithm) if isinstance(algorithm, str) else None
    if run_once is None:
        return "Ops! It seems like we don't support the given algorithm!"

    #without at least one run there is nothing to average
    if num_of_iter < 1:
        raise ValueError("num_of_iter must be at least 1")

    #list for the algorithm's accuracy
    results = [run_once() for _ in range(num_of_iter)]

    performance = round(sum(results)/float(len(results)),4)

    return performance

### Now we can compare the algorithms' performance 

In [10]:
#we will perform learning 10,000 times to gain a good estimate of an algorithm's performance
#(every iteration re-splits the data and retrains a classifier, so lower this value for a quicker run)
num_of_iter = 10000

In [11]:
#evaluate each algorithm in turn, in the same order as before
performance_by_algorithm = {}
for algorithm in ("decision_tree", "kNN", "SVM"):
    performance_by_algorithm[algorithm] = model(X, Y, num_of_iter, algorithm)

#expose the results under the names used by the reporting cell
decision_tree_performance = performance_by_algorithm["decision_tree"]
kNN_performance = performance_by_algorithm["kNN"]
SVM_performance = performance_by_algorithm["SVM"]

In [12]:
#horizontal rule reused between the sections of the report
rule = "-----------------------------------------"

print(rule)
print("Training performed on", num_of_iter, "iterations")
print(rule)
print("Results:")
#one line per algorithm: label followed by its mean accuracy
for label, score in (
    ("Decision Tree's Accuracy:", decision_tree_performance),
    ("KNN's Accuracy:", kNN_performance),
    ("SVM's Accuracy:", SVM_performance),
):
    print(label, score)
print(rule)

-----------------------------------------
Training performed on 10000 iterations
-----------------------------------------
Results:
Decision Tree's Accuracy: 0.9439
KNN's Accuracy: 0.9527
SVM's Accuracy: 0.9641
-----------------------------------------
