# Iris classification
https://github.com/jwheeldon/test_ml.git

A machine learning exercise that applies the following techniques to the iris dataset:
* Principal components analysis (PCA)
* Support vector machines (SVM)


In [12]:
# Import packages
from sklearn import datasets, svm, metrics
import numpy as np
import pandas as pd
import matplotlib as mpl

In [13]:
# Load the iris dataset and pair every sample with its class label.
iris = datasets.load_iris()
n_samples = iris.data.shape[0]

# One (feature array, class label) tuple per sample; the resulting frame has
# the feature array in column 0 and the label in column 1.
data_and_classes = [
    (measurements, species)
    for measurements, species in zip(iris.data, iris.target)
]
data_pd = pd.DataFrame(data_and_classes)

In [14]:
# Build the flat sample table straight from the loaded dataset instead of
# transforming `data_pd` in place. This keeps the cell idempotent: re-running
# it always starts from the same input, whereas reshaping `data_pd` in place
# fails on a second run because column 0 no longer exists.
# Columns 1-4 are the four features; column 5 is the class label.
iris_table = pd.concat([pd.DataFrame(iris.data), pd.Series(iris.target)], axis=1)
iris_table.columns = [1, 2, 3, 4, 5]

# Shuffle the rows reproducibly. The global NumPy seed is set (rather than
# passing random_state to sample) so that any later code drawing from the
# global generator sees the same state as before.
np.random.seed(10)
data_pd = iris_table.sample(frac=1)

# Define training data and targets: the first half of the shuffled rows,
# selected by position (the index labels are shuffled, so use .iloc).
training = data_pd.iloc[:n_samples // 2]
training_data = training[[1, 2, 3, 4]].values
training_target = training[5].values

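A linear support vector machine is trained to predict the species class from the four iris features, using the first half of the shuffled samples for training and the remaining half for evaluation.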

In [15]:
# Fit a linear support vector classifier on the training half
# (fit returns the estimator itself, so construction and fitting are chained).
clf = svm.LinearSVC().fit(training_data, training_target)

# The second half of the shuffled table is held out for evaluation.
held_out = data_pd.iloc[n_samples // 2:]
expected = held_out[5].values
predicted = clf.predict(held_out[[1, 2, 3, 4]])

# Display the confusion matrix of expected vs. predicted classes
# (rows are the expected classes, columns the predicted ones).
metrics.confusion_matrix(expected, predicted)

array([[27,  0,  0],
       [ 0, 23,  0],
       [ 0,  3, 22]])