In [2]:
#Author: Terry Su
#Date: July 16, 2021
#Purpose: Playing with Python machine learning using iris flower species/properties

#Typical machine learning procedure
#1. import the data
#2. clean the data
#3. split the data --> training set + test set
#4. create a model
#5. check the output
#6. improve

#1/2. Import the data. No separate cleaning step is performed in the cells shown here.
from sklearn.datasets import load_iris

# Later cells read this object's .data, .target, .feature_names and .target_names.
iris = load_iris()

In [3]:
# Feature matrix (a numpy array) and the matching class labels.
X, y = iris.data, iris.target

# Names of the features and of the target classes.
feature_names, target_names = iris.feature_names, iris.target_names

In [4]:
#3. Split the data into a training set and a test set.
from sklearn.model_selection import train_test_split

# test_size=0.6 holds out 60% of the rows for testing, so the model trains on
# the remaining 40% (60 training rows / 90 test rows for this dataset).
# Use test_size=0.4 instead if a 60% train / 40% test split is wanted.
# random_state pins the shuffle so Restart & Run All reproduces the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.6, random_state=42
)

print(X_train.shape)
print(X_test.shape)

(60, 4)
(90, 4)


In [5]:
#4. Create the model: a k-nearest-neighbors classifier with k = 3.
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predicted class labels for the held-out test rows.
y_predict = knn.predict(X_test)

In [6]:
#5. Check the output: fraction of test rows whose predicted label equals the true label.
from sklearn import metrics

print(metrics.accuracy_score(y_true=y_test, y_pred=y_predict))

0.9666666666666667


In [9]:
# Predict the iris species from the properties of two unidentified flowers
# (one row of four measurements per flower).
# Ideally we would also have sample_target = [[correct species 1], [correct species 2]...]
sample = [
    [3, 5, 4, 2],
    [2, 3, 5, 4],
]

predictions = knn.predict(sample)
# Translate each predicted class index into its species name.
predict_species = [iris.target_names[class_idx] for class_idx in predictions]

print('predicted species: ', predict_species)

predicted species:  ['versicolor', 'virginica']


In [13]:
# Save the trained model (knn) as a joblib file so it can be reloaded later
# without retraining. The relative path resolves against the current working directory.

import joblib
joblib.dump(knn, 'mlbrain.joblib')

In [15]:
# Reload the saved model and predict on the test rows again.
# Relies on `joblib` having been imported by the cell that saved the model.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code while
# loading; only load files that come from a trusted source.
model = joblib.load('mlbrain.joblib')
model.predict(X_test)

array([1, 1, 2, 0, 0, 2, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 1, 2, 2,
       0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2,
       2, 0, 0, 2, 2, 1, 1, 2, 0, 2, 0, 0, 0, 0, 2, 1, 1, 1, 2, 0, 2, 2,
       1, 1, 1, 0, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 0, 1, 2, 0, 0, 1, 1, 0,
       0, 2])