In [11]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Read the Iris CSV file into a DataFrame.
# The path is relative to the notebook's working directory.
iris_csv_path = '../data/Iris_Data.csv'
df = pd.read_csv(iris_csv_path)

In [None]:
# We need to split our dataset into two pieces:
# one of them will be our training set, and the other one our test set.
# Typically 67% to 80% of the data goes to the training set. Here, we use 100 samples for training and 50 for testing.

In [8]:
# Shuffle the rows before they are split into training and test sets.
# frac=1 samples every row (a full permutation of the table); the fixed seed
# makes the resulting order identical on every run.
shuffle_seed = 12345
shuffled_df = df.sample(frac=1, random_state=shuffle_seed)

In [9]:
# Divide the shuffled rows into a training part and a test part.
# The first 100 rows are used for training and the remaining rows for testing.
# In each part, every column except the last one is a feature and the last
# column is the label.
training_rows = shuffled_df.iloc[:100]
test_rows = shuffled_df.iloc[100:]

x_training_set = training_rows.iloc[:, :-1].values
y_training_set = training_rows.iloc[:, -1].values

x_test_set = test_rows.iloc[:, :-1].values
y_test_set = test_rows.iloc[:, -1].values

In [13]:
# Build a k-nearest-neighbours classifier with 10 neighbours and fit it on the
# training set; fit() returns the fitted estimator, so the calls are chained.
classifier = KNeighborsClassifier(n_neighbors=10).fit(x_training_set, y_training_set)

# Predict labels for both sets so that training and test performance can be
# compared in the following cells.
y_answer_test = classifier.predict(x_test_set)
y_answer_training = classifier.predict(x_training_set)

In [23]:
# Performance analysis on the test set.
print("Performance with the test set.")

# Element-wise comparison of the predicted labels against the true labels.
test_hits = y_answer_test == y_test_set
test_misses = y_answer_test != y_test_set

examples = len(y_test_set)
correct_answers = sum(test_hits)
incorrect_answers = sum(test_misses)
# Share of correct predictions, expressed as a percentage.
accuracy = (correct_answers / examples) * 100

# One print call, one report line per argument.
print(
    f'Number of examples: {examples}',
    f'Correct answers: {correct_answers}',
    f'Incorrect answers: {incorrect_answers}',
    f'Accuracy: {accuracy}%',
    sep='\n',
)

Number of examples: 50
Correct answers: 45
Incorrect answers: 5
Accuracy: 90.0%


In [24]:
# Performance analysis on the training set (the data the classifier was fitted on).
print("Performance with the training set.")

# Element-wise comparison of the predicted labels against the true labels.
training_hits = y_answer_training == y_training_set
training_misses = y_answer_training != y_training_set

examples = len(y_training_set)
correct_answers = sum(training_hits)
incorrect_answers = sum(training_misses)
# Share of correct predictions, expressed as a percentage.
accuracy = (correct_answers / examples) * 100

# One print call, one report line per argument.
print(
    f'Number of examples: {examples}',
    f'Correct answers: {correct_answers}',
    f'Incorrect answers: {incorrect_answers}',
    f'Accuracy: {accuracy}%',
    sep='\n',
)


Performance with the training set.
Number of examples: 100
Correct answers: 97
Incorrect answers: 3
Accuracy: 97.0%


In [26]:
# Compare K values: fit one classifier for each K from 1 to 14 and report its
# accuracy on the training set and on the test set.
# Note: this loop rebinds `classifier`, `y_answer_training` and `y_answer_test`;
# once it finishes they refer to the last model fitted (K = 14).
# NOTE(review): K is compared using test-set accuracy, so the test set is no
# longer an unbiased estimate for the chosen K — consider a validation split
# or cross-validation.

# The set sizes do not change between iterations, so they are measured once.
examples_training = len(y_training_set)
examples_test = len(y_test_set)

for k in range(1, 15):
    classifier = KNeighborsClassifier(n_neighbors=k).fit(x_training_set, y_training_set)

    # Accuracy on the data the model was fitted on.
    y_answer_training = classifier.predict(x_training_set)
    correct_answers_training = sum(y_answer_training == y_training_set)
    incorrect_answers_training = sum(y_answer_training != y_training_set)
    accuracy_training = (correct_answers_training / examples_training) * 100

    # Accuracy on the held-out test data.
    y_answer_test = classifier.predict(x_test_set)
    correct_answers_test = sum(y_answer_test == y_test_set)
    incorrect_answers_test = sum(y_answer_test != y_test_set)
    accuracy_test = (correct_answers_test / examples_test) * 100

    print(f'K: {k}\tAccuracy with the training set: {accuracy_training}\tAccuracy with the test set: {accuracy_test}')


K: 1	Accuracy with the training set: 100.0	Accuracy with the test set: 94.0
K: 2	Accuracy with the training set: 97.0	Accuracy with the test set: 90.0
K: 3	Accuracy with the training set: 99.0	Accuracy with the test set: 96.0
K: 4	Accuracy with the training set: 97.0	Accuracy with the test set: 96.0
K: 5	Accuracy with the training set: 97.0	Accuracy with the test set: 96.0
K: 6	Accuracy with the training set: 97.0	Accuracy with the test set: 90.0
K: 7	Accuracy with the training set: 97.0	Accuracy with the test set: 92.0
K: 8	Accuracy with the training set: 97.0	Accuracy with the test set: 90.0
K: 9	Accuracy with the training set: 98.0	Accuracy with the test set: 90.0
K: 10	Accuracy with the training set: 97.0	Accuracy with the test set: 90.0
K: 11	Accuracy with the training set: 97.0	Accuracy with the test set: 90.0
K: 12	Accuracy with the training set: 96.0	Accuracy with the test set: 90.0
K: 13	Accuracy with the training set: 96.0	Accuracy with the test set: 90.0
K: 14	Accuracy with 