In [2]:
# SYSTEM: Please ensure that the necessary libraries are installed
%pip install numpy
%pip install pandas
%pip install matplotlib
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.
Collecting pandas
  Downloading pandas-2.3.2-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.2-cp313-cp313-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ------ --------------------------------- 1.8/11.0 MB 14.4 MB/s eta 0:00:01
   ---------------------------------------- 11.0/11.0 MB 34.4 MB/s  0:00:00
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas

   ---------------------------------------- 0/3 [pytz]
   ------------- -------------------------- 1/3 [tzdata]
   -------------------------- ------------- 2/3 [pandas]
   -----------------

In [3]:
## PREVIOUS: we've imported these previously and shouldn't have any issues doing it again ...
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt

## NEW IMPORTS - importing classes from specific files ...
from sklearn.datasets        import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors       import KNeighborsClassifier
from sklearn.metrics         import accuracy_score

In [4]:
# Load the Iris dataset as pandas objects. `iris.frame` holds the four
# measurement columns plus the numeric `target` column; work on a copy so
# later edits to `df` never touch the loader's own frame.
iris = load_iris(as_frame=True)
df = iris.frame.copy()

# Bare expression on the last line -> rich table display of the first rows.
print("'df.head()' prints the first five rows:")
df.head()

'df.head()' prints the first five rows:


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [5]:
# Show the frame's (rows, columns) shape and then its column labels,
# each on its own line.
print(df.shape, df.columns, sep="\n")

(150, 5)
Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')


In [6]:
# NOTE: Tidy the column labels in two literal (non-regex) passes:
#       1. drop the " (cm)" unit suffix,
#       2. turn the remaining spaces into underscores,
#       e.g. "sepal length (cm)" -> "sepal_length". "target" is left untouched.
#       Re-running this cell is harmless: cleaned names contain neither pattern.
without_units = df.columns.str.replace(" (cm)", "", regex=False)
df.columns = without_units.str.replace(" ", "_", regex=False)

df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [7]:
# Quick data-quality report, printed as three sections.

# 1) dtype of every column (heading and table separated by a newline).
print("Data Types:", df.dtypes, sep="\n")

# 2) number of missing (NaN/None) entries in each column.
print("\nMissing values per column:", df.isna().sum(), sep="\n")

# 3) NOTE: number of rows that are exact repeats of an earlier row.
print('\nNumber of duplicationed rows:', df.duplicated().sum())

Data Types:
sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
target            int64
dtype: object

Missing values per column:
sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
target          0
dtype: int64

Number of duplicationed rows: 1


In [8]:
# STEPS: separate the data into inputs and labels, then into TRAIN and TEST.
#   Y (target)   = the "target" column
#   X (features) = every other column

Y = df["target"]
X = df.drop(columns="target")

# INFO: keep 80% of the rows for training and hold out 20% for testing.
#       random_state fixes the shuffle so the split is repeatable, and
#       stratify=Y asks for the class proportions of Y in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

# Feature shapes, a blank separator line, then label shapes.
print('X_train.shape:', X_train.shape)
print('X_test.shape:', X_test.shape)
print()
print('Y_train.shape:', Y_train.shape)
print('Y_test.shape:', Y_test.shape)

X_train.shape: (120, 4)
X_test.shape: (30, 4)

Y_train.shape: (120,)
Y_test.shape: (30,)


In [9]:
# Build a k-nearest-neighbours classifier with k = 5 and fit (train) it on the
# training split. fit() returns the estimator itself, so both steps chain.
knn_quick = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)

# Bare name as the last expression keeps the fitted model's summary as the
# cell output.
knn_quick

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [10]:
# Pick one example from X_test. The double brackets keep the result a one-row
# DataFrame (2-D) instead of collapsing it to a Series.
one_example = X_test.iloc[[0]]

# Label on one line, value on the next.
print('one_example', one_example, sep='\n')
print('one_example.shape', one_example.shape, sep='\n')

# Ask the model to predict the class for this example; predict() returns one
# label per input row, so take the first (and only) entry.
one_predict = knn_quick.predict(one_example)[0]
print('one_predict', one_predict)

# iris.target_names translates the numeric class code into its species name.
print(int(one_predict), "label is: ", iris.target_names[int(one_predict)])

one_example
    sepal_length  sepal_width  petal_length  petal_width
38           4.4          3.0           1.3          0.2
one_example.shape
(1, 4)
one_predict 0
0 label is:  setosa


In [11]:
#Use our quick model to predict every row in the test set
test_predictions = knn_quick.predict(X_test)
test_accuracy    = accuracy_score(Y_test, test_predictions)

print(test_accuracy)

1.0
