In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [6]:
# Load the iris dataset and wrap the feature matrix and labels in pandas objects.
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='target')

# Join features and label column-wise into a single frame. Both pieces carry
# the default 0..n-1 index, so the rows line up one-to-one.
df = pd.concat([X, y], axis=1)

# Let's check the shape.
print("DataFrame shape:", df.shape)

# Class balance as proportions. This is the last expression in the cell, so it
# is what the notebook renders as the cell's result.
df.target.value_counts(normalize=True)


DataFrame shape: (150, 5)


target
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64

In [8]:
# Hold out 20% of the rows as a test set. Stratifying on the label keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
train_df, test_df = train_test_split(
    df,
    stratify=df['target'],
    test_size=0.2,
    random_state=42,
)

# Report the size of each partition.
print("Training set shape:", train_df.shape)
print("Testing set shape:", test_df.shape)

# Confirm that the class balance survived the split.
print(
    "Training set target distribution:\n",
    train_df['target'].value_counts(normalize=True),
)
print(
    "Testing set target distribution:\n",
    test_df['target'].value_counts(normalize=True),
)

Training set shape: (120, 5)
Testing set shape: (30, 5)
Training set target distribution:
 target
0    0.333333
2    0.333333
1    0.333333
Name: proportion, dtype: float64
Testing set target distribution:
 target
0    0.333333
2    0.333333
1    0.333333
Name: proportion, dtype: float64


In [10]:
# Peek at two training rows. The index keeps the original row labels from `df`,
# so after the shuffled split it is no longer sequential.
train_df.head(2)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
8,4.4,2.9,1.4,0.2,0
106,4.9,2.5,4.5,1.7,2


In [14]:
# Separate the feature columns from the label in each partition.
X_train = train_df.drop(columns='target')
y_train = train_df['target']
X_test = test_df.drop(columns='target')
y_test = test_df['target']

# k-nearest-neighbours classifier that votes over the 3 closest training rows.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Score on the held-out rows only; the model never saw them during fitting.
# NOTE: the test partition is small (20% of the data), so this single-split
# accuracy is a coarse estimate rather than a precise one.
y_pred = knn.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 1.0


In [15]:
# Save the fitted classifier to disk so it can be reused without retraining.
# NOTE: only unpickle this file from a trusted location (pickle.load can run
# arbitrary code), and preferably with the same scikit-learn version that
# produced it.
with open('knn_model.pkl', 'wb') as model_file:
    pickle.dump(knn, model_file)
