In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.metrics import confusion_matrix,classification_report,mean_squared_error,r2_score

# **KNN Classifier**

In [2]:
iris = load_iris()

In [3]:
X,y = iris.data, iris.target

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [5]:
knn_clf = KNeighborsClassifier()

In [6]:
knn_clf.fit(X_train,y_train)

In [7]:
y_pred_clf = knn_clf.predict(X_test)
print("Predict: ",y_pred_clf)

Predict:  [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [8]:
matrix = confusion_matrix(y_test,y_pred_clf)
report = classification_report(y_test,y_pred_clf)

In [9]:
print("Matrix: ",matrix)
print("Report: ",report)

Matrix:  [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Report:                precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



# **KNN Regression**

In [10]:
tips = sns.load_dataset("tips")
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [11]:
tips.duplicated().sum()

np.int64(1)

In [12]:
tips.drop_duplicates()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [13]:
tips_encoded = pd.get_dummies(tips, columns=['sex', 'smoker', 'day', 'time'], drop_first=True)
display(tips_encoded.head())

Unnamed: 0,total_bill,tip,size,sex_Female,smoker_No,day_Fri,day_Sat,day_Sun,time_Dinner
0,16.99,1.01,2,True,True,False,False,True,True
1,10.34,1.66,3,False,True,False,False,True,True
2,21.01,3.5,3,False,True,False,False,True,True
3,23.68,3.31,2,False,True,False,False,True,True
4,24.59,3.61,4,True,True,False,False,True,True


In [14]:
knn_reg = KNeighborsRegressor()

In [15]:
knn_reg.fit(X_train,y_train)

In [16]:
y_pred_reg = knn_reg.predict(X_test)
print("Predict: ",y_pred_reg)

Predict:  [1.  0.  2.  1.  1.  0.  1.  2.  1.4 1.  1.8 0.  0.  0.  0.  1.  2.  1.
 1.  2.  0.  1.8 0.  2.  2.  2.  2.  2.  0.  0. ]


In [17]:
mse = mean_squared_error(y_test,y_pred_reg)
rmse = np.sqrt(mse)
r2 = r2_score(y_test,y_pred_reg)

In [18]:
print(f"MSE: {mse}\nRMSE: {rmse}\nR2 Score: {r2}")

MSE: 0.007999999999999997
RMSE: 0.08944271909999157
R2 Score: 0.9885532591414944
