In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 

In [2]:
from sklearn.datasets import make_classification

X,y = make_classification(
    n_samples=1000, ## 1000 datapoints
    n_features=3,    ## 3 Independent Features
    n_classes=2,    ## Output Feature Binary 
    n_redundant=1, 
    random_state=999
)

In [3]:
X

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]], shape=(1000, 3))

In [14]:
dataset = pd.DataFrame(X,columns=['Age','Salary','Size'])
dataset

Unnamed: 0,Age,Salary,Size
0,-0.335050,0.028527,1.161931
1,-1.377463,-0.405821,0.443596
2,-1.045200,-0.723348,-3.104704
3,2.085399,0.758149,0.519318
4,1.762419,0.822554,1.945117
...,...,...,...
995,-1.040534,-0.211906,1.119179
996,-1.615211,-0.505609,0.273764
997,-0.756026,-0.518161,-2.203823
998,0.560663,-0.073358,-2.156603


In [6]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30,random_state=42)

In [13]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((700, 3), (300, 3), (700,), (300,))

In [8]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [9]:
X_train_scaled

array([[-0.72035848, -0.57568561,  0.11778199],
       [-0.07822988,  0.15786262,  0.68236819],
       [-1.00500985, -0.58934746,  0.81397139],
       ...,
       [-0.46129151, -0.2387724 ,  0.47002092],
       [ 1.20495523,  1.35125753,  0.98274034],
       [ 0.15484952, -0.21374024, -1.05070557]], shape=(700, 3))

In [11]:
y_train.shape

(700,)

### K Nearest Neighbour Classifier 

In [26]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors=5,algorithm='auto',p=2)

classifier.fit(X_train,y_train)

In [27]:
y_pred = classifier.predict(X_test)

In [28]:
from sklearn.metrics import classification_report,confusion_matrix,r2_score,accuracy_score

accuracy = accuracy_score(y_test,y_pred)
report = classification_report(y_test,y_pred)

print("Classification report : ",report)
print("Accuracy : ",accuracy)

Classification report :                precision    recall  f1-score   support

           0       0.89      0.94      0.91       154
           1       0.93      0.88      0.90       146

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300

Accuracy :  0.9066666666666666


## K Nearest Neighbour Regressor 

In [58]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 

In [59]:
from sklearn.datasets import make_regression

X,y = make_regression(n_samples=1000,n_features=2,random_state=42, noise=10)

In [60]:
X

array([[-0.16711808,  0.14671369],
       [-0.02090159,  0.11732738],
       [ 0.15041891,  0.364961  ],
       ...,
       [ 0.30263547, -0.75427585],
       [ 0.38193545,  0.43004165],
       [ 0.07736831, -0.8612842 ]], shape=(1000, 2))

In [61]:
y

array([-1.49969499e+01, -1.26780889e+01,  1.77754545e+01,  6.66146467e+00,
       -1.41955300e+01, -2.52448482e+01, -3.92316263e+01, -5.20180382e+01,
        5.76368853e+00, -5.01186029e+01,  3.68932300e+01, -5.73879093e+01,
       -1.43015904e+01, -3.14639243e+01,  3.73110627e+01,  1.56178696e+01,
       -6.66080007e+00, -4.89229382e+01, -9.58772599e+00,  8.53715394e+01,
        1.66537460e+01, -3.41293946e+01, -3.72378063e+01, -4.87281315e+01,
       -5.51790322e+01, -3.55366169e+01, -3.72747912e+00,  7.34218222e+01,
        1.01718119e+02, -1.90116475e+01, -3.43901341e+01, -9.62943086e+00,
        1.16658030e+01,  2.08905862e+01, -1.70900105e+01, -1.50447947e+01,
        4.68228521e+01, -1.89552845e+01,  5.71898896e+01, -5.48067724e+01,
       -5.16310769e+01, -8.08939793e+00,  6.06663511e+01,  3.55254095e+01,
        1.04233163e+01, -4.30586696e+01,  1.12998389e+00,  2.31342762e+01,
       -6.13582958e+00,  2.60353115e+01,  5.87540393e+01,  2.13374470e+01,
       -4.73477124e+01,  

In [62]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30,random_state=42)
X_train.shape,X_test.shape

((700, 2), (300, 2))

In [70]:
from sklearn.neighbors import KNeighborsRegressor

regression = KNeighborsRegressor(n_neighbors=5,algorithm='kd_tree')

regression.fit(X_train,y_train)

In [71]:
y_pred = regression.predict(X_test)

In [72]:
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error,r2_score

mae = mean_absolute_error(y_test,y_pred)
score = r2_score(y_test,y_pred)

#print(f"Accuracy :{accuracy}")
#print(f"Classification :{report}")
print(f"Mean Absolute Error :{mae}")
print(f"R2 Score : {score}")

Mean Absolute Error :9.305447831862232
R2 Score : 0.9172710669776581
