# Q1. Write Python code to implement the KNN classifier algorithm on the `load_iris` dataset from `sklearn.datasets`.

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Load the iris data: features as a labelled DataFrame, targets as returned by the loader.
data = load_iris()
x = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Classifier that votes among the 5 nearest neighbours.
classifier = KNeighborsClassifier(algorithm='auto', n_neighbors=5)


In [27]:
# Fit the KNN classifier on the training split.
classifier.fit(x_train, y_train)

In [28]:
# Predict class labels for the held-out test split.
y_pred = classifier.predict(x_test)

In [29]:
# Display the predicted labels.
y_pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [30]:
# Display the true labels for comparison with the predictions.
y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

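# Q2. Write Python code to implement the KNN regressor algorithm on the `load_boston` dataset from `sklearn.datasets`.

> Note: `load_boston` was removed in scikit-learn 1.2, so the solution below reads the Boston housing data from a local `HousingData.csv` file instead.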

In [104]:
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

scaler = StandardScaler()

# 5-nearest-neighbours regressor with every neighbour weighted equally.
regressor = KNeighborsRegressor(n_neighbors=5,
                                weights='uniform',
                                algorithm='auto')

# Load the housing data, drop every row that has a missing value and
# rename the target column. Built as one chain so re-running the cell
# always starts from the file contents instead of mutating a frame in place.
df = (
    pd.read_csv("HousingData.csv")
    .dropna(axis=0)
    .rename(columns={'MEDV': 'Price'})
)

x = df[['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']]
y = df['Price']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training split only, then apply the
# same statistics to the test split. Calling fit_transform on the test split
# would re-fit the scaler and put the two splits on different scales.
transformed_x_train = scaler.fit_transform(x_train)
transformed_x_test = scaler.transform(x_test)

regressor.fit(transformed_x_train, y_train)

# Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on load_iris dataset in sklearn.datasets.

In [26]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score, confusion_matrix
from sklearn.datasets import load_iris
import warnings

warnings.filterwarnings("ignore")

iris = load_iris()


x = pd.DataFrame(iris.data, columns=iris.feature_names)
# Keep the target one-dimensional: passing a single-column DataFrame as y
# makes scikit-learn warn that a column vector was given where a 1-D array
# was expected.
y = pd.Series(iris.target, name='species')

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

parameters = {'n_neighbors': range(1, 21)}  # Search from K=1 to K=20

# Create a grid search using 5-fold cross-validation. The estimator is built
# here so the cell does not depend on a `classifier` left over from another cell.
grid_search = GridSearchCV(estimator=KNeighborsClassifier(algorithm='auto'),
                           param_grid=parameters,
                           cv=5)

# Tune K on the training split only; fitting on the full data would let the
# held-out test rows influence the choice of K.
grid_search.fit(x_train, y_train)

best_param = grid_search.best_params_['n_neighbors']

# With the default refit=True, best_estimator_ is a KNN classifier using the
# best K and already re-fitted on the whole training split.
classifier = grid_search.best_estimator_
y_pred = classifier.predict(x_test)
print(accuracy_score(y_test, y_pred))
print(precision_score(y_test, y_pred, average='weighted'))
print(recall_score(y_test, y_pred, average='weighted'))
print(f1_score(y_test, y_pred, average='weighted'))
print(confusion_matrix(y_test, y_pred))

1.0
1.0
1.0
1.0
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


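# Q4. Implement the KNN regressor algorithm with feature scaling on the `load_boston` dataset from `sklearn.datasets`.

> Note: `load_boston` was removed in scikit-learn 1.2, so the solution below reads the Boston housing data from a local `HousingData.csv` file instead.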

In [69]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Scaler used later in the notebook to standardise the feature columns.
scaler = StandardScaler()

# Load the housing data from a local CSV file.
boston = pd.read_csv("HousingData.csv")
# Count the missing values in each column.
boston.isnull().sum()

CRIM       20
ZN         20
INDUS      20
CHAS       20
NOX         0
RM          0
AGE        20
DIS         0
RAD         0
TAX         0
PTRATIO     0
B           0
LSTAT      20
MEDV        0
dtype: int64

In [80]:
# Drop every row with a missing value and rename the target column.
boston = boston.dropna(axis=0).rename(columns={'MEDV': 'Price'})

# Feature columns, listed once and reused for selection and for labelling
# the scaled frames below.
feature_columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']

x = boston[feature_columns]
y = boston['Price']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# Fit the scaler on the training split only and reuse its statistics for the
# test split. Calling fit_transform on both splits scales each with its own
# mean and standard deviation, so the two would not be comparable.
x_train_transformed = scaler.fit_transform(x_train)
x_test_transformed = scaler.transform(x_test)

# The scaler returns plain arrays; wrap them in DataFrames to restore the column names.
transformed_df_test = pd.DataFrame(x_test_transformed, columns=feature_columns)
transformed_df_train = pd.DataFrame(x_train_transformed, columns=feature_columns)

In [79]:
transformed_df_train

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.288670,-0.482424,-0.446827,-0.26968,-0.164136,-0.979779,1.003925,0.005211,-0.662693,-0.610462,1.122034,0.234453,1.042751
1,-0.407971,-0.482424,0.241447,-0.26968,-1.027864,-0.029474,-0.553306,0.583367,-0.548569,-0.080539,0.045535,0.336972,-0.104457
2,-0.410462,3.550968,-1.425674,-0.26968,-1.318624,1.037107,-1.900100,1.838035,-0.776817,-0.057246,-0.750138,0.446479,-1.132714
3,-0.391161,0.048286,-0.486451,-0.26968,-0.283861,0.178674,0.870649,1.216857,-0.548569,-0.587168,-1.592615,0.400799,0.967416
4,-0.314955,-0.482424,-0.446827,-0.26968,-0.164136,-0.630951,1.070563,0.146541,-0.662693,-0.610462,1.122034,0.421866,0.892081
...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,-0.408557,-0.482424,-0.982478,-0.26968,-0.925243,0.195900,-0.753220,0.471795,-0.776817,-0.959861,-0.048074,0.446479,-0.463950
291,-0.381126,-0.482424,1.571035,-0.26968,0.571316,0.256192,1.028476,-0.938607,-0.662693,0.146570,1.215642,0.446479,0.298652
292,-0.410499,-0.482424,-0.997154,-0.26968,-0.985105,-0.342414,-0.735684,2.009266,-0.776817,-0.348412,0.092339,0.329046,-0.343678
293,0.517495,-0.482424,1.014839,-0.26968,1.332423,-0.096943,1.024969,-0.725114,1.619787,1.480110,0.747599,0.446479,0.660789


# Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on load_iris dataset in sklearn.datasets.

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
# Load the iris dataset.
iris = load_iris()

# Features as a DataFrame labelled with the dataset's feature names; targets as returned by the loader.
x = pd.DataFrame(iris.data,columns = iris.feature_names)
y = iris.target

def custom_weight(distance):
    """Return inverse-distance weights for neighbour voting.

    A small constant is added to ``distance`` before inverting, so a
    distance of exactly zero does not cause a division by zero.
    """
    smoothing = 1e-6
    shifted = distance + smoothing
    return 1 / shifted

# Hold out 20% of the samples for testing, with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# KNN classifier whose neighbour votes are weighted by custom_weight.
classifier = KNeighborsClassifier(weights=custom_weight, n_neighbors=5)

classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# Report the accuracy on the test split.
print(accuracy_score(y_test, y_pred))

1.0


# Q6. Implement a function to standardise the features before applying KNN classifier.

In [63]:
import numpy as np
from sklearn.preprocessing import StandardScaler

standard = StandardScaler()

def stand(df):
    """Standardise every feature column of ``df``.

    The module-level ``standard`` scaler is re-fitted on ``df`` on every
    call and then used to transform it.

    Parameters
    ----------
    df : pandas.DataFrame or 2-D array-like
        Feature matrix with one column per feature.

    Returns
    -------
    pandas.DataFrame
        The standardised values. When ``df`` is a DataFrame its index and
        column labels are kept, so the result stays aligned with the input;
        other inputs get default integer labels.
    """
    scaled_values = standard.fit_transform(df)
    if isinstance(df, pd.DataFrame):
        # The scaler returns a bare array; restore the original labels.
        return pd.DataFrame(scaled_values, index=df.index, columns=df.columns)
    return pd.DataFrame(scaled_values)
    
# Draw 100 samples from a standard normal distribution. A seeded generator
# makes the cell produce the same numbers on every run.
rng = np.random.default_rng(42)
arr = rng.normal(loc=0, scale=1, size=100)
print(arr)

# Wrap the samples in a single-column DataFrame and standardise it.
df = pd.DataFrame(arr)
stand(df)

[ 0.32218742  1.05084413 -0.19902478 -0.9971476  -0.4461895  -1.56551222
 -2.33595869  1.08313602 -1.28859446 -1.51146927 -1.18055171 -0.08697272
 -0.62797765  0.8016418   0.96476762 -0.05431935  0.48085384 -0.10146884
  1.90632881 -0.52708439 -1.51839898 -0.86154257  0.63241702 -0.15192546
 -0.28931252  0.49148562 -0.58100288 -1.18597105  1.22606183  0.5158356
  0.43972545  1.28147663 -1.34508506 -0.2908104   0.29740623 -0.12180542
 -2.90428456  1.61781301  2.18018797 -1.42800659  1.41436684 -0.26837063
  2.32073716 -0.38114093  0.10667879  0.65532219 -0.21007625 -0.78221491
 -0.66498803 -1.05797972 -0.34477492  0.27083455 -1.67099477 -1.2069834
 -0.09653764  0.56052947  0.52869801 -0.97780121 -0.89505033  0.29671078
  0.18933224  0.08731309 -1.73749073 -0.43458306  0.2365648  -0.38475485
  1.17232404 -0.50936745  0.63927571  0.24076178  0.33409102 -0.34851916
 -0.01771847 -0.41750373  0.39829234  0.13313101 -0.96883869  1.16127022
 -0.54509353 -0.59093433 -0.5337134  -1.12136284  0.5

Unnamed: 0,0
0,0.439298
1,1.169092
2,-0.082727
3,-0.882095
4,-0.330278
...,...
95,1.346359
96,1.000135
97,0.136554
98,0.756781


# Q7. Write a Python function to calculate the Euclidean distance between two points.

In [57]:
import numpy as np

def euclidean_dist(point1, point2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    point1, point2 : sequence of numbers
        Coordinates of the two points. Any number of dimensions is
        accepted, but both points must have the same number of coordinates.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.

    Raises
    ------
    ValueError
        If the two points do not have the same shape.
    """
    first = np.asarray(point1, dtype=float)
    second = np.asarray(point2, dtype=float)
    # Check explicitly: NumPy broadcasting could otherwise accept mismatched points.
    if first.shape != second.shape:
        raise ValueError(
            f"points must have the same shape, got {first.shape} and {second.shape}"
        )
    return np.sqrt(np.sum((first - second) ** 2))

euclidean_dist([12,5], [5, 6])

7.0710678118654755

# Q8. Write a Python function to calculate the Manhattan distance between two points.

In [56]:
def manhattan_dist(point1, point2):
    """Return the Manhattan (L1) distance between two points.

    Parameters
    ----------
    point1, point2 : sequence of numbers
        Coordinates of the two points. Any number of dimensions is
        accepted, but both points must have the same number of coordinates.

    Returns
    -------
    number
        Sum of the absolute coordinate differences.

    Raises
    ------
    ValueError
        If the two points have different numbers of coordinates.
    """
    # zip() would silently truncate to the shorter point, so check lengths first.
    if len(point1) != len(point2):
        raise ValueError(
            f"points must have the same length, got {len(point1)} and {len(point2)}"
        )
    return sum(abs(a - b) for a, b in zip(point1, point2))

manhattan_dist([12,5], [5, 6])

8