In [42]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import log_loss,accuracy_score
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedKFold
from sklearn.pipeline import Pipeline

In [2]:
image=pd.read_csv("Image_Segmention.csv")
image.head()

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,BRICKFACE,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [3]:
lbl=LabelEncoder()
image['Class']=lbl.fit_transform(image['Class'])
X = image.drop("Class",axis =1)
y = image['Class']

In [5]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=23,stratify=y)

In [8]:
knn=KNeighborsClassifier(n_neighbors=1)
scaler  = StandardScaler()
pipe = Pipeline([('SCL',scaler),('knn',knn)])
pipe.fit(X_train,y_train)

In [11]:
y_pred = pipe.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.8253968253968254


In [10]:
y_pred_proba=pipe.predict_proba(X_test)
log_loss(y_test,y_pred_proba)

6.29333630603633

### GCV

In [35]:
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)
scaler=StandardScaler()
knn=KNeighborsClassifier()
pipe=Pipeline([('SCL',scaler),('KNN',knn)])

In [36]:
pipe.get_params()

{'memory': None,
 'steps': [('SCL', StandardScaler()), ('KNN', KNeighborsClassifier())],
 'verbose': False,
 'SCL': StandardScaler(),
 'KNN': KNeighborsClassifier(),
 'SCL__copy': True,
 'SCL__with_mean': True,
 'SCL__with_std': True,
 'KNN__algorithm': 'auto',
 'KNN__leaf_size': 30,
 'KNN__metric': 'minkowski',
 'KNN__metric_params': None,
 'KNN__n_jobs': None,
 'KNN__n_neighbors': 5,
 'KNN__p': 2,
 'KNN__weights': 'uniform'}

In [39]:
params={'KNN__n_neighbors':np.arange(1,11)}

In [40]:
gcv=GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='neg_log_loss')
gcv.fit(X,y)

In [41]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'KNN__n_neighbors': 10}
best score : -0.8809357429167448


### Using MinMaxScaler

In [43]:
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)
scaler=MinMaxScaler()
knn=KNeighborsClassifier()
pipe=Pipeline([('SCL',scaler),('KNN',knn)])

In [44]:
params={'KNN__n_neighbors':np.arange(1,11)}

In [45]:
gcv=GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='neg_log_loss')
gcv.fit(X,y)

In [46]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'KNN__n_neighbors': 10}
best score : -1.0377096814415157
