## Which is the best model: Logistic Regression, KNN, or Gaussian Naive Bayes?

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score,log_loss
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV,StratifiedKFold,KFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
import warnings as ws
ws.filterwarnings("ignore")

In [2]:
# Load the image-segmentation data set from the working directory.
img = pd.read_csv(filepath_or_buffer="Image_Segmention.csv")

In [3]:
# Peek at the first five rows of the raw frame.
img.head(n=5)

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,BRICKFACE,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [4]:
# Distinct target labels, in order of first appearance.
img["Class"].unique()

array(['BRICKFACE', 'SKY', 'FOLIAGE', 'CEMENT', 'WINDOW', 'PATH', 'GRASS'],
      dtype=object)

In [5]:
# Replace the string class labels with integer codes, in place.
# `lbl` is kept so the codes can be mapped back via `lbl.classes_`.
lbl = LabelEncoder()
img["Class"] = lbl.fit_transform(img["Class"])

In [6]:
# Confirm that the Class column now holds integer codes.
img.head(n=5)

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,0,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,0,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,0,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,0,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,0,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [7]:
# Split the frame into predictors (X) and target (y).
#
# "Unnamed: 0" is excluded from X: in the preview above it simply counts the
# rows (0, 1, 2, ...), so it is a row identifier rather than an image
# measurement. The first rows all share one class, which suggests the file is
# ordered by class -- in that case the row number would leak the target into
# every model. errors="ignore" keeps the cell working if that column is absent.
X = img.drop(columns=["Class", "Unnamed: 0"], errors="ignore")
y = img["Class"]

## Logistic Regression

In [8]:
# Base logistic-regression estimator; hyper-parameters are set by the grid search.
# NOTE(review): features are unscaled and max_iter is left at its default; since
# warnings are silenced in the import cell, convergence problems would go unseen -- confirm.
lr = LogisticRegression()

In [9]:
# Shared 5-fold stratified splitter; the fixed seed keeps the folds identical
# for every model compared in this notebook.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=23,
)

In [10]:
# Hyper-parameter grid for logistic regression.
#
# multi_class values must be lowercase: scikit-learn accepts 'ovr', not 'OVR',
# so the upper-case spelling made every one-vs-rest candidate fail to fit.
# Not every penalty/solver/multi_class combination below is supported by
# scikit-learn; with GridSearchCV's default error_score such failed fits are
# scored NaN instead of aborting the search.
params = {
    'penalty': ['l1', 'l2', 'elasticnet', None],
    'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    'multi_class': ['ovr', 'multinomial'],
}

In [11]:
# Exhaustive search over the logistic-regression grid, scored by negative
# log loss (values closer to zero are better).
gcv = GridSearchCV(
    estimator=lr,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)

In [12]:
# Run the cross-validated search on the full data set.
gcv.fit(X=X, y=y)

In [13]:
# Report the winning logistic-regression configuration and its mean CV score.
print("best parameter :", gcv.best_params_)
print("best score :", gcv.best_score_)

best parameter : {'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
best score : -0.5465525609071172


## Gaussian NB

In [14]:
# Base Gaussian naive Bayes estimator; var_smoothing is tuned by the grid search.
nb = GaussianNB()

In [15]:
# Candidate var_smoothing values: 20 evenly spaced points from 0 to 1 inclusive.
params = {
    'var_smoothing': np.linspace(0, 1, num=20),
}

In [16]:
# Search the var_smoothing grid with the same folds and metric used for
# logistic regression, so the scores are directly comparable.
gcv = GridSearchCV(
    estimator=nb,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X=X, y=y)

In [17]:
# Report the winning Gaussian NB configuration and its mean CV score.
print("best parameter :", gcv.best_params_)
print("best score :", gcv.best_score_)

best parameter : {'var_smoothing': 0.15789473684210525}
best score : -0.9814572848069819


## KNN

In [18]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.pipeline import Pipeline

In [19]:
# KNN is distance based, so the features are scaled inside a pipeline; the
# scaler is then re-fitted on each training fold during cross-validation.
scaler = StandardScaler()
knn = KNeighborsClassifier()
pipe = Pipeline(steps=[('SCL', scaler), ('KNN', knn)])

In [20]:
# Grid for the pipeline: k from 1 to 10, and the 'SCL' step itself is swapped
# between standardisation and min-max scaling.
params = {
    'KNN__n_neighbors': np.arange(1, 11),
    'SCL': [StandardScaler(), MinMaxScaler()],
}

In [21]:
# Search the scaler/k grid with the same folds and metric as the other models.
gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X=X, y=y)

In [22]:
# Report the winning KNN pipeline configuration and its mean CV score.
print("best parameter :", gcv.best_params_)
print("best score :", gcv.best_score_)

best parameter : {'KNN__n_neighbors': 10, 'SCL': StandardScaler()}
best score : -0.8809357429167448


## **Conclusion**

Logistic regression is the best of the three models: its cross-validated log loss (≈ 0.547) is the lowest, i.e. closest to zero, compared with ≈ 0.881 for KNN and ≈ 0.981 for Gaussian NB.