In [12]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV,StratifiedKFold,KFold
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis,QuadraticDiscriminantAnalysis
import warnings as ws
# NOTE(review): this silences *every* warning for the rest of the notebook.
# That includes the warnings scikit-learn emits when a grid-search candidate
# fails to fit or does not converge, so invalid parameter combinations are
# scored without any visible trace. Consider narrowing the filter to a
# specific warning category.
ws.filterwarnings("ignore")

In [2]:
# Read Vehicle.csv (relative to the working directory) into a DataFrame and
# preview its first five rows.
vehicle = pd.read_csv(filepath_or_buffer="Vehicle.csv")
vehicle.head(n=5)

Unnamed: 0,Comp,Circ,D.Circ,Rad.Ra,Pr.Axis.Ra,Max.L.Ra,Scat.Ra,Elong,Pr.Axis.Rect,Max.L.Rect,Sc.Var.Maxis,Sc.Var.maxis,Ra.Gyr,Skew.Maxis,Skew.maxis,Kurt.maxis,Kurt.Maxis,Holl.Ra,Class
0,95,48,83,178,72,10,162,42,20,159,176,379,184,70,6,16,187,197,van
1,91,41,84,141,57,9,149,45,19,143,170,330,158,72,9,14,189,199,van
2,104,50,106,209,66,10,207,32,23,158,223,635,220,73,14,9,188,196,saab
3,93,41,82,159,63,9,144,46,19,143,160,309,127,63,6,10,199,207,van
4,85,44,70,205,103,52,149,45,19,144,241,325,188,127,9,11,180,183,bus


In [3]:
# Replace the string labels in 'Class' with integer codes. The fitted encoder
# is kept in `lbl` so codes can be mapped back with lbl.inverse_transform.
lbl = LabelEncoder()
lbl.fit(vehicle['Class'])
vehicle['Class'] = lbl.transform(vehicle['Class'])
vehicle.head()

Unnamed: 0,Comp,Circ,D.Circ,Rad.Ra,Pr.Axis.Ra,Max.L.Ra,Scat.Ra,Elong,Pr.Axis.Rect,Max.L.Rect,Sc.Var.Maxis,Sc.Var.maxis,Ra.Gyr,Skew.Maxis,Skew.maxis,Kurt.maxis,Kurt.Maxis,Holl.Ra,Class
0,95,48,83,178,72,10,162,42,20,159,176,379,184,70,6,16,187,197,3
1,91,41,84,141,57,9,149,45,19,143,170,330,158,72,9,14,189,199,3
2,104,50,106,209,66,10,207,32,23,158,223,635,220,73,14,9,188,196,2
3,93,41,82,159,63,9,144,46,19,143,160,309,127,63,6,10,199,207,3
4,85,44,70,205,103,52,149,45,19,144,241,325,188,127,9,11,180,183,0


In [5]:
# Target vector: the encoded 'Class' column.
y = vehicle['Class']
# Feature matrix: every remaining column.
X = vehicle.drop(columns=['Class'])

### LOGISTIC REGRESSION

In [4]:
# Logistic regression baseline and the shared 5-fold stratified splitter
# (shuffled with a fixed seed so every model is scored on the same folds).
lr = LogisticRegression()
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Search space for the grid search.
# - 'multi_class' values must be lowercase: the previous 'OVR' is not a valid
#   option, so every one-vs-rest candidate failed to fit and was scored NaN.
# - Not every solver supports every penalty (e.g. 'lbfgs' has no 'l1', and
#   'elasticnet' needs 'saga' plus an l1_ratio). Those combinations still fail
#   and are scored NaN by GridSearchCV's default error_score; only the valid
#   ones compete for best_params_.
params = {
    'penalty': ['l1', 'l2', 'elasticnet', None],
    'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    'multi_class': ['ovr', 'multinomial'],
}

In [6]:
# Exhaustive search over the logistic-regression grid, scored by negative
# log loss (higher is better) on the stratified folds.
gcv = GridSearchCV(
    estimator=lr,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [7]:
# Report the winning logistic-regression configuration and its mean CV score.
best_params, best_score = gcv.best_params_, gcv.best_score_
print("best parameter :", best_params)
print("best score :", best_score)

best parameter : {'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
best score : -0.43019987069217275


### Gaussian NB

In [8]:
# Gaussian naive Bayes, tuning var_smoothing over 20 evenly spaced values
# between 0 and 1 (both endpoints included).
nb = GaussianNB()
params = dict(var_smoothing=np.linspace(start=0, stop=1, num=20))

In [9]:
# Grid-search the smoothing values on the same stratified folds, scored by
# negative log loss; runs two jobs in parallel and logs each fit (verbose=3).
gcv = GridSearchCV(
    estimator=nb,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
    n_jobs=2,
    verbose=3,
)
gcv.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits






In [10]:
# Report the best var_smoothing value and its mean CV score.
best_params, best_score = gcv.best_params_, gcv.best_score_
print("best parameter :", best_params)
print("best score :", best_score)

best parameter : {'var_smoothing': 0.3684210526315789}
best score : -1.2441401372989114


### LDA

In [13]:
# Linear discriminant analysis, comparing its three solvers on the same
# 5-fold stratified split (re-created here with the same seed so this cell
# can run on its own).
lda = LinearDiscriminantAnalysis()
kfold = StratifiedKFold(n_splits=5, random_state=23, shuffle=True)

# Solver names are case-sensitive: the previous 'Lsqr' is not a valid option,
# so that candidate always failed to fit and was never actually evaluated.
params = {'solver': ['svd', 'lsqr', 'eigen']}

gcv = GridSearchCV(lda, param_grid=params, cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)

In [14]:
# Report the best LDA solver and its mean CV score.
best_params, best_score = gcv.best_params_, gcv.best_score_
print("best parameter :", best_params)
print("best score :", best_score)

best parameter : {'solver': 'svd'}
best score : -0.47946220433359044


### QDA 

In [15]:
# QDA is scored with its default settings: the empty grid yields a single
# candidate, so the search simply cross-validates that one model on the same
# folds and metric as the other classifiers.
qda = QuadraticDiscriminantAnalysis()
params = dict()
gcv = GridSearchCV(
    estimator=qda,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [16]:
# Report the (empty) best-parameter dict and the mean CV score for QDA.
best_params, best_score = gcv.best_params_, gcv.best_score_
print("best parameter :", best_params)
print("best score :", best_score)

best parameter : {}
best score : -0.4284929344345912
[CV 1/5] END ................var_smoothing=0.0;, score=-2.743 total time=   0.0s
[CV 3/5] END ................var_smoothing=0.0;, score=-1.978 total time=   0.0s
[CV 4/5] END ................var_smoothing=0.0;, score=-2.809 total time=   0.0s
[CV 5/5] END ................var_smoothing=0.0;, score=-2.587 total time=   0.0s
[CV 2/5] END var_smoothing=0.05263157894736842;, score=-1.465 total time=   0.0s
[CV 4/5] END var_smoothing=0.05263157894736842;, score=-1.475 total time=   0.0s
[CV 1/5] END var_smoothing=0.10526315789473684;, score=-1.310 total time=   0.0s
[CV 3/5] END var_smoothing=0.10526315789473684;, score=-1.243 total time=   0.0s
[CV 5/5] END var_smoothing=0.10526315789473684;, score=-1.306 total time=   0.0s
[CV 2/5] END var_smoothing=0.15789473684210525;, score=-1.277 total time=   0.0s
[CV 3/5] END var_smoothing=0.15789473684210525;, score=-1.221 total time=   0.0s
[CV 1/5] END var_smoothing=0.21052631578947367;, score=-

### Conclusion: QDA achieves the best (highest, i.e. closest to zero) negative log loss of the four models compared, so it is selected as the best model.