In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedKFold,KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder


from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB,BernoulliNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis,QuadraticDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier,plot_tree



import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the vehicle measurements from the CSV in the working directory.
# The preview shows 18 numeric feature columns plus a string `Class` label.
vehicle = pd.read_csv(filepath_or_buffer="Vehicle.csv")

# Show the first five rows to confirm the file parsed as expected.
vehicle.head(n=5)

Unnamed: 0,Comp,Circ,D.Circ,Rad.Ra,Pr.Axis.Ra,Max.L.Ra,Scat.Ra,Elong,Pr.Axis.Rect,Max.L.Rect,Sc.Var.Maxis,Sc.Var.maxis,Ra.Gyr,Skew.Maxis,Skew.maxis,Kurt.maxis,Kurt.Maxis,Holl.Ra,Class
0,95,48,83,178,72,10,162,42,20,159,176,379,184,70,6,16,187,197,van
1,91,41,84,141,57,9,149,45,19,143,170,330,158,72,9,14,189,199,van
2,104,50,106,209,66,10,207,32,23,158,223,635,220,73,14,9,188,196,saab
3,93,41,82,159,63,9,144,46,19,143,160,309,127,63,6,10,199,207,van
4,85,44,70,205,103,52,149,45,19,144,241,325,188,127,9,11,180,183,bus


In [None]:
# Replace the string class labels with integer codes, in place, so the
# estimators below receive a numeric target. `lbl` keeps the fitted mapping
# (e.g. the previews show bus -> 0, saab -> 2, van -> 3).
lbl = LabelEncoder()
class_codes = lbl.fit_transform(vehicle['Class'])
vehicle['Class'] = class_codes

# Preview again to confirm that `Class` now holds integer codes.
vehicle.head(n=5)

Unnamed: 0,Comp,Circ,D.Circ,Rad.Ra,Pr.Axis.Ra,Max.L.Ra,Scat.Ra,Elong,Pr.Axis.Rect,Max.L.Rect,Sc.Var.Maxis,Sc.Var.maxis,Ra.Gyr,Skew.Maxis,Skew.maxis,Kurt.maxis,Kurt.Maxis,Holl.Ra,Class
0,95,48,83,178,72,10,162,42,20,159,176,379,184,70,6,16,187,197,3
1,91,41,84,141,57,9,149,45,19,143,170,330,158,72,9,14,189,199,3
2,104,50,106,209,66,10,207,32,23,158,223,635,220,73,14,9,188,196,2
3,93,41,82,159,63,9,144,46,19,143,160,309,127,63,6,10,199,207,3
4,85,44,70,205,103,52,149,45,19,144,241,325,188,127,9,11,180,183,0


In [None]:
# Separate the encoded target from the predictors: `y` is the `Class` column,
# `X` is every remaining column.
y = vehicle['Class']
X = vehicle.drop(columns='Class')

### 1. Logistic Regression

In [None]:
# Base estimator; every hyper-parameter of interest is supplied by the grid below.
lr = LogisticRegression()

# Shuffled, stratified 5-fold splitter with a fixed seed; the later grid
# searches in this notebook reuse this same `kfold` object.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Candidate settings for the search.
# Not every penalty/solver pair is supported by scikit-learn; unsupported pairs
# fail to fit and GridSearchCV (default `error_score`) records NaN for them.
params = {
    'penalty': ['l1', 'l2', 'elasticnet', None],
    'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    # scikit-learn spells the one-vs-rest option in lower case. The previous
    # value 'OVR' is rejected during fitting, so every one-vs-rest candidate
    # failed and was never actually evaluated.
    'multi_class': ['ovr', 'multinomial'],
}

In [None]:
# Exhaustive search over `params` for the logistic regression, ranking the
# candidates by cross-validated negative log loss (closer to 0 is better).
gcv = GridSearchCV(
    estimator=lr,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [None]:
# Report the winning logistic-regression settings and their mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
best score : -0.4306283465495343


### 2. Gaussian Naive Bayes

In [None]:
# Gaussian naive Bayes; the only tuned setting is the variance-smoothing term,
# searched over 20 evenly spaced values from 0 to 1 (both ends included).
nb = GaussianNB()
params = dict(var_smoothing=np.linspace(0, 1, num=20))

In [None]:
# Cross-validated search over the smoothing grid for Gaussian naive Bayes,
# scored by negative log loss on the shared `kfold` splits.
gcv = GridSearchCV(
    estimator=nb,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [None]:
# Report the best smoothing value found for Gaussian NB and its mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'var_smoothing': 0.3684210526315789}
best score : -1.2441401372989112


### 3. Bernoulli Naive Bayes

In [None]:
# Bernoulli naive Bayes with default settings; `alpha` is tuned in the next cells.
# NOTE(review): BernoulliNB thresholds each feature at `binarize` (0.0 by
# default per the scikit-learn docs). The previewed feature values are all
# positive, so every feature may collapse to 1 - confirm this model is
# meaningful on this data.
nb = BernoulliNB()

In [None]:
# Smoothing strengths to try: 20 evenly spaced values from 0 to 5 (both ends included).
params = dict(alpha=np.linspace(0, 5, num=20))

In [None]:
# Cross-validated search over `alpha` for Bernoulli naive Bayes, scored by
# negative log loss on the shared `kfold` splits.
gcv = GridSearchCV(
    estimator=nb,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [None]:
# Report the best `alpha` found for Bernoulli NB and its mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'alpha': 5.0}
best score : -1.3886251445059252


### 4. Quadratic Discriminant Analysis (QDA)

In [None]:
# QDA is evaluated with its default settings only. The grid is empty, so the
# "search" has a single candidate and simply yields a cross-validated
# negative-log-loss score comparable with the other models.
qda = QuadraticDiscriminantAnalysis()
params = dict()
gcv = GridSearchCV(
    estimator=qda,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [None]:
# Report the (empty) best-parameter dict for QDA and its mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {}
best score : -0.42849293443459463


### 5. Linear Discriminant Analysis (LDA)

In [None]:
# LDA with default settings; only the solver is tuned.
lda = LinearDiscriminantAnalysis()

# Same shuffled, stratified 5-fold splitter (fixed seed) as the other searches.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Solver names are case-sensitive in scikit-learn. The previous value 'Lsqr'
# is rejected during fitting, so the least-squares solver was never actually
# evaluated; the valid spelling is 'lsqr'.
params = {'solver': ['svd', 'lsqr', 'eigen']}

# Rank the solvers by cross-validated negative log loss.
gcv = GridSearchCV(lda, param_grid=params, cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)

In [None]:
# Report the best LDA solver and its mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'solver': 'svd'}
best score : -0.47946220433359077


### 6. k-Nearest Neighbors (KNN)

In [None]:
# k-NN preceded by standardisation. Wrapping both in a pipeline means the
# scaler is re-fitted on the training part of each CV split.
knn = KNeighborsClassifier()
scaler = StandardScaler()
pipe = Pipeline(steps=[('SCL', scaler), ('KNN', knn)])

In [None]:
# Neighbour counts 1 through 10; the 'KNN__' prefix routes the value to the
# pipeline's k-NN step.
params = dict(KNN__n_neighbors=np.arange(1, 10 + 1))

In [None]:
# Cross-validated search over the neighbour count for the scaled k-NN
# pipeline, scored by negative log loss on the shared `kfold` splits.
gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [None]:
# Report the best neighbour count for k-NN and its mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'KNN__n_neighbors': 10}
best score : -1.0711613659010997


### 7. Support Vector Machine

In [None]:
# Seeded SVC with probability estimates enabled, which the log-loss scoring
# used in the search needs. The kernel set here is only the starting value.
svm = SVC(
    kernel='linear',
    probability=True,
    random_state=23,
)

In [None]:
# Standardise the features before the SVC.
scaler = StandardScaler()
pipe = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

# List every tunable name (step__parameter) so the grid keys used in the
# search can be checked against them.
print(pipe.get_params(deep=True))

{'memory': None, 'steps': [('SCL', StandardScaler()), ('SVM', SVC(kernel='linear', probability=True, random_state=23))], 'verbose': False, 'SCL': StandardScaler(), 'SVM': SVC(kernel='linear', probability=True, random_state=23), 'SCL__copy': True, 'SCL__with_mean': True, 'SCL__with_std': True, 'SVM__C': 1.0, 'SVM__break_ties': False, 'SVM__cache_size': 200, 'SVM__class_weight': None, 'SVM__coef0': 0.0, 'SVM__decision_function_shape': 'ovr', 'SVM__degree': 3, 'SVM__gamma': 'scale', 'SVM__kernel': 'linear', 'SVM__max_iter': -1, 'SVM__probability': True, 'SVM__random_state': 23, 'SVM__shrinking': True, 'SVM__tol': 0.001, 'SVM__verbose': False}


In [None]:
# Same shuffled, stratified 5-fold splitter (fixed seed) as the other searches.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Grid over the pipeline's SVC step (the 'SVM__' prefix routes to that step).
# 'precomputed' is intentionally excluded: that kernel expects a square
# (n_samples, n_samples) Gram matrix as input, so on the scaled feature matrix
# every fit with it fails and the candidate is merely scored NaN. That cost
# 100 of the 500 fits for no usable result.
params = {
    'SVM__C': np.linspace(0.001, 5, 10),
    'SVM__decision_function_shape': ['ovo', 'ovr'],
    'SVM__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
}

# Rank the candidates by cross-validated negative log loss; verbose=2 logs
# one line per individual fit.
gcv = GridSearchCV(pipe, param_grid=params, cv=kfold, scoring='neg_log_loss', verbose=2)
gcv.fit(X, y)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=linear; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=linear; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=linear; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=linear; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=linear; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=poly; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=poly; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=poly; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=poly; total time=   0.1s
[CV] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__kernel=poly; total 

In [None]:
# Report the best SVM pipeline settings and their mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo', 'SVM__kernel': 'rbf'}
best score : -0.3916952533396386


### 8. Decision Tree

In [None]:
# Decision tree with a fixed seed so repeated runs build the same tree.
dtc = DecisionTreeClassifier(
    random_state=23,
)

In [None]:
# Depth limits to compare; None lets the tree grow without a depth cap.
params = dict(max_depth=[2, 3, 4, 5, 6, None])

# Cross-validated search over the depth limit, scored by negative log loss on
# the shared `kfold` splits.
gcv = GridSearchCV(
    estimator=dtc,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

In [None]:
# Report the best tree depth and its mean CV score.
for label, value in (("best parameter :", gcv.best_params_),
                     ("best score :", gcv.best_score_)):
    print(label, value)

best parameter : {'max_depth': 2}
best score : -0.9675908446430928


In [None]:
# Rebuild the scaler + SVC pipeline with the settings the SVM grid search
# reported as best (C=5.0, 'ovo', rbf kernel). The values are hard-coded
# because `gcv` has since been overwritten by the decision-tree search.
scaler = StandardScaler()
svm = SVC(
    kernel='rbf',
    C=5.0,
    decision_function_shape='ovo',
    random_state=23,
)
pipe = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

In [None]:
# Seeded hold-out split with the default proportions, stratified on the
# target so each part keeps the class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=23,
)

In [None]:
# Fit the scaler + SVC pipeline on the training part only. Pipeline.fit
# returns the pipeline itself, so `model` and `pipe` name the same fitted object.
pipe.fit(X_train, y_train)
model = pipe

In [None]:
# Predict the encoded class for every held-out row.
y_pred = model.predict(X=X_test)

In [None]:
# Display the predicted class codes for the held-out rows (integer labels as
# produced by the LabelEncoder earlier in the notebook).
y_pred

array([2, 0, 3, 0, 1, 2, 0, 1, 1, 2, 3, 0, 3, 0, 3, 1, 3, 1, 3, 2, 2, 0,
       0, 2, 2, 3, 3, 1, 2, 0, 3, 0, 1, 1, 3, 1, 1, 2, 3, 3, 2, 0, 2, 3,
       1, 0, 3, 1, 1, 0, 2, 0, 0, 1, 3, 3, 0, 0, 2, 3, 0, 1, 2, 2, 2, 2,
       3, 0, 1, 0, 2, 0, 1, 3, 2, 2, 1, 3, 2, 0, 3, 2, 3, 2, 0, 3, 0, 2,
       0, 2, 2, 1, 0, 0, 3, 1, 3, 3, 3, 3, 2, 0, 1, 0, 0, 0, 1, 3, 3, 0,
       1, 3, 2, 0, 3, 0, 2, 3, 2, 3, 2, 1, 0, 0, 3, 0, 2, 2, 2, 1, 2, 1,
       1, 2, 2, 2, 1, 0, 0, 2, 0, 2, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, 1, 0,
       2, 3, 1, 1, 1, 3, 3, 3, 2, 2, 3, 3, 2, 1, 3, 2, 0, 0, 2, 0, 1, 3,
       0, 3, 1, 2, 1, 0, 2, 2, 0, 3, 1, 0, 1, 3, 0, 2, 3, 1, 2, 2, 2, 1,
       0, 2, 0, 3, 0, 3, 0, 2, 3, 0, 1, 1, 0, 2])

In [None]:
# Attach the predictions to the held-out feature frame as a `Predicted` column.
# NOTE(review): after this cell X_test carries a column that X_train lacks, so
# re-running the predict cell without re-running the split will likely fail -
# consider keeping predictions in a separate frame.
X_test = X_test.assign(Predicted=y_pred)

In [None]:
# Side-by-side table of predicted vs. actual class codes for the held-out
# rows. `y_test` is a Series, so the frame keeps its original row labels.
testing = pd.DataFrame({'Predicted': y_pred, 'Actual': y_test})

# End the cell on the frame so the comparison table is actually rendered; an
# assignment alone produces no output when the notebook is re-run.
testing

Unnamed: 0,Predicted,Actual
721,2,2
717,0,0
774,3,3
758,0,0
404,1,1
...,...,...
464,0,0
421,1,1
501,1,1
738,0,0
