<a href="https://colab.research.google.com/github/ppajewski/classification-Models-Performance-Evaluation/blob/master/Classification_Models_Performance_Evaluation_ipnyb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification Models Performance Evaluation

## Importing the libraries

In [51]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [52]:
# Load the raw table; by this notebook's convention every column except the
# last is a feature and the last column is the class label.
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [53]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Feature Scaling

In [54]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information is used in fitting.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Training the Logistic Regression model on the Training set

In [55]:
from sklearn.linear_model import LogisticRegression

# Construct and fit in one step (fit returns the estimator itself); ending the
# cell on the bare name keeps the fitted model's repr as the cell output.
classifier_LR = LogisticRegression(random_state=0).fit(X_train, y_train)
classifier_LR

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Training the Decision Tree Classification model on the Training set

In [56]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree split on the entropy criterion, seeded for
# reproducibility. The trailing bare name displays the fitted estimator.
classifier_DT = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)
classifier_DT

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

## Training the K-NN model on the Training set

In [57]:
from sklearn.neighbors import KNeighborsClassifier

# 5 nearest neighbours; the Minkowski metric with p=2 is the Euclidean distance.
# The trailing bare name displays the fitted estimator.
classifier_KNN = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
).fit(X_train, y_train)
classifier_KNN

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

## Training the Kernel SVM model on the Training set

In [58]:
from sklearn.svm import SVC

# Support vector classifier with the RBF kernel.
# The trailing bare name displays the fitted estimator.
classifier_KSVM = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)
classifier_KSVM

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

## Training the Naive Bayes model on the Training set

In [59]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with its default settings.
# The trailing bare name displays the fitted estimator.
classifier_NB = GaussianNB().fit(X_train, y_train)
classifier_NB

GaussianNB(priors=None, var_smoothing=1e-09)

## Training the Random Forest Classification model on the Training set

In [60]:
from sklearn.ensemble import RandomForestClassifier

# Forest of 10 entropy-criterion trees, seeded for reproducibility.
# The trailing bare name displays the fitted estimator.
classifier_RF = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)
classifier_RF

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

## Training the Linear SVM model on the Training set

In [61]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel.
# The trailing bare name displays the fitted estimator.
classifier_SVM = SVC(kernel='linear', random_state=0).fit(X_train, y_train)
classifier_SVM

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

## Training XGBoost on the Training set

In [62]:
from xgboost import XGBClassifier

# Gradient-boosted trees with the library's default hyper-parameters.
# The trailing bare name displays the fitted estimator.
classifier_XGB = XGBClassifier().fit(X_train, y_train)
classifier_XGB

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

## Making the Confusion Matrix

In [63]:
from sklearn.metrics import confusion_matrix, accuracy_score

# (display label, fitted classifier) pairs, listed in the order the results
# are reported. Every classifier here is trained in an earlier cell.
fitted_classifiers = [
    ("Logistic Regression", classifier_LR),
    ("Decision Tree", classifier_DT),
    ("K-NN", classifier_KNN),
    ("Kernel SVM", classifier_KSVM),
    ("Naive Bayes", classifier_NB),
    ("Random Forest", classifier_RF),
    ("Linear SVM", classifier_SVM),
    ("XGBoost", classifier_XGB),
]

# Score every model on the same held-out test split so the numbers are
# directly comparable. One loop replaces eight copy-pasted stanzas, so adding
# or removing a model is a one-line change and the report format cannot drift.
for model_name, classifier in fitted_classifiers:
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    print(f"Accuracy score for the {model_name} is: {accuracy_score(y_test, y_pred)}")
    print("and the confusion matrix: ")
    print(cm)

Accuracy score for the Logistic Regression is: 0.9560975609756097
and the confusion matrix: 
[[126   4]
 [  5  70]]
Accuracy score for the Decision Tree is: 0.9463414634146341
and the confusion matrix: 
[[126   4]
 [  7  68]]
Accuracy score for the K-NN is: 0.9512195121951219
and the confusion matrix: 
[[126   4]
 [  6  69]]
Accuracy score for the Kernel SVM is: 0.9560975609756097
and the confusion matrix: 
[[124   6]
 [  3  72]]
Accuracy score for the Naive Bayes is: 0.9463414634146341
and the confusion matrix: 
[[121   9]
 [  2  73]]
Accuracy score for the Random Forest is: 0.9560975609756097
and the confusion matrix: 
[[126   4]
 [  5  70]]
Accuracy score for the Linear SVM is: 0.9609756097560975
and the confusion matrix: 
[[126   4]
 [  4  71]]
Accuracy score for the XGBoost is: 0.9463414634146341
and the confusion matrix: 
[[126   4]
 [  7  68]]
