# Comparision of classifiers

In [3]:
import sklearn

from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

In [6]:
from catboost import CatBoostClassifier

## Load dataset

In [7]:
# load sample
dataset = load_breast_cancer()

data = dataset['data']
target = dataset['target']

# train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(data, target, test_size=0.2, shuffle=True, stratify=target, random_state=1234)

## Compared models/algorithms

In [9]:
dict_classifiers = {}
dict_classifiers['5NN'] = KNeighborsClassifier(n_neighbors=5)
dict_classifiers['MLP'] = MLPClassifier(hidden_layer_sizes=(32,32))
dict_classifiers['SVM'] = SVC()
dict_classifiers['Decision tree'] = DecisionTreeClassifier()
dict_classifiers['Random forest'] = RandomForestClassifier(max_depth=2,)
dict_classifiers['Gradient boosting'] = GradientBoostingClassifier()
dict_classifiers['CatBoost'] = CatBoostClassifier(verbose=False)

## Train/Test

In [10]:
for name, clf in dict_classifiers.items():
    print("="*20)
    print(name)
    
    clf.fit(x_train, y_train)
    y_hat_valid = clf.predict(x_valid)
    accuracy = accuracy_score(y_valid, y_hat_valid)
    print(f"Accuracy: {accuracy:.4f}")

5NN
Accuracy: 0.9211
MLP




Accuracy: 0.9211
SVM
Accuracy: 0.9123
Decision tree
Accuracy: 0.9561
Random forest
Accuracy: 0.9561
Gradient boosting
Accuracy: 0.9737
CatBoost
Accuracy: 0.9561
