In [8]:
from typing import List
from sklearn.base import BaseEstimator
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from src.data import load_dataset
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd
from pandas.io.formats import style
from tqdm import tqdm

# Single seed shared by the data split and every estimator that accepts one,
# so the whole experiment can be repeated with identical results.
RANDOM_STATE = 42

# Feature matrix and target vector come from the project's own loader.
X, y = load_dataset()

# Hold out 20% of the samples for evaluation.
# NOTE(review): the split is not stratified; if the target is imbalanced,
# consider passing stratify=y -- confirm before changing, it alters all scores.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# Candidate classifiers, all with library-default hyperparameters.
# GaussianNB and KNeighborsClassifier are constructed without a seed.
models: List[BaseEstimator] = [
    GaussianNB(),
    RandomForestClassifier(random_state=RANDOM_STATE),
    GradientBoostingClassifier(random_state=RANDOM_STATE),
    SVC(random_state=RANDOM_STATE),
    MLPClassifier(random_state=RANDOM_STATE),
    KNeighborsClassifier(),
]

In [35]:
# Fit each candidate on the training split and score it on the held-out split.
# One dict per model is collected so the list can be turned into a DataFrame.
results = []

for model in tqdm(models):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Unpacking the flattened confusion matrix into exactly four counts
    # only works for a two-class target; anything else raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

    row = {
        'Model': type(model).__name__,
        'Accuracy': accuracy_score(y_test, y_pred),
        'TPR': tp / (tp + fn),  # true positive rate
        'TNR': tn / (tn + fp),  # true negative rate
    }
    results.append(row)

100%|██████████| 6/6 [00:33<00:00,  5.54s/it]


In [36]:
# One row per evaluated model; columns follow the key order of the dicts
# collected in `results` (Model, Accuracy, TPR, TNR).
results_df = pd.DataFrame.from_records(results)
results_df

Unnamed: 0,Model,Accuracy,TPR,TNR
0,GaussianNB,0.799017,0.320178,0.951234
1,RandomForestClassifier,0.857823,0.623806,0.932214
2,GradientBoostingClassifier,0.870567,0.621897,0.949616
3,SVC,0.795639,0.155315,0.999191
4,MLPClassifier,0.785506,0.134309,0.992513
5,KNeighborsClassifier,0.77875,0.326544,0.922501


In [37]:
# Build the presentation table from the raw `results` list rather than from
# the previous value of `results_df`. The earlier version consumed the 'Model'
# column of `results_df` and then overwrote that name, so running the cell a
# second time failed with KeyError: 'Model'. Starting from `results` makes the
# cell safe to re-run and gives the same table.
results_df = (
    pd.DataFrame(results)
    .set_index('Model')
    .sort_values(by='Accuracy', ascending=False)
    # Move the "Model" label from the index to the column header and blank the
    # index name, so the exported table has a single header row label.
    .rename_axis(index="", columns="Model")
    # Render every float with two decimals; the values become strings, so this
    # frame is for display/export only, not for further computation.
    .map(lambda x: f"{x:.2f}" if isinstance(x, float) else x)
)
results_df

Model,Accuracy,TPR,TNR
,,,
GradientBoostingClassifier,0.87,0.62,0.95
RandomForestClassifier,0.86,0.62,0.93
GaussianNB,0.8,0.32,0.95
SVC,0.8,0.16,1.0
MLPClassifier,0.79,0.13,0.99
KNeighborsClassifier,0.78,0.33,0.92


In [38]:
# Write the formatted results table to a LaTeX file. The path is relative to
# the notebook's working directory. `results_df.style` yields a Styler wrapping
# the frame, the same object the explicit Styler constructor would create.
results_df.style.to_latex(
    buf="../tables/base_results.tex",
    environment="table",
    position="t",
    position_float="centering",
    column_format="lccc",
    multicol_align="c",
    hrules=True,
    caption="Wyniki wybranych klasyfikatorów na zbiorze testowym przy zastosowaniu domyślnych parametrów.",
    label="tab:base-results",
)