# Entry 23 notebook - Scoring Classification Models - Theory

In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
import category_encoders as ce

from sklearn.pipeline import make_pipeline

In [2]:
# Load the breast-cancer dataset object; it exposes the feature matrix (.data),
# the class labels (.target) and the column names (.feature_names).
cancer = load_breast_cancer()

# Keep the labels as-is and wrap the features in a DataFrame with named columns.
target = cancer.target
df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)

## Standard pipeline with make_pipeline

In [3]:
# Keep 80% of the rows for training; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    target,
    train_size=0.8,
    random_state=12,
)

# Median imputation -> standardization -> logistic regression, chained as a single estimator.
pipe = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
    LogisticRegression(),
)

# fit() hands back the fitted pipeline, so the hold-out score can be chained and displayed.
pipe.fit(X_train, y_train).score(X_test, y_test)

0.9736842105263158

### Using the `scoring` parameter

In [28]:
# Built-in scorer names; each one shows up in the result dict as `test_<name>`.
builtin_scorers = [
    'precision',
    'average_precision',
    'recall',
    'f1',
    'neg_log_loss',
    'neg_brier_score',
]

# 10-fold cross-validation of the pipeline on the training split only.
cross_validate(pipe, X_train, y_train, scoring=builtin_scorers, cv=10)

{'fit_time': array([0.01355386, 0.01152802, 0.01034188, 0.01049304, 0.0082829 ,
        0.00822568, 0.00793123, 0.00792408, 0.00828123, 0.0075841 ]),
 'score_time': array([0.00488806, 0.00726295, 0.00550914, 0.00441504, 0.00532603,
        0.00426435, 0.00426698, 0.00411487, 0.00430775, 0.00481176]),
 'test_precision': array([0.96774194, 0.93548387, 1.        , 0.96666667, 0.93548387,
        1.        , 1.        , 1.        , 1.        , 0.96666667]),
 'test_average_precision': array([0.99423915, 0.99766151, 1.        , 0.99885057, 0.99164153,
        1.        , 1.        , 1.        , 0.99885057, 0.99515285]),
 'test_recall': array([1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 0.96551724, 1.        , 0.96551724, 1.        ]),
 'test_f1': array([0.98360656, 0.96666667, 1.        , 0.98305085, 0.96666667,
        1.        , 0.98245614, 1.        , 0.98245614, 0.98305085]),
 'test_neg_log_loss': array([-0.10971434, -0.1140552 , -0.01653453, -0.06429

Those results look suspiciously good. And the high scores are pretty consistent across all 10 cross-validation splits.

In [6]:
# Distinct class labels and how many samples carry each one.
labels, counts = np.unique(target, return_counts=True)
labels, counts

(array([0, 1]), array([212, 357]))

However, looking at the class distribution shows a pretty even distribution, roughly a 40/60 split. Nothing like a 90/10 split, which would give 90% accuracy just by always guessing the majority class.

Time to try the dummy model method explained in *[Machine Learning with Python Cookbook](https://www.amazon.com/Machine-Learning-Python-Cookbook-Preprocessing/dp/1491989386)*.

### Baseline dummy models

In [26]:
from sklearn.dummy import DummyClassifier

def make_dummy(X_train, y_train, X_test, y_test, strategy, random_state=12):
    """Fit a baseline ``DummyClassifier`` pipeline and report its test score.

    The dummy classifier is placed behind the same median-imputation and
    scaling steps used by the real pipeline, fitted on the training data and
    scored on the test data.

    Parameters
    ----------
    X_train, y_train
        Features and labels used to fit the pipeline.
    X_test, y_test
        Features and labels used to score the fitted pipeline.
    strategy : str
        Passed straight through to ``DummyClassifier(strategy=...)``.
    random_state : int, default=12
        Seed forwarded to ``DummyClassifier`` so repeated runs give the same
        score.

    Returns
    -------
    float
        The value of ``d_pipe.score(X_test, y_test)``. The same value is also
        printed, so existing callers that ignore the return value see no change.
    """
    d_pipe = make_pipeline(
        SimpleImputer(strategy='median'),
        StandardScaler(),
        DummyClassifier(strategy=strategy, random_state=random_state),
    )
    d_pipe.fit(X_train, y_train)

    # Compute the score once so the printed and the returned value always agree.
    score = d_pipe.score(X_test, y_test)
    print(strategy, 'score:', score, '\n')
    return score

In [27]:
# Score each baseline guessing strategy against the same train/test split.
baseline_strategies = ('stratified', 'most_frequent', 'prior', 'uniform')
for strategy in baseline_strategies:
    make_dummy(X_train, y_train, X_test, y_test, strategy=strategy)

stratified score: 0.5350877192982456 

most_frequent score: 0.5789473684210527 

prior score: 0.5789473684210527 

uniform score: 0.45614035087719296 



### `make_scorer` functionality

In [None]:
# Reference example of the `make_scorer` pattern (it looks like it was pasted
# from REPL-style documentation; the interactive prompts are removed so the cell
# is plain, runnable Python).
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

# Wrap a metric function together with its fixed keyword arguments ...
ftwo_scorer = make_scorer(fbeta_score, beta=2)

# ... and pass the resulting scorer wherever a `scoring` argument is accepted.
# The grid search is only constructed here; it is not fitted in this cell.
grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},
                    scoring=ftwo_scorer, cv=5)

In [29]:
from sklearn.metrics import make_scorer, balanced_accuracy_score, cohen_kappa_score, matthews_corrcoef, fbeta_score

In [30]:
# Scorers for metrics that take no extra arguments.
mcc = make_scorer(matthews_corrcoef)
kappa = make_scorer(cohen_kappa_score)

# Scorers whose metric needs a fixed keyword argument; make_scorer forwards it on every call.
fbet = make_scorer(fbeta_score, beta=0.5)
informedness = make_scorer(
    balanced_accuracy_score,
    adjusted=True,  # chance-adjusted balanced accuracy
)

In [35]:
# Map result labels to the custom scorer objects; each label appears as `test_<label>` in the output.
custom_scoring = {
    'informedness': informedness,
    'cohen_kappa': kappa,
    'matthews_corr': mcc,
    'f_beta': fbet,
}

# Same 10-fold cross-validation as before, now scored with the custom metrics.
cross_validate(pipe, X_train, y_train, scoring=custom_scoring, cv=10)

{'fit_time': array([0.01373601, 0.01640511, 0.01178503, 0.01210093, 0.00984502,
        0.01096487, 0.0100081 , 0.01053119, 0.00862694, 0.00976515]),
 'score_time': array([0.00388598, 0.00384498, 0.00424814, 0.00297809, 0.00253677,
        0.00244927, 0.00248384, 0.00387788, 0.0025599 , 0.00285888]),
 'test_informedness': array([0.9375    , 0.88235294, 1.        , 0.94117647, 0.88235294,
        1.        , 0.96551724, 1.        , 0.96551724, 0.9375    ]),
 'test_cohen_kappa': array([0.95137421, 0.9043659 , 1.        , 0.95277207, 0.9043659 ,
        1.        , 0.95217853, 1.        , 0.95217853, 0.95081967]),
 'test_matthews_corr': array([0.95250095, 0.9085301 , 1.        , 0.95383642, 0.9085301 ,
        1.        , 0.95326917, 1.        , 0.95326917, 0.95197164]),
 'test_f_beta': array([0.97402597, 0.94771242, 1.        , 0.97315436, 0.94771242,
        1.        , 0.9929078 , 1.        , 0.9929078 , 0.97315436])}