# Model Evaluation  |  Cross-Validation


# Dataset | Breast Cancer

### Importing Libraries

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Render matplotlib figures inline in the notebook output area.
%matplotlib inline
# Switch on seaborn's default plot styling for any figures drawn later.
# NOTE(review): newer seaborn releases document sns.set_theme() as the
# preferred spelling of this call -- confirm the installed version before
# switching.
sns.set()

In [13]:
from sklearn.datasets import load_breast_cancer

In [14]:
# Load the bundled breast-cancer dataset. Later cells read three attributes
# of the returned object: .DESCR (text description), .data (feature matrix)
# and .target (class labels).
cancer = load_breast_cancer()

In [15]:
# DESCR is a long multi-line string; print() renders it with real line breaks
# instead of the escaped repr a bare expression would display.
print(cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

### Selecting Model 

In [16]:
from sklearn.svm import SVC

In [17]:
# Estimator under evaluation: an SVC constructed with no arguments, i.e. the
# library's default hyperparameters. The same unfitted object is handed to
# every cross-validation call further down.
# NOTE(review): cancer.data is passed to the SVC as-is, with no scaling step.
# SVMs are generally sensitive to feature scale, so consider wrapping this in
# a Pipeline with a StandardScaler and re-running the notebook -- TODO confirm.
clf = SVC()

### Cross Validation with cross_val_score (Default: Stratified K-fold for classifiers)

In [18]:
from sklearn.model_selection import cross_val_score

In [19]:
# 5-fold cross-validation of the classifier; no `scoring` is given, so the
# estimator's own default scorer is used. The result is one score per fold.
scores = cross_val_score(
    estimator=clf,
    X=cancer.data,
    y=cancer.target,
    cv=5,
)
# Show the per-fold scores to two decimals.
np.round(scores, 2)

array([0.85, 0.89, 0.93, 0.95, 0.94])

In [20]:
# Average the per-fold scores and show the result to two decimals.
np.round(scores.mean(), 2)

0.91

### Cross Validation with cross_validate

### 1- scoring = 'accuracy'

In [21]:
from sklearn.model_selection import cross_validate

In [22]:
# Same 5-fold evaluation through cross_validate, scored on accuracy. The
# result is a dict holding per-fold 'fit_time', 'score_time' and 'test_score'
# arrays.
scores = cross_validate(
    estimator=clf,
    X=cancer.data,
    y=cancer.target,
    scoring='accuracy',
    cv=5,
)
scores

{'fit_time': array([0.01001215, 0.00888515, 0.0102253 , 0.01355267, 0.01139474]),
 'score_time': array([0.00572443, 0.00626802, 0.00363779, 0.0044651 , 0.00622034]),
 'test_score': array([0.85087719, 0.89473684, 0.92982456, 0.94736842, 0.9380531 ])}

In [23]:
# Tabulate the cross_validate result -- one row per fold, one column per
# dict key -- rounded to three decimals for display.
pd.DataFrame(data=scores).round(decimals=3)

Unnamed: 0,fit_time,score_time,test_score
0,0.01,0.006,0.851
1,0.009,0.006,0.895
2,0.01,0.004,0.93
3,0.014,0.004,0.947
4,0.011,0.006,0.938


In [24]:
# Mean accuracy over the five folds. `scores` is the dict returned by
# cross_validate and its 'test_score' entry is already an array, so it can be
# averaged directly instead of building a throwaway DataFrame first.
scores['test_score'].mean()

0.9121720229777983

### 2- scoring = 'average_precision'

In [25]:
from sklearn.model_selection import cross_validate

In [26]:
# Repeat the 5-fold evaluation, this time scored with the 'average_precision'
# scorer. `scores` is rebound to the new result dict, which has the same
# 'fit_time' / 'score_time' / 'test_score' keys.
scores = cross_validate(
    estimator=clf,
    X=cancer.data,
    y=cancer.target,
    scoring='average_precision',
    cv=5,
)
scores

{'fit_time': array([0.00753784, 0.00203204, 0.00599217, 0.01203394, 0.00311279]),
 'score_time': array([0.01083851, 0.01082015, 0.00558472, 0.        , 0.00813031]),
 'test_score': array([0.97162911, 0.9863203 , 0.98964237, 0.97967836, 0.99093745])}

In [27]:
# Tabulate the average-precision run -- one row per fold -- rounded to three
# decimals for display.
pd.DataFrame(data=scores).round(decimals=3)

Unnamed: 0,fit_time,score_time,test_score
0,0.008,0.011,0.972
1,0.002,0.011,0.986
2,0.006,0.006,0.99
3,0.012,0.0,0.98
4,0.003,0.008,0.991


In [28]:
# Mean average-precision over the five folds. The 'test_score' entry of the
# cross_validate result is already an array, so average it directly rather
# than rebuilding a DataFrame just to read one column.
scores['test_score'].mean()

0.9836415169786278