<h1>Chapter 11 - Model Evaluation</h1>

Imports

In [22]:
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston, load_iris, make_classification
from sklearn.dummy import DummyRegressor, DummyClassifier
from sklearn.ensemble import RandomForestClassifier

## 11.1 Cross-Validating Models 

In [2]:
# Handwritten-digits dataset: feature matrix and class labels.
digits = datasets.load_digits()
features, target = digits.data, digits.target

# Chain the scaler and the classifier so the scaler is re-fit inside every
# training fold instead of seeing the held-out fold.
standardizer = StandardScaler()
logit = LogisticRegression()
pipeline = make_pipeline(standardizer, logit)

# Shuffled 10-fold split with a fixed seed for repeatable folds.
kf = KFold(n_splits=10, shuffle=True, random_state=1)

# Per-fold accuracy; n_jobs=-1 lets scikit-learn use every available core.
cv_results = cross_val_score(
    pipeline,
    features,
    target,
    cv=kf,
    scoring="accuracy",
    n_jobs=-1,
)

In [3]:
# Show the accuracy obtained on each of the 10 folds.
cv_results

array([0.97777778, 0.98888889, 0.96111111, 0.94444444, 0.97777778,
       0.98333333, 0.95555556, 0.98882682, 0.97765363, 0.93854749])

In [5]:
# Hold out 10% of the digits data as a test split (seeded for repeatability).
(
    features_train,
    features_test,
    target_train,
    target_test,
) = train_test_split(features, target, test_size=0.1, random_state=1)

In [6]:
# Learn the scaling statistics from the training split only, then apply those
# same statistics to both splits so nothing from the test split leaks in.
features_train_std = standardizer.fit(features_train).transform(features_train)
features_test_std = standardizer.transform(features_test)

In [7]:
# Rebuild the scaler + classifier pipeline and cross-validate it on the full
# digits data with the same seeded 10-fold splitter defined earlier.
pipeline = make_pipeline(standardizer, logit)
cv_results = cross_val_score(
    pipeline,
    features,
    target,
    cv=kf,
    scoring="accuracy",
    n_jobs=-1,
)

In [8]:
# Show the per-fold accuracy of the pipeline-based cross-validation.
cv_results

array([0.97777778, 0.98888889, 0.96111111, 0.94444444, 0.97777778,
       0.98333333, 0.95555556, 0.98882682, 0.97765363, 0.93854749])

## 11.2 Creating a Baseline Regression Model 

In [10]:
# Load the Boston housing regression dataset via scikit-learn's loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and the import at the top) presumably needs an older
# scikit-learn release -- confirm the pinned version or switch datasets.
boston = load_boston()

In [12]:
# Regression features/target and a seeded train/test split.
features = boston.data
target = boston.target
(
    features_train,
    features_test,
    target_train,
    target_test,
) = train_test_split(features, target, random_state=0)

# Baseline that always predicts the mean of the training target.
dummy = DummyRegressor(strategy="mean")
dummy.fit(features_train, target_train)

# Baseline score on the held-out split (the bar a real model must beat).
dummy.score(features_test, target_test)

-0.001119359203955339

In [14]:
# Ordinary least squares trained on the same split, for comparison with the
# baseline score.
ols = LinearRegression().fit(features_train, target_train)
ols.score(features_test, target_test)

0.6354638433202116

In [15]:
# Alternative baseline: a regressor that predicts the constant 20 for every
# observation (note that `clf` holds a regressor here, not a classifier).
clf = DummyRegressor(strategy="constant", constant=20).fit(
    features_train, target_train
)
clf.score(features_test, target_test)

-0.06510502029325727

## 11.3 Creating a Baseline Classification Model 

In [19]:
# Iris classification data and a seeded train/test split.
iris = load_iris()
features = iris.data
target = iris.target
(
    features_train,
    features_test,
    target_train,
    target_test,
) = train_test_split(features, target, random_state=0)

# Baseline classifier that guesses classes uniformly at random (seeded).
dummy = DummyClassifier(strategy="uniform", random_state=1)
dummy.fit(features_train, target_train)

# Accuracy of the random-guess baseline on the held-out split.
dummy.score(features_test, target_test)

0.42105263157894735

In [21]:
# Real model to compare against the random-guess baseline.
# The forest is seeded (like every other stochastic step in this notebook) so
# that Restart & Run All reproduces the same fitted trees and the same score;
# without random_state the bootstrap samples and feature draws differ per run.
classifier = RandomForestClassifier(random_state=0)
classifier.fit(features_train, target_train)

# Accuracy on the held-out split.
classifier.score(features_test, target_test)

0.9736842105263158

## 11.4 Evaluating Binary Classifier Predictions 

In [23]:
# Synthetic binary classification problem: 10,000 samples, 3 features, all of
# them informative, generated with a fixed seed.
X, y = make_classification(
    n_samples=10000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    random_state=1,
)

logit = LogisticRegression()

# Cross-validated accuracy using scikit-learn's default CV splitting.
cross_val_score(estimator=logit, X=X, y=y, scoring="accuracy")

array([0.9555, 0.95  , 0.9585, 0.9555, 0.956 ])

In [24]:
# Cross-validated precision of the logistic regression on the synthetic data.
cross_val_score(estimator=logit, X=X, y=y, scoring="precision")

array([0.95963673, 0.94820717, 0.9635996 , 0.96149949, 0.96060606])

In [25]:
# Cross-validated recall of the logistic regression on the synthetic data.
cross_val_score(estimator=logit, X=X, y=y, scoring="recall")

array([0.951, 0.952, 0.953, 0.949, 0.951])

In [27]:
# Cross-validated F1 score (harmonic mean of precision and recall).
cross_val_score(estimator=logit, X=X, y=y, scoring="f1")

array([0.95529884, 0.9500998 , 0.95827049, 0.95520886, 0.95577889])