# Model Evaluation

<a href="https://colab.research.google.com/github/thomasjpfan/ml-workshop-intermediate-2-of-2/blob/master/notebooks/01-model-evaluation.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab" title="Open and Execute in Google Colaboratory"></a>

In [None]:
# Install dependencies for google colab
import sys
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-2-of-2/master/requirements.txt

In [None]:
import sklearn
# Stop early unless the installed scikit-learn reports a version starting with "1.0".
assert sklearn.__version__.startswith("1.0"), "Please install scikit-learn 1.0"

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Shared matplotlib defaults for the figures drawn after this cell.
plt.rcParams.update({
    'font.size': 20,
    'figure.figsize': [12, 8],
    'lines.linewidth': 2.5,
    'savefig.bbox': 'tight',
})

# Ask scikit-learn to render estimators with its 'diagram' display mode.
sklearn.set_config(display='diagram')

In [None]:
from sklearn.datasets import fetch_openml

In [None]:
# Fetch OpenML dataset 310 and split it into the feature table and the target.
mammography = fetch_openml(data_id=310, as_frame=True)
X, y = mammography.data, mammography.target

In [None]:
X.head()

## Is this data imbalanced?

In [None]:
y.value_counts()

In [None]:
# Recode the target as integers: 1 where the label is the string '1', 0 otherwise.
y = (y == '1').astype('int')

In [None]:
y

## Train models for prediction

### Linear model

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [None]:
# Hold out a test set. stratify=y splits with respect to the class labels, and
# random_state=42 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42)

In [None]:
# Two-step pipeline: standardize the features, then fit a logistic regression
# on the scaled values.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('log_reg', LogisticRegression(random_state=42)),
]
log_reg = Pipeline(steps=pipeline_steps)
# Kept as the last expression so the notebook displays the fitted pipeline.
log_reg.fit(X_train, y_train)

In [None]:
y_pred = log_reg.predict(X_test)

In [None]:
y_pred

In [None]:
log_reg.score(X_test, y_test)

In [None]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

## Exercise 1

1. Fit a `sklearn.ensemble.RandomForestClassifier` model on the same dataset.
2. Use the `score` method to compute the accuracy on the test set.
3. Print the classification report for the random forest with the test set.
4. Compare the classification report of the random forest to logistic regression. Which one has the better overall performance?

In [None]:
from sklearn.ensemble import RandomForestClassifier

**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-2-of-2/blob/master/notebooks/solutions/01-ex01-solutions.py).  

In [None]:
# %load solutions/01-ex01-solutions.py

## Thresholds

### Default

In [None]:
y_pred = log_reg.predict(X_test)
print(classification_report(y_test, y_pred))

Using probabilities

In [None]:
y_proba = log_reg.predict_proba(X_test)

In [None]:
y_proba[65:70]

In [None]:
y_pred[65:70]

### Threshold at 0.50

In [None]:
# Flag a sample as positive when the second predict_proba column exceeds 0.5.
y_pred_50 = y_proba[:, 1] > 0.5
print(classification_report(y_test, y_pred_50))

### Threshold at 0.25

In [None]:
# Lower cut-off: every sample flagged at 0.5 is still flagged, plus those
# whose score lies between 0.25 and 0.5.
y_pred_25 = y_proba[:, 1] > 0.25
print(classification_report(y_test, y_pred_25))

### Threshold at 0.75

In [None]:
# Higher cut-off: only samples scoring above 0.75 stay positive, a subset of
# those flagged at 0.5.
y_pred_75 = y_proba[:, 1] > 0.75
print(classification_report(y_test, y_pred_75))

In [None]:
from sklearn.metrics import PrecisionRecallDisplay
PrecisionRecallDisplay.from_estimator(
    log_reg, X_test, y_test, name="LogisticRegression")

In [None]:
from sklearn.metrics import RocCurveDisplay
RocCurveDisplay.from_estimator(log_reg, X_test, y_test, name="LogisticRegression")

## Use ax to plot both curves next to each other

In [None]:
# One figure with two panels: ROC curve on the left, precision-recall on the right.
# Passing `ax=` tells each display which panel to draw on.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
RocCurveDisplay.from_estimator(log_reg, X_test, y_test, name="LogisticRegression", ax=ax1)
PrecisionRecallDisplay.from_estimator(log_reg, X_test, y_test, name="LogisticRegression", ax=ax2)

In [None]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

In [None]:
# Passing the same `ax` to both calls overlays the two models'
# precision-recall curves on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 8))
PrecisionRecallDisplay.from_estimator(log_reg, X_test, y_test, ax=ax, name="Logistic Regression")
PrecisionRecallDisplay.from_estimator(rf, X_test, y_test, ax=ax, name="Random Forest")

## Exercise 2

1. Plot the roc curve of the logistic regression model and the random forest model on the same axes.
2. Train a `sklearn.dummy.DummyClassifier(strategy='prior')` on the training dataset and plot the precision recall curve and the roc curve with the test dataset.
    - **Hint**: Plot on separate axes `fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))`
3. What is the ROC AUC and the average precision for the dummy classifier?
4. **Extra**: Compute the f1 score for the three models we have trained so far with the test set. Which model performs the best according to the f1 score?

In [None]:
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score

**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-2-of-2/blob/master/notebooks/solutions/01-ex02-solutions.py).

In [None]:
# %load solutions/01-ex02-solutions.py

## Decision function

### Comparing decision function vs predictions

In [None]:
log_reg_decision = log_reg.decision_function(X_test)

In [None]:
# Check that predict returns 1 exactly where the decision function is positive.
np.all((log_reg_decision > 0) ==  log_reg.predict(X_test))

In [None]:
log_reg_pred = log_reg.predict_proba(X_test)

In [None]:
log_reg_pred

### Computing the predict_proba from the decision function

In [None]:
# Apply the logistic (sigmoid) function to the decision values; compare the
# result with the second predict_proba column shown in the next cell.
1/(1 + np.exp(-log_reg_decision))

In [None]:
log_reg_pred[:, 1]

### Ranking metrics

In [None]:
from sklearn.metrics import average_precision_score

#### Using the decision function to compute the average precision

In [None]:
average_precision_score(y_test, log_reg_decision)

#### Using predict_proba to compute the average precision

In [None]:
average_precision_score(y_test, log_reg_pred[:, 1])

## Exercise 3

1. Compute the `roc_auc_score` for the random forest on the test set. **Hint**: Use `predict_proba`.
2. Train a `sklearn.svm.SVC` model on the training dataset and compute the average precision. **Hint**: Use `decision_function`.

In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.svm import SVC

**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-2-of-2/blob/master/notebooks/solutions/01-ex03-solutions.py).

In [None]:
# %load solutions/01-ex03-solutions.py

## Multiclass

In [None]:
from sklearn.datasets import load_digits

# Switch to the digits dataset. X, y and the four train/test variables are
# rebound here, replacing the mammography data used in the earlier cells.
digits = load_digits()
X, y = digits.data, digits.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42)

In [None]:
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test, cmap='gray_r')

In [None]:
y_pred = rf.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
rf_y_pred_proba = rf.predict_proba(X_test)

In [None]:
roc_auc_score(y_test, rf_y_pred_proba, multi_class='ovo')

In [None]:
roc_auc_score(y_test, rf_y_pred_proba, multi_class='ovr')

## Regression

In [None]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing(as_frame=True)
X, y = housing.data, housing.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42)

In [None]:
from sklearn.pipeline import make_pipeline

ridge = make_pipeline(StandardScaler(), Ridge())
ridge.fit(X_train, y_train)

In [None]:
ridge.score(X_test, y_test)

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [None]:
ridge_pred = ridge.predict(X_test)

### Look at predictions

In [None]:
ridge_pred[:10]

In [None]:
ridge_r2 = r2_score(y_test, ridge_pred)
ridge_r2

In [None]:
ridge_mse = mean_squared_error(y_test, ridge_pred)
ridge_mse

In [None]:
ridge_mae = mean_absolute_error(y_test, ridge_pred)
ridge_mae

## Prediction plots

In [None]:
# Residual plot: prediction error against the predicted value.
fig, ax = plt.subplots()
delta = y_test - ridge_pred  # residuals; reused by the histogram cell below
ax.plot(ridge_pred, delta, 'o', alpha=0.5)
ax.axhline(y=0, c='k', ls='--')  # zero-error reference line
# The trailing semicolon keeps the notebook from echoing ax.set's return value.
ax.set(xlabel='predicted', ylabel='y_true - predicted', aspect='equal');

## Prediction plots histogram

In [None]:
# Histogram of the residuals (`delta`) computed in the previous plot cell.
fig, ax = plt.subplots()
ax.hist(delta, bins=40)
ax.set(xlabel="y_true - predicted", ylabel="Counts");

## Exercise 4

1. Train a random forest regressor on the training dataset.
2. Evaluate the random forest regressor on the test dataset and compare it to the ridge results with `r2_score`, `mean_squared_error`, `mean_absolute_error`.

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-2-of-2/blob/master/notebooks/solutions/01-ex04-solutions.py).

In [None]:
# %load solutions/01-ex04-solutions.py

## Prediction plots per feature (Extra)

In [None]:
# NOTE(review): `load_boston` is not used in any cell visible here, and it is
# deprecated as of scikit-learn 1.0 (removed in 1.2) — confirm and drop this import.
from sklearn.datasets import load_boston
# pandas is already imported as `pd` near the top of the notebook.
import pandas as pd

In [None]:
X_test.head()

In [None]:
X_analysis = X_test.assign(
    delta=y_test - ridge_pred
)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot the ridge residuals against each feature, one panel per feature.
# 'delta' is the residual column itself, so it is left out of the x-axes.
feature_columns = X_analysis.columns.drop('delta')

# NOTE(review): the fixed 2x4 grid assumes eight feature columns; zip stops at
# whichever of the columns or the panels runs out first.
fig, axes = plt.subplots(2, 4, figsize=(24, 10), constrained_layout=True)
for column, ax in zip(feature_columns, axes.ravel()):
    sns.scatterplot(x=column, y='delta', ax=ax, data=X_analysis)
    ax.axhline(y=0, c='k', ls='--')  # zero-error reference line