In [None]:
# RUN THIS COMMAND ONLY IF YOU USE GOOGLE COLAB.
# Mounts Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# RUN THIS COMMAND ONLY IF YOU USE GOOGLE COLAB.
%cd drive/MyDrive/TechLabs/04_Machine\ Learning

In [None]:
# IMPORT THESE FIRST.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn import datasets
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.metrics import (confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, 
                             classification_report, precision_recall_curve, roc_curve, auc, mean_squared_error,
                             r2_score, roc_auc_score)
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from data.adspy_shared_utilities import (plot_class_regions_for_classifier_subplot, 
                                         plot_class_regions_for_classifier)

# Chapter 03 - Evaluation
### Hey Techie,   
Welcome to the third notebook of this Machine Learning tutorial series. Today's videos introduce you to the concepts of model evaluation for both classification and regression.   
This notebook is designed to allow you to take notes as you watch each video and learn along the code discussed in the videos. At the end of the notebook, you will find practice tasks that you can solve on your own and compare to our sample solution.   
After auditing the course, you may find the respective materials here: https://www.coursera.org/learn/python-machine-learning/home/week/3
#### Have fun! :-)   
*Video length in total*: 80 minutes   
*Self-study time*: 80 minutes   
*Total*: **160 minutes**   
#### Credits
Applied Machine Learning in Python, University of Michigan (Coursera), https://www.coursera.org/learn/python-machine-learning?specialization=data-science-python
<hr style="border:2px solid gray"> </hr>   
   
## Notes

* Take notes here.
    * And here.
* ...
* ...

<hr style="border:2px solid gray"> </hr>   
   
## Applied Machine Learning, Module 3:  Evaluation

## Evaluation for Classification

### Preamble

In [None]:
# Load the digits data and show how many samples each class has.
dataset = load_digits()
X = dataset.data
y = dataset.target

samples_per_class = np.bincount(dataset.target)
for class_name, class_count in zip(dataset.target_names, samples_per_class):
    print(class_name, class_count)

In [None]:
# Build an imbalanced binary target from the multi-class digit labels:
#   positive class (1) -> 'digit 1'
#   negative class (0) -> 'not digit 1'
# The comparison yields booleans; casting back to y's dtype gives 0/1 labels.
y_binary_imbalanced = (y == 1).astype(y.dtype)

print('Original labels:\t', y[1:30])
print('New binary labels:\t', y_binary_imbalanced[1:30])

In [None]:
# Number of samples in class 0 and in class 1 of the binary target.
np.bincount(y_binary_imbalanced)    # Negative class (0) is the most frequent class

In [None]:
# Hold out a test set from the imbalanced binary problem (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0
)

# Test-set accuracy of a support vector machine with an RBF kernel
svm = SVC(C=1, kernel='rbf')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

### Dummy Classifiers

DummyClassifier is a classifier that makes predictions using simple rules, which can be useful as a baseline for comparison against actual classifiers, especially with imbalanced classes.

In [None]:
# The negative class (0) is the most frequent one in the training labels,
# so the 'most_frequent' dummy classifier always predicts class 0.
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)

y_dummy_predictions = dummy_majority.predict(X_test)
y_dummy_predictions

In [None]:
# Test-set accuracy of the majority-class baseline.
dummy_majority.score(X_test, y_test)

In [None]:
# Test-set accuracy of a linear-kernel SVM, for comparison with the baseline.
svm = SVC(C=1, kernel='linear')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

### Confusion matrices

#### Binary (two-class) confusion matrix

In [None]:
# Baseline that always predicts the most frequent class (the negative class, 0).
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)

y_majority_predicted = dummy_majority.predict(X_test)
confusion = confusion_matrix(y_test, y_majority_predicted)

print('Most frequent class (dummy classifier)\n', confusion)

In [None]:
# Produces random predictions with the same class proportions as the training set.
# random_state is fixed so this confusion matrix (and the classification report
# built from y_classprop_predicted further down) is reproducible across runs.
dummy_classprop = DummyClassifier(strategy='stratified', random_state=0).fit(X_train, y_train)
y_classprop_predicted = dummy_classprop.predict(X_test)
confusion = confusion_matrix(y_test, y_classprop_predicted)

print('Random class-proportional prediction (dummy classifier)\n', confusion)

In [None]:
# Confusion matrix of a linear-kernel SVM on the test set.
svm = SVC(C=1, kernel='linear')
svm.fit(X_train, y_train)

svm_predicted = svm.predict(X_test)
confusion = confusion_matrix(y_test, svm_predicted)

print('Support vector machine classifier (linear kernel, C=1)\n', confusion)

In [None]:
# Confusion matrix of a logistic regression (liblinear solver) on the test set.
lr = LogisticRegression(solver="liblinear")
lr.fit(X_train, y_train)

lr_predicted = lr.predict(X_test)
confusion = confusion_matrix(y_test, lr_predicted)

print('Logistic regression classifier (default settings)\n', confusion)

In [None]:
# Confusion matrix of a depth-2 decision tree on the test set.
dt = DecisionTreeClassifier(max_depth=2)
dt.fit(X_train, y_train)

tree_predicted = dt.predict(X_test)
confusion = confusion_matrix(y_test, tree_predicted)

print('Decision tree classifier (max_depth = 2)\n', confusion)

### Evaluation metrics for binary classification

In [None]:
# Metrics for the decision tree's test-set predictions.
# Accuracy = (TP + TN) / (TP + TN + FP + FN)
# Precision = TP / (TP + FP)
# Recall = TP / (TP + FN)  Also known as sensitivity, or True Positive Rate
# F1 = 2 * Precision * Recall / (Precision + Recall) 
print('Accuracy: {:.2f}'.format(accuracy_score(y_test, tree_predicted)))
print('Precision: {:.2f}'.format(precision_score(y_test, tree_predicted)))
print('Recall: {:.2f}'.format(recall_score(y_test, tree_predicted)))
print('F1: {:.2f}'.format(f1_score(y_test, tree_predicted)))

In [None]:
# Combined report with all above metrics, one row per class
# (label 0 is shown as 'not 1', label 1 as '1').
print(classification_report(y_test, tree_predicted, target_names=['not 1', '1']))

In [None]:
# Print one classification report per model, each under its own heading.
reports_to_show = [
    ('Random class-proportional (dummy)\n', y_classprop_predicted),
    ('SVM\n', svm_predicted),
    ('Logistic regression\n', lr_predicted),
    ('Decision tree\n', tree_predicted),
]
for heading, predicted in reports_to_show:
    print(heading,
          classification_report(y_test, predicted, target_names=['not 1', '1']))

### Decision functions

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0
)

# Refit the logistic regression and take its decision_function scores on the test set.
lr.fit(X_train, y_train)
y_scores_lr = lr.decision_function(X_test)

# (true label, decision_function score) pairs for the first 20 instances
y_score_list = list(zip(y_test[:20], y_scores_lr[:20]))
y_score_list

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0
)

# Refit the logistic regression and take its class probabilities on the test set.
lr.fit(X_train, y_train)
y_proba_lr = lr.predict_proba(X_test)

# (true label, probability of the positive class) pairs for the first 20 instances;
# column 1 of predict_proba is the positive class.
y_proba_list = list(zip(y_test[:20], y_proba_lr[:20, 1]))
y_proba_list

### Precision-recall curves

In [None]:
# Precision/recall at every decision threshold of the logistic regression scores.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores_lr)

# Index of the threshold nearest to zero, and the precision/recall at that point.
closest_zero = np.argmin(np.abs(thresholds))
closest_zero_p = precision[closest_zero]
closest_zero_r = recall[closest_zero]

fig, ax = plt.subplots(dpi=100)
ax.set_aspect('equal')
ax.set_xlim([0.0, 1.01])
ax.set_ylim([0.0, 1.01])
ax.plot(precision, recall, label='Precision-Recall Curve')
# Red open circle marks the operating point closest to threshold zero.
ax.plot(closest_zero_p, closest_zero_r, 'o', markersize=12, fillstyle='none', c='r', mew=3)
ax.set_xlabel('Precision', fontsize=16)
ax.set_ylabel('Recall', fontsize=16)
plt.show()

### ROC curves, Area-Under-Curve (AUC)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0
)

# ROC curve and area under it for the logistic regression decision scores.
lr.fit(X_train, y_train)
y_score_lr = lr.decision_function(X_test)
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_score_lr)
roc_auc_lr = auc(fpr_lr, tpr_lr)

fig, ax = plt.subplots(dpi=100)
ax.set_aspect('equal')
ax.set_xlim([-0.01, 1.00])
ax.set_ylim([-0.01, 1.01])
ax.plot(fpr_lr, tpr_lr, lw=3,
        label='LogRegr ROC curve (area = {:0.2f})'.format(roc_auc_lr))
ax.set_xlabel('False Positive Rate', fontsize=16)
ax.set_ylabel('True Positive Rate', fontsize=16)
ax.set_title('ROC curve (1-of-10 digits classifier)', fontsize=16)
ax.legend(loc='lower right', fontsize=13)
# Dashed diagonal for reference; drawn after the legend, so it has no legend entry.
ax.plot([0, 1], [0, 1], color='navy', lw=3, linestyle='--')
plt.show()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0
)

fig, ax = plt.subplots(dpi=100)
ax.set_aspect('equal')
ax.set_xlim([-0.01, 1.00])
ax.set_ylim([-0.01, 1.01])

# One ROC curve per gamma value; accuracy and AUC are printed alongside.
for g in [0.01, 0.1, 0.20, 1]:
    svm = SVC(gamma=g)
    svm.fit(X_train, y_train)
    y_score_svm = svm.decision_function(X_test)
    fpr_svm, tpr_svm, _ = roc_curve(y_test, y_score_svm)
    roc_auc_svm = auc(fpr_svm, tpr_svm)
    accuracy_svm = svm.score(X_test, y_test)
    print("gamma = {:.2f}  accuracy = {:.2f}   AUC = {:.2f}".format(
        g, accuracy_svm, roc_auc_svm))
    curve_label = 'SVM (gamma = {:0.2f}, area = {:0.2f})'.format(g, roc_auc_svm)
    ax.plot(fpr_svm, tpr_svm, lw=3, alpha=0.7, label=curve_label)

ax.set_xlabel('False Positive Rate', fontsize=16)
ax.set_ylabel('True Positive Rate (Recall)', fontsize=16)
# Thin dashed diagonal for reference.
ax.plot([0, 1], [0, 1], color='k', lw=0.5, linestyle='--')
ax.legend(loc="lower right", fontsize=11)
ax.set_title('ROC curve: (1-of-10 digits classifier)', fontsize=16)
plt.show()

### Evaluation measures for multi-class classification

#### Multi-class confusion matrix

In [None]:
# Back to the full ten-class digits problem.
dataset = load_digits()
X, y = dataset.data, dataset.target
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state=0)

digit_labels = list(range(10))
kernel_titles = [
    ('linear', 'SVM Linear Kernel \nAccuracy:{0:.3f}'),
    ('rbf', 'SVM RBF Kernel \nAccuracy:{0:.3f}'),
]

# One confusion-matrix heatmap per kernel. After the loop, svm and
# svm_predicted_mc belong to the RBF kernel (the last one fitted); the
# following cells report metrics for those predictions.
for kernel, title_template in kernel_titles:
    svm = SVC(kernel=kernel).fit(X_train_mc, y_train_mc)
    svm_predicted_mc = svm.predict(X_test_mc)
    confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)
    df_cm = pd.DataFrame(confusion_mc, index=digit_labels, columns=digit_labels)

    plt.figure(figsize=(5.5, 4))
    sns.heatmap(df_cm, annot=True)
    plt.title(title_template.format(accuracy_score(y_test_mc, svm_predicted_mc)))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#### Multi-class classification report

In [None]:
# Per-class precision, recall and F1 for the most recent multi-class SVM predictions.
print(classification_report(y_test_mc, svm_predicted_mc))

#### Micro- vs. macro-averaged metrics

In [None]:
# Precision of the multi-class predictions under micro and macro averaging.
micro_precision = precision_score(y_test_mc, svm_predicted_mc, average='micro')
macro_precision = precision_score(y_test_mc, svm_predicted_mc, average='macro')

print('Micro-averaged precision = {:.2f} (treat instances equally)'.format(micro_precision))
print('Macro-averaged precision = {:.2f} (treat classes equally)'.format(macro_precision))

In [None]:
# F1 of the multi-class predictions under micro and macro averaging.
micro_f1 = f1_score(y_test_mc, svm_predicted_mc, average='micro')
macro_f1 = f1_score(y_test_mc, svm_predicted_mc, average='macro')

print('Micro-averaged f1 = {:.2f} (treat instances equally)'.format(micro_f1))
print('Macro-averaged f1 = {:.2f} (treat classes equally)'.format(macro_f1))

### Regression evaluation metrics

In [None]:
diabetes = datasets.load_diabetes()

# Use a single feature (column 6), kept two-dimensional for scikit-learn.
X = diabetes.data[:, [6]]
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# A linear model and a baseline that always predicts the training mean.
lm = LinearRegression()
lm.fit(X_train, y_train)
lm_dummy_mean = DummyRegressor(strategy='mean')
lm_dummy_mean.fit(X_train, y_train)

y_predict = lm.predict(X_test)
y_predict_dummy_mean = lm_dummy_mean.predict(X_test)

mse_dummy = mean_squared_error(y_test, y_predict_dummy_mean)
mse_linear = mean_squared_error(y_test, y_predict)
r2_dummy = r2_score(y_test, y_predict_dummy_mean)
r2_linear = r2_score(y_test, y_predict)

print('Linear model, coefficients: ', lm.coef_)
print("Mean squared error (dummy): {:.2f}".format(mse_dummy))
print("Mean squared error (linear model): {:.2f}".format(mse_linear))
print("r2_score (dummy): {:.2f}".format(r2_dummy))
print("r2_score (linear model): {:.2f}".format(r2_linear))

# Plot outputs: test points in black, linear fit in green, mean baseline dashed red.
plt.scatter(X_test, y_test, color='black')
plt.plot(X_test, y_predict, color='green', linewidth=2)
plt.plot(X_test, y_predict_dummy_mean, color='red', linestyle='dashed',
         linewidth=2, label='dummy')

plt.show()

### Model selection using evaluation metrics

#### Cross-validation example

In [None]:
dataset = load_digits()
# again, a binary problem: 'digit 1' is the positive class, 'not 1' the negative class
X = dataset.data
y = dataset.target == 1
clf = SVC(kernel='linear', C=1)

# 5-fold cross-validation under three scoring metrics.
# scoring=None falls back to the estimator's default metric, accuracy.
metrics_to_score = [
    ('Cross-validation (accuracy)', None),
    ('Cross-validation (AUC)', 'roc_auc'),
    ('Cross-validation (recall)', 'recall'),
]
for heading, scoring in metrics_to_score:
    print(heading, cross_val_score(clf, X, y, cv=5, scoring=scoring))

#### Grid search example

In [None]:
dataset = load_digits()
X = dataset.data
y = dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = SVC(kernel='rbf')
grid_values = {'gamma': [0.001, 0.01, 0.05, 0.1, 1, 10, 100]}

# Search 1 -- no scoring given, so the grid is optimized for accuracy (the default).
grid_clf_acc = GridSearchCV(clf, param_grid=grid_values)
grid_clf_acc.fit(X_train, y_train)
y_decision_fn_scores_acc = grid_clf_acc.decision_function(X_test)

print('Grid best parameter (max. accuracy): ', grid_clf_acc.best_params_)
print('Grid best score (accuracy): ', grid_clf_acc.best_score_)

# Search 2 -- same grid, optimized for AUC instead.
grid_clf_auc = GridSearchCV(clf, param_grid=grid_values, scoring='roc_auc')
grid_clf_auc.fit(X_train, y_train)
y_decision_fn_scores_auc = grid_clf_auc.decision_function(X_test)

test_auc = roc_auc_score(y_test, y_decision_fn_scores_auc)
print('Test set AUC: ', test_auc)
print('Grid best parameter (max. AUC): ', grid_clf_auc.best_params_)
print('Grid best score (AUC): ', grid_clf_auc.best_score_)


#### Evaluation metrics supported for model selection   
<img src="data/metrics.png" />   

Source: https://scikit-learn.org/stable/modules/model_evaluation.html

### Two-feature classification example using the digits dataset

#### Optimizing a classifier using different evaluation metrics

In [None]:
dataset = load_digits()
X, y = dataset.data, dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Create a two-feature input vector matching the example plot above.
# We jitter the points (add a small amount of random noise) in case there are areas
# in feature space where many instances have the same features.
# The noise comes from a seeded generator so that the selected parameters, scores
# and plots are the same on every run. Each offset is uniform in [-0.25, 0.75).
jitter_rng = np.random.RandomState(0)
jitter_delta = 0.25
X_twovar_train = X_train[:, [20, 59]] + jitter_rng.rand(X_train.shape[0], 2) - jitter_delta
X_twovar_test = X_test[:, [20, 59]] + jitter_rng.rand(X_test.shape[0], 2) - jitter_delta

clf = SVC(kernel='linear').fit(X_twovar_train, y_train)
grid_values = {'class_weight': ['balanced', {1:2}, {1:3}, {1:4}, {1:5}, {1:10}, {1:20}, {1:50}]}
plt.figure(figsize=(9, 6))

# One grid search over class_weight per metric; each result is drawn in its own
# panel of a 2x2 grid.
for i, eval_metric in enumerate(('precision', 'recall', 'f1', 'roc_auc')):
    grid_clf_custom = GridSearchCV(clf, param_grid=grid_values, scoring=eval_metric)
    grid_clf_custom.fit(X_twovar_train, y_train)
    print('Grid best parameter (max. {0}): {1}'
          .format(eval_metric, grid_clf_custom.best_params_))
    print('Grid best score ({0}): {1}'
          .format(eval_metric, grid_clf_custom.best_score_))
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    plot_class_regions_for_classifier_subplot(grid_clf_custom, X_twovar_test, y_test, None,
                                             None, None, plt.subplot(2, 2, i+1))

    plt.title(eval_metric+'-oriented SVC')
plt.tight_layout()
plt.show()

#### Precision-recall curve for the default SVC classifier (with balanced class weights)

In [None]:
dataset = load_digits()
X, y = dataset.data, dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Create a two-feature input vector matching the example plot above.
# The jitter comes from a seeded generator so that the curve and the printed
# precision/recall are the same on every run. Each offset is uniform in [-0.25, 0.75).
jitter_rng = np.random.RandomState(0)
jitter_delta = 0.25
X_twovar_train = X_train[:, [20, 59]] + jitter_rng.rand(X_train.shape[0], 2) - jitter_delta
X_twovar_test = X_test[:, [20, 59]] + jitter_rng.rand(X_test.shape[0], 2) - jitter_delta

clf = SVC(kernel='linear', class_weight='balanced').fit(X_twovar_train, y_train)

y_scores = clf.decision_function(X_twovar_test)

# Precision/recall at every threshold, plus the point whose threshold is nearest zero.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
closest_zero = np.argmin(np.abs(thresholds))
closest_zero_p = precision[closest_zero]
closest_zero_r = recall[closest_zero]

plot_class_regions_for_classifier(clf, X_twovar_test, y_test,
                                  title="SVC, class_weight = 'balanced', optimized for accuracy")

plt.figure()
plt.axes().set_aspect('equal')
plt.xlim([0.0, 1.01])
plt.ylim([0.0, 1.01])
plt.title("Precision-recall curve: SVC, class_weight = 'balanced'")
plt.plot(precision, recall, label='Precision-Recall Curve')
# Red open circle marks the operating point closest to threshold zero.
plt.plot(closest_zero_p, closest_zero_r, 'o', markersize=12, fillstyle='none', c='r', mew=3)
plt.xlabel('Precision', fontsize=16)
plt.ylabel('Recall', fontsize=16)
plt.show()
print('At zero threshold, precision: {:.2f}, recall: {:.2f}'
      .format(closest_zero_p, closest_zero_r))

<hr style="border:2px solid gray"> </hr>   

In [None]:
# IMPORT THESE FIRST.
import numpy as np
import pandas as pd

## Practice Tasks

In the following practice tasks you will train several models and evaluate how effectively they predict instances of fraud using data based on [this dataset from Kaggle](https://www.kaggle.com/dalpozz/creditcardfraud).
 
Each row in `fraud_data.csv` corresponds to a credit card transaction. Features include confidential variables `V1` through `V28` as well as `Amount` which is the amount of the transaction. 
 
The target is stored in the `Class` column, where a value of 1 corresponds to an instance of fraud and 0 corresponds to an instance of not fraud.

### Task 1
Import the data from `data/fraud_data.csv`. What fraction of the observations in the dataset are instances of fraud?

*This function should return a float between 0 and 1.*   
<br />
<details>    
<summary>
    <font size="3" color="red"><b>Hints (click to expand)</b></font>
</summary>
<p>
    <ul>
        <li>There is a pandas method to create unique value counts for a Series object.</li>
        <li>Every shape attribute is a tuple that one can access with fundamental python indexing.</li>
    </ul>
</p>
</details>

In [None]:
def answer_one():
    """Task 1: return the share of fraud rows in `data/fraud_data.csv` as a float between 0 and 1."""
    # START YOUR CODE HERE.
    
    return # RETURN YOUR ANSWER HERE.

In [None]:
# THIS CELL TESTS YOUR RESULTS.
# The answer is compared with a tolerance instead of `==`, so harmless
# floating-point rounding differences do not fail a correct solution.
# The isinstance check keeps an unfinished stub (which returns None) failing
# with the friendly assertion message rather than a TypeError.
result_one = answer_one()
assert isinstance(result_one, (float, np.floating)) and np.isclose(result_one, 356/21693), "Your results seem to be incorrect!"

In [None]:
# Use X_train, X_test, y_train, y_test for all of the following tasks.
from sklearn.model_selection import train_test_split

df = pd.read_csv('data/fraud_data.csv')

# Every column except the last is used as a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

<details>    
<summary>
    <font size="3" color="darkgreen"><b>Solution (click to expand)</b></font>
</summary>
<p>
    <code>def answer_one():</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;df = pd.read_csv("data/fraud_data.csv")</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;return df["Class"].value_counts().loc[1.0]/df.shape[0]</code><br />
</p>
</details> 

### Task 2

Using `X_train`, `X_test`, `y_train`, and `y_test` (as defined above), train a dummy classifier that classifies everything as the majority class of the training data. What is the accuracy of this classifier? What is the recall?

*This function should return a tuple with two floats, i.e. `(accuracy score, recall score)`.*   
<br />
<details>    
<summary>
    <font size="3" color="red"><b>Hints (click to expand)</b></font>
</summary>
<p>
    <ul>
        <li>DummyClassifier's score method yields the accuracy score for a user-given X and y.</li>
        <li>The recall_score method takes the actual and predicted y-values as parameters.</li>
    </ul>
</p>
</details>

In [None]:
def answer_two():
    """Task 2: return `(accuracy score, recall score)` of a majority-class dummy classifier on the test set."""
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import recall_score
    # START YOUR CODE HERE.
    
    return # RETURN YOUR ANSWER HERE.

In [None]:
# THIS CELL TESTS YOUR RESULTS.
# answer_two() is called once and its result reused, and the scores are
# compared with a tolerance instead of exact float equality.
results = answer_two()
assert len(results) == 2, "Please return a tuple of length two!"
assert np.isclose(results[0], 0.9852507374631269), "Your accuracy score seems to be incorrect!"
assert np.isclose(results[1], 0), "Your recall score seems to be incorrect!"

<details>    
<summary>
    <font size="3" color="darkgreen"><b>Solution (click to expand)</b></font>
</summary>
<p>
    <code>def answer_two():</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.dummy import DummyClassifier</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.metrics import recall_score</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;model = DummyClassifier(strategy="most_frequent").fit(X_train, y_train)</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;predictions = model.predict(X_test)</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;return model.score(X_test, y_test), recall_score(y_test, predictions)</code><br />
</p>
</details> 

### Task 3

Using `X_train`, `X_test`, `y_train`, `y_test` (as defined above), train an SVC classifier using the default parameters. What are the accuracy, recall, and precision of this classifier?

*This function should return a tuple with three floats, i.e. `(accuracy score, recall score, precision score)`.*   

<br />
<details>    
<summary>
    <font size="3" color="red"><b>Hints (click to expand)</b></font>
</summary>
<p>
    <ul>
        <li>SVC's score method yields the accuracy score for a user-given X and y.</li>
        <li>The recall_score and precision_score methods take the actual and predicted y-values as parameters.</li>
    </ul>
</p>
</details>

In [None]:
def answer_three():
    """Task 3: return `(accuracy score, recall score, precision score)` of a default SVC on the test set."""
    from sklearn.metrics import recall_score, precision_score
    from sklearn.svm import SVC
    # START YOUR CODE HERE.
    
    return # RETURN YOUR ANSWER HERE.

In [None]:
# THIS CELL TESTS YOUR RESULTS.
# Scores are compared with a tolerance instead of exact float equality, so
# harmless floating-point rounding differences do not fail a correct solution.
results = answer_three()
assert len(results) == 3, "Please return a tuple of length three!"
assert np.isclose(results[0], 0.9900442477876106), "Your accuracy score seems to be incorrect!"
assert np.isclose(results[1], 0.35), "Your recall score seems to be incorrect!"
assert np.isclose(results[2], 0.9333333333333333), "Your precision score seems to be incorrect!"

<details>    
<summary>
    <font size="3" color="darkgreen"><b>Solution (click to expand)</b></font>
</summary>
<p>
    <code>def answer_three():</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.metrics import recall_score, precision_score</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.svm import SVC</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;model = SVC().fit(X_train, y_train)</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;predictions = model.predict(X_test)</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;return model.score(X_test, y_test), recall_score(y_test, predictions), precision_score(y_test, predictions)</code><br />
</p>
</details> 

### Task 4

Using the SVC classifier with parameters `{'C': 1e9, 'gamma': 1e-07}`, what is the confusion matrix when using a threshold of -220 on the decision function? Use `X_test` and `y_test`.

*This function should return a confusion matrix, a 2x2 numpy array with 4 integers.*   
<br />
<details>    
<summary>
    <font size="3" color="red"><b>Hints (click to expand)</b></font>
</summary>
<p>
    <ul>
        <li>SVC's decision_function method returns values to which a threshold is applicable.</li>
        <li>Remind yourself of boolean masking with numpy arrays.</li>
        <li>Python handles boolean values the same as integers/floats 0 (False) and 1 (True).</li>
    </ul>
</p>
</details>

In [None]:
def answer_four():
    """Task 4: return the 2x2 confusion matrix of the SVC at a decision-function threshold of -220."""
    from sklearn.metrics import confusion_matrix
    from sklearn.svm import SVC
    # START YOUR CODE HERE.
    
    return # RETURN YOUR ANSWERS HERE.

In [None]:
# THIS CELL TESTS YOUR RESULTS.
results = answer_four()
assert results.shape == (2, 2), "Please return a 2x2 numpy-array!"
# Only the diagonal of the matrix is checked: the top-left and bottom-right counts.
assert results[0, 0] == 5320, "Your results seem to be incorrect!"
assert results[1, 1] == 66, "Your results seem to be incorrect!"

<details>    
<summary>
    <font size="3" color="darkgreen"><b>Solution (click to expand)</b></font>
</summary>
<p>
    <code>def answer_four():</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.metrics import confusion_matrix</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.svm import SVC</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;model = SVC(C=1e9, gamma=1e-07).fit(X_train, y_train)</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;predictions = model.decision_function(X_test) > -220</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;return confusion_matrix(y_test, predictions)</code><br />
</p>
</details> 

### Task 5

Train a logistic regression classifier (with `solver="liblinear"`) using `X_train` and `y_train`.

For the logistic regression classifier, create a plot with two subplots that features a precision-recall curve and a ROC curve using `y_test` and the probability estimates for `X_test` (probability it is fraud).   
   
Your plot should look like this:  <img src="data/precision_recall_roc.png">

*You should create the plot outside of the function. The function should return a tuple with four arrays, i.e. `(precision_values [shape = (3253,)], recall_values [shape = (3253,)], false_positive_rates [shape = (50,)], true_positive_rates [shape = (50,)])`.*   
<br />
<details>    
<summary>
    <font size="3" color="red"><b>Hints (click to expand)</b></font>
</summary>
<p>
    <ul>
        <li>Sklearn's precision_recall_curve method takes actual and predicted y-values as inputs and returns three arrays in the following order: precision, recall, and thresholds.</li>
        <li>Sklearn's roc curve takes actual and predicted y-values as inputs and returns three arrays in the following order: false-positive rates, true-positive rates, and thresholds.</li>
        <li>To create a subplot in matplotlib, one needs to call plt.subplot(rows, columns, id). In our case: plt.subplot(2,1,1) and plt.subplot(2,1,2).</li>
        <li>To enhance your visualization: The method plt.tight_layout() takes care of the padding between and around subplots.</li>
    </ul>
</p>
</details>

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve, roc_curve
import matplotlib.pyplot as plt
# TRAIN YOUR MODEL HERE.


# CREATE PLOT HERE.


def answer_five():
    """Task 5: return `(precision_values, recall_values, false_positive_rates, true_positive_rates)`."""
    
    return # RETURN THE DESIRED ARRAYS HERE.

In [None]:
# THIS CELL TESTS YOUR RESULTS.
results = answer_five()
# Check the shape of all four arrays explicitly. Writing `a.shape and b.shape == (...)`
# would only test that a.shape is non-empty, and would leave results[3] unchecked.
assert results[0].shape == (3253,) and results[1].shape == (3253,), "Please return the arrays in the right order!"
assert results[2].shape == (50,) and results[3].shape == (50,), "Please return the arrays in the right order!"
# Spot-check one value per array, with a tolerance instead of exact float equality.
assert np.isclose(results[0][500], 0.028291621327529923), "Your results seem to be incorrect!"
assert np.isclose(results[1][600], 0.975), "Your results seem to be incorrect!"
assert np.isclose(results[2][30], 0.20527694610778444), "Your results seem to be incorrect!"
assert np.isclose(results[3][25], 0.925), "Your results seem to be incorrect!"

<details>    
<summary>
    <font size="3" color="darkgreen"><b>Solution (click to expand)</b></font>
</summary>
<p>
    <code>from sklearn.linear_model import LogisticRegression</code><br />
    <code>from sklearn.metrics import precision_recall_curve, roc_curve</code><br />
    <code>import matplotlib.pyplot as plt</code><br />
    <code></code><br />
    <code>lr = LogisticRegression(solver="liblinear")</code><br />
    <code>lr.fit(X_train, y_train)</code><br />
    <code>predictions = lr.decision_function(X_test)</code><br />
    <code></code><br />
    <code>fig = plt.figure()</code><br />
    <code>ax1 = fig.add_subplot(2, 1, 1)</code><br />
    <code>precision, recall, thresholds_pr = precision_recall_curve(y_test, predictions)</code><br />
    <code>ax1.plot(precision, recall)</code><br />
    <code>ax1.set_title("Precision-recall curve")</code><br />
    <code>ax1.set_xlabel("Precision")</code><br />
    <code>ax1.set_ylabel("Recall")</code><br />
    <code></code><br />
    <code>ax2 = fig.add_subplot(2, 1, 2)</code><br />
    <code>fpr, tpr, thresholds_roc = roc_curve(y_test, predictions)</code><br />
    <code>ax2.plot(fpr, tpr)</code><br />
    <code>ax2.set_title("ROC")</code><br />
    <code>ax2.set_xlabel("False positive rate")</code><br />
    <code>ax2.set_ylabel("True positive rate")</code><br />
    <code></code><br />
    <code>fig.tight_layout()</code><br />
    <code></code><br />
    <code>def answer_five():</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;return precision, recall, fpr, tpr</code><br />
</p>
</details> 

### Task 6

Perform a grid search over the parameters listed below for a Logistic Regression classifier (with `solver="liblinear"`), using `X_train` and `y_train`, recall for scoring, as well as the default 5-fold cross validation.

`'penalty': ['l1', 'l2']`

`'C':[0.01, 0.1, 1, 10, 100]`

From `.cv_results_`, create an array of the mean test scores of each parameter combination. i.e.

|      	| `l1` 	| `l2` 	|
|:----:	|----	|----	|
| **`0.01`** 	|    ?	|   ? 	|
| **`0.1`**  	|    ?	|   ? 	|
| **`1`**    	|    ?	|   ? 	|
| **`10`**   	|    ?	|   ? 	|
| **`100`**   	|    ?	|   ? 	|

<br>

*This function should return a 5 by 2 numpy array with 10 floats.* 

*Note: Do not return a DataFrame, just the values denoted by '?' above in a numpy array. You might need to reshape your raw result to meet the format we are looking for.*   
<br />
<details>    
<summary>
    <font size="3" color="red"><b>Hints (click to expand)</b></font>
</summary>
<p>
    <ul>
        <li>The parameter grid needs to be a dictionary with parameter names as its keys and lists as its values. The different lists' elements correspond to respective parameter settings.</li>
        <li>cv_results_ returns a dictionary. Its key "mean_test_score" yields the desired test scores.</li>
        <li>The grid-search is performed as follows: [("l1", 0.01), ("l2", 0.01), ("l1", 0.1), ("l2", 0.1), ...]</li>
    </ul>
</p>
</details>

In [None]:
def answer_six():    
    """Task 6: return a 5x2 numpy array of mean test scores (rows: C values, columns: l1, l2)."""
    from sklearn.model_selection import GridSearchCV
    from sklearn.linear_model import LogisticRegression
    # START YOUR CODE HERE.
    
    return # RETURN YOUR ANSWER HERE.

In [None]:
# THIS CELL TESTS YOUR RESULTS.
# Scores are compared with a tolerance instead of exact float equality, so
# harmless floating-point rounding differences do not fail a correct solution.
results = answer_six()
assert results.shape == (5, 2), "Your results are in the wrong format!"
assert np.isclose(results[2, 1], 0.8114935064935065), "Your results seem to be incorrect!"
assert np.isclose(results[-1, -1], 0.8006493506493506), "Your results seem to be incorrect!"

<details>    
<summary>
    <font size="3" color="darkgreen"><b>Solution (click to expand)</b></font>
</summary>
<p>
    <code>def answer_six():</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.model_selection import GridSearchCV</code><br /> 
    <code>&nbsp;&nbsp;&nbsp;&nbsp;from sklearn.linear_model import LogisticRegression</code><br />  
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;param_grid = {</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"penalty": ["l1", "l2"],</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"C": [0.01, 0.1, 1, 10, 100]</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;}</code><br />
    <code></code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;clf = GridSearchCV(estimator=LogisticRegression(solver="liblinear"),</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;param_grid=param_grid, scoring="recall")</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;clf.fit(X_train, y_train)</code><br />
    <code>&nbsp;&nbsp;&nbsp;&nbsp;return clf.cv_results_["mean_test_score"].reshape(5, 2)</code><br />
</p>
</details> 