In [1]:
import numpy
import pandas
import joblib
import json
import seaborn
import sklearn

%matplotlib inline

# Instructions

- Read **the test data** from the CSV file and properly set the index
- Load the trained model

In [2]:
# Read the test data from disk and preview its first rows.
test_csv_path = './data/features.test.csv'
data_test = pandas.read_csv(test_csv_path)
data_test.head()

Unnamed: 0,principal_feature_1,principal_feature_2,label
0,6.362479,-4.015454,0
1,-2.978411,-10.918748,0
2,-13.144219,1.878526,1
3,8.129202,-6.925383,0
4,12.692328,-2.771581,0


In [3]:
# Restore the previously trained model from its joblib artifact.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
model_path = 'model/model.joblib'
model = joblib.load(model_path)
model


# Instructions

- Calculate and add the `prediction` column to the dataframe of test data
- Write the dataframe to `./results/predictions.test.csv`

In [4]:
# Predict from the two principal-component features only, and store the result
# next to the true label as a new `prediction` column.
feature_columns = ['principal_feature_1', 'principal_feature_2']
data_test['prediction'] = model.predict(data_test[feature_columns])
data_test.head()


Unnamed: 0,principal_feature_1,principal_feature_2,label,prediction
0,6.362479,-4.015454,0,0
1,-2.978411,-10.918748,0,0
2,-13.144219,1.878526,1,1
3,8.129202,-6.925383,0,0
4,12.692328,-2.771581,0,0


In [5]:
# Persist the test rows together with their predictions (the index is written too,
# which is the pandas default).
predictions_csv_path = './results/predictions.test.csv'
data_test.to_csv(predictions_csv_path)

# Instructions
- Use `sklearn.metrics.confusion_matrix` to calculate the confusion matrix


In [6]:
# Compare the true labels with the model's predictions.
# Rows of the resulting matrix are true classes, columns are predicted classes.
true_labels = data_test['label']
predicted_labels = data_test['prediction']
confusion_matrix_from_sklearn = sklearn.metrics.confusion_matrix(
    y_true=true_labels,
    y_pred=predicted_labels,
)
confusion_matrix_from_sklearn

array([[114,   8],
       [  3, 125]])

# Instructions

- Use `sklearn.metrics.classification_report` to print the classification report
- Now, **manually fill** the following metrics and put them in a Python dictionary
- The expected output should look like:
```
{
     'true_positive': 118,
     'true_negative': 107,
     'false_negative': 15,
     'false_positive': 10,
     'accuracy': 0.9,
     'precision': 0.921875,
     'recall': 0.887218,
     'support': 133,
     'f1-score': 0.9042145
}
```
- Use `json.dump` to save the output to `./results/scores.test.json`


In [7]:
# Build the per-class report as a dict and show it as a table
# (metrics become rows, classes/averages become columns).
# `digits` only affects the text report; it is ignored when `output_dict=True`.
report_as_dict = sklearn.metrics.classification_report(
    y_true=data_test['label'],
    y_pred=data_test['prediction'],
    digits=4,
    output_dict=True,
)
pandas.DataFrame(report_as_dict)


Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.974359,0.93985,0.956,0.957104,0.95669
recall,0.934426,0.976562,0.956,0.955494,0.956
f1-score,0.953975,0.957854,0.956,0.955915,0.955961
support,122.0,128.0,0.956,250.0,250.0


In [8]:
# Derive every score from the confusion matrix computed earlier in the notebook
# rather than typing numbers in by hand, so the saved scores cannot drift away
# from the model's actual predictions.
#
# For binary labels sklearn lays the matrix out as [[TN, FP], [FN, TP]]
# (rows = true class, columns = predicted class), so ravel() yields the four
# counts in exactly that order. Label 1 is treated as the positive class.
assert confusion_matrix_from_sklearn.shape == (2, 2), 'expected a binary confusion matrix'

# int(...) turns numpy integers into plain Python ints, which json.dump can serialize.
true_negative, false_positive, false_negative, true_positive = (
    int(count) for count in confusion_matrix_from_sklearn.ravel()
)

total_samples = true_negative + false_positive + false_negative + true_positive
predicted_positive = true_positive + false_positive
support = true_positive + false_negative  # number of actual positive samples

# Guard each ratio against an empty denominator (e.g. the model never predicts 1).
accuracy = (true_positive + true_negative) / total_samples if total_samples else 0.0
precision = true_positive / predicted_positive if predicted_positive else 0.0
recall = true_positive / support if support else 0.0
f1_score = (
    2 * precision * recall / (precision + recall)
    if (precision + recall)
    else 0.0
)

scores = {
    'true_positive': true_positive,
    'true_negative': true_negative,
    'false_negative': false_negative,
    'false_positive': false_positive,
    'accuracy': accuracy,
    'precision': precision,
    'recall': recall,
    'support': support,
    'f1-score': f1_score,
}
scores

In [9]:
# Save the scores dictionary as pretty-printed JSON.
scores_json_path = './results/scores.test.json'
with open(scores_json_path, 'w') as scores_file:
    json.dump(scores, scores_file, indent=4)