In [None]:
import json
from time import perf_counter, time

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import f1_score, classification_report
from joblib import dump


# Locations of the pickled feature matrix and label vector, relative to the
# notebook's working directory.
features_path = 'features.pickle'
labels_path = 'labels.pickle'

# NOTE(review): unpickling can execute arbitrary code, so these files should
# only come from a trusted source -- confirm their provenance.
X_all = pd.read_pickle(features_path)
y_all = pd.read_pickle(labels_path)

### Training and Testing Data Split

In [None]:
# Hold out 33% of the samples for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.33,
    random_state=42,
)

# Report how many samples ended up on each side of the split
n_train = X_train.shape[0]
print("Training set has {} samples.".format(n_train))
n_test = X_test.shape[0]
print("Testing set has {} samples.".format(n_test))

# Share of samples labelled 1 in each split, expressed as a percentage
train_anomaly_pct = 100 * (y_train == 1).mean()
print("Anomaly rate of the training set: {:.2f}%".format(train_anomaly_pct))
test_anomaly_pct = 100 * (y_test == 1).mean()
print("Anomaly rate of the testing set: {:.2f}%".format(test_anomaly_pct))

## Training and Evaluating Models
- Setup helper functions

### Setup
Run the code cell below to initialize three helper functions which you can use for training and testing supervised learning models. The functions are as follows:
- `train_classifier` - takes as input a classifier and training data and fits the classifier to the data.
- `predict_labels` - takes as input a fit classifier, features, and a target labeling, makes predictions, and returns their F<sub>1</sub> score.
- `train_predict` - takes as input a classifier, and the training and testing data, and performs `train_classifier` and `predict_labels`.
 - This function will report the F<sub>1</sub> score for both the training and testing data separately.

In [None]:
def train_classifier(clf, X_train, y_train):
    ''' Fits a classifier to the training data and prints the elapsed time.

    Parameters:
        clf: object exposing a `fit(X, y)` method; it is fitted in place.
        X_train: training features, passed unchanged to `clf.fit`.
        y_train: training labels, passed unchanged to `clf.fit`.

    Returns:
        None. The fitted state lives on `clf` itself.
    '''

    # perf_counter() is monotonic and high-resolution, so the measured
    # duration cannot be distorted by system clock adjustments the way a
    # wall-clock difference can.
    start = perf_counter()
    clf.fit(X_train, y_train)
    end = perf_counter()

    # Print the results
    print("Trained model in {:.4f} seconds".format(end - start))


def predict_labels(clf, features, target):
    ''' Predicts labels with a fit classifier and returns their F1 score.

    Parameters:
        clf: fitted object exposing a `predict(X)` method.
        features: feature rows to predict on, passed unchanged to
            `clf.predict`.
        target: true labels for `features`; any 1-D array-like accepted by
            `f1_score` (pandas Series, numpy array, list).

    Returns:
        The value of `f1_score(target, predictions)`.
    '''

    # perf_counter() is monotonic and high-resolution, which suits timing a
    # short prediction call better than a wall-clock difference.
    start = perf_counter()
    y_pred = clf.predict(features)
    end = perf_counter()

    # Print and return results
    print("Made predictions in {:.4f} seconds.".format(end - start))
    # Pass `target` as-is: f1_score handles array-likes itself, so requiring
    # a `.values` attribute would needlessly reject arrays and lists.
    return f1_score(target, y_pred)


def train_predict(clf, X_train, y_train, X_test, y_test):
    ''' Fits `clf` on the training split and prints its F1 score on both splits. '''

    # Announce which model is being trained and on how many samples
    model_name = clf.__class__.__name__
    n_samples = len(X_train)
    print("Training a {} using a training set size of {}. . ."
          .format(model_name, n_samples))

    # Fit in place; the helper prints the training time
    train_classifier(clf, X_train, y_train)

    # Score the training split first, then the held-out split
    train_f1 = predict_labels(clf, X_train, y_train)
    print("F1 score for training set: {:.4f}.".format(train_f1))

    test_f1 = predict_labels(clf, X_test, y_test)
    print("F1 score for test set: {:.4f}.".format(test_f1))

### Testing Model Performance 

Let's test 3 supervised learning models:
- A) Decision Trees
- B) Support Vector Machines (SVM)
- C) Gaussian Naive Bayes (GaussianNB)
 

In [None]:
my_random_seed = 42

# The three candidate models; the seed is passed to the two estimators that
# are constructed with a random_state here.
clf_A = DecisionTreeClassifier(random_state=my_random_seed)
clf_B = SVC(random_state=my_random_seed)
clf_C = GaussianNB()
classifiers = (clf_A, clf_B, clf_C)

# First pass: fit every model on the full training split and print its
# F1 score on the training and test data.
for clf in classifiers:
    model_name = clf.__class__.__name__
    print("\n{}: \n".format(model_name))
    train_predict(clf, X_train, y_train, X_test, y_test)

# Second pass: per-class precision / recall / F1 / support on the test split
# for each (now fitted) model.
separator = '-'*52
for clf in classifiers:
    model_name = clf.__class__.__name__
    print('\nReport for {}:\n'.format(model_name))
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    print(separator)

The **precision** is the ratio ```tp / (tp + fp)``` where tp is the number of true positives and fp the number of false positives. The precision is intuitively the ability of the classifier not to label as positive a sample that is negative.

The **recall** is the ratio ```tp / (tp + fn)``` where tp is the number of true positives and fn the number of false negatives. The recall is intuitively the ability of the classifier to find all the positive samples.

The **F1 score** can be interpreted as the harmonic mean of the precision and recall; it reaches its best value at 1 and its worst value at 0. Precision and recall contribute equally to the F1 score. The formula for the F1 score is:

```F1 = 2 * (precision * recall) / (precision + recall)```

The **support** is the number of occurrences of each class in ```y_true```.

Source:
[sklearn.metrics.precision_recall_fscore_support](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_recall_fscore_support.html) [sklearn.metrics.f1_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html)

**Brief interpretation:**

DecisionTreeClassifier has the best performance and provides the best forecasts.

Let's save the model to disk so that it can be loaded again later.

In [None]:
filename = 'model.joblib'

# Persist the fitted decision tree. The context manager closes the file as
# soon as the dump finishes, so the bytes are flushed to disk and no handle
# is leaked, even if dump() raises.
with open(filename, 'wb') as model_file:
    dump(clf_A, model_file)

Log F1 score for experiment tracking

In [None]:
# F1 score of the decision tree on the held-out test split
f1_score_clf_A = f1_score(y_test, clf_A.predict(X_test))

# One metric entry, wrapped in the top-level 'metrics' list that is written
# to the tracking file below.
f1_metric = {
    'name': 'F1-score',
    'numberValue': f1_score_clf_A,
    'format': 'PERCENTAGE',
}
metrics = {'metrics': [f1_metric]}

with open('mlpipeline-metrics.json', 'w') as f:
    json.dump(metrics, f)