<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [3]</a>'.</span>

### Import Libraries

In [1]:
# General
import pandas as pd
import numpy as np
import joblib
import json

# Model and Evaluation
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, classification_report

# # DVC stuff
# from dvclive import Live

#### Setup Parameters

In [2]:
# --- Input data: feature and target CSV files for the train/test splits ---
x_train_path = "./notebooks/X_train_scaled.csv"
x_test_path = "./notebooks/X_test_scaled.csv"
y_train_path = "./notebooks/y_train.csv"
y_test_path = "./notebooks/y_test.csv"

# --- Gradient boosting classifier hyperparameters ---
n_estimators = 100
max_depth = 5
lr = 0.01
random_state = 42

# --- Destination of the serialized model ---
models_dir = "./notebooks/"
model_fname = "model.joblib"

# Tunable hyperparameters gathered in one dict
# (not referenced by the cells visible in this notebook).
train_params = dict(n_estimators=n_estimators, max_depth=max_depth, lr=lr)

#### Load Preprocessed Data

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [3]:
# Read the four comma-separated files configured in the parameter cell
# into NumPy arrays: training split first, then test split.
X_train_scaled = np.genfromtxt(fname=x_train_path, delimiter=",")
X_test_scaled = np.genfromtxt(fname=x_test_path, delimiter=",")

y_train = np.genfromtxt(fname=y_train_path, delimiter=",")
y_test = np.genfromtxt(fname=y_test_path, delimiter=",")

FileNotFoundError: ./notebooks/X_train_scaled.csv not found.

#### Build, Train, and Save the Model

In [None]:
# Configure the classifier from the values set in the parameter cell
gbc = GradientBoostingClassifier(
    n_estimators=n_estimators,
    learning_rate=lr,
    max_depth=max_depth,
    random_state=random_state,
)

# Fit on the training split
gbc.fit(X_train_scaled, y_train)

# Persist the fitted estimator to <models_dir><model_fname>
joblib.dump(gbc, f"{models_dir}{model_fname}")



#### Evaluate the Model

In [None]:
# Per-class F1 report
def print_f1_scores(y_true, y_pred):
    """Print the F1-score of every row of the classification report.

    Builds ``classification_report(y_true, y_pred, output_dict=True)`` and
    prints one line per entry that carries an ``'f1-score'`` value, rounded
    to three decimals. Every dict-valued entry of the report is printed with
    the ``Class`` prefix, whatever its key is.

    Args:
        y_true: Ground-truth labels, passed straight to
            ``classification_report``.
        y_pred: Predicted labels, passed straight to
            ``classification_report``.

    Returns:
        None. The scores are written to standard output.
    """
    report = classification_report(y_true, y_pred, output_dict=True)

    for class_label, metric in report.items():
        # Some report entries are plain scalars rather than per-row dicts
        # (e.g. the overall 'accuracy' value); they have no F1-score to show.
        # Skipping them explicitly replaces a bare ``except: pass`` that also
        # hid unrelated errors and keyboard interrupts.
        if not isinstance(metric, dict) or 'f1-score' not in metric:
            continue

        f1_score_float = metric['f1-score']
        print(f"Class {class_label}: F1-score = {round(f1_score_float, 3)}")

In [None]:
# Predict labels for the held-out test split
preds = gbc.predict(X_test_scaled)

# Aggregate F1 under both averaging modes, each rounded to 3 decimals
f1_score_weighted, f1_score_macro = (
    round(f1_score(y_test, preds, average=averaging), 3)
    for averaging in ('weighted', 'macro')
)

# Per-class breakdown
print_f1_scores(y_test, preds)

In [None]:
# Save metrics
# Collect the aggregate F1 scores computed in the evaluation cell.
metrics = {
    'F1-Score_weighted': f1_score_weighted,
    'F1-Score_macro': f1_score_macro
    }

# Write through a context manager so the file handle is flushed and closed
# deterministically; the previous inline open() left the handle unclosed.
with open('./notebooks/metrics.json', 'w') as metrics_file:
    json.dump(
        obj=metrics,
        fp=metrics_file,
        indent = 4,
        sort_keys = True
        )



In [None]:
# DVC command to run pipeline
# Reference only: the string below is a bare literal and is never executed as a
# shell command by this notebook. It records how to register the notebook as a
# DVC stage named `run_model_exp`: the four CSV files and the notebook itself
# are declared as dependencies (-d), the saved model as an output (-o), the
# metrics JSON as a metrics file (-M), and papermill as the stage command that
# executes model_exp.ipynb into model_exp_out.ipynb.
# NOTE(review): newer DVC releases replace `dvc run` with `dvc stage add` +
# `dvc repro` — confirm against the DVC version used by this project.
"""
$ dvc run -n run_model_exp -d notebooks/X_train_scaled.csv -d notebooks/X_test_scaled.csv -d notebooks/y_train.csv -d notebooks/y_test.csv -d notebooks/model_exp.ipynb -o notebooks/model.joblib -M notebooks/metrics.json papermill notebooks/model_exp.ipynb notebooks/model_exp_out.ipynb
"""