---
# Utility (Evaluation)

In [None]:
from sklearn.metrics import accuracy_score, roc_curve, auc


def evaluate_roc(predictions, labels, title):
    """Print AUC and accuracy for binary predictions and plot the ROC curve.

    Args:
        predictions: Scores for the positive class. They are compared with
            ``>= 0.5`` element-wise, so an array type supporting that
            comparison (e.g. a NumPy array) is expected.
        labels: Ground-truth binary labels, passed to ``roc_curve`` and
            ``accuracy_score``.
        title: Text shown in parentheses in the plot title.
    """
    fpr, tpr, _ = roc_curve(labels, predictions)
    roc_auc = auc(fpr, tpr)
    print(f'AUC: {roc_auc:.4f}')

    # Get accuracy over the test set, treating scores >= 0.5 as positive.
    y_pred = np.where(predictions >= 0.5, 1, 0)
    accuracy = accuracy_score(labels, y_pred)
    print(f'Accuracy: {accuracy * 100:.2f}%')

    # Plot ROC AUC
    plt.figure(figsize=(6, 4))
    # Use the caller-supplied title instead of relying on a global variable.
    plt.title(f'Receiver Operating Characteristic ({title})')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    # Diagonal reference line (TPR == FPR).
    plt.plot([0, 1], [0, 1], 'r--')
    # The first argument is passed positionally because Matplotlib renamed the
    # keyword from `b` to `visible`; positional use works with both spellings.
    plt.grid(False, which='major', linestyle='--')
    plt.grid(which='minor', alpha=0.2, linestyle='--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

# Evaluation

Apply the trained models to the test data.

* Accuracy - computed the same way as at the completion of model training.
* ROC - plot the Receiver Operating Characteristic (ROC) curve and report its AUC.

### Instantiate predictors from the saved models

In [None]:
# Placeholder inputs handed to Runner for its training/validation arguments;
# the weights are then loaded from disk via predictor.load().
# NOTE(review): presumably Runner requires these arguments even for
# inference-only use -- confirm against the Runner definition.
dummy_data = ["dummy"]
dummy_label = [0]
# NOTE(review): `id` shadows the builtin and is not read in this cell.
id = ""
predictors = {}

for category in CATEGORIES:
    # Directory holding the saved model for this category / batch size /
    # sequence length combination.
    path_to_dir = "{parent}/model_C{category}_B{size}_L{length}".format(
        parent=RESULT_DIRECTORY,
        category=category,
        size=BATCH_SIZE,
        length=MAX_SEQUENCE_LENGTH
    )

    predictor = Runner(
        category=category,
        training_data=dummy_data,
        training_label=dummy_label,
        validation_data=dummy_data,
        validation_label=dummy_label,
        batch_size=BATCH_SIZE,
        max_sequence_length=MAX_SEQUENCE_LENGTH,
    )
    predictor.load(path_to_dir)

    # One predictor per category, keyed by the category name.
    predictors[category] = predictor

### Predictions

In [None]:
# Test comments as a plain Python list.
test_data = test['comment_text'].tolist()
# NOTE(review): `category` is not assigned in this cell; it holds whatever an
# earlier cell left behind (e.g. the last item of a `for category in
# CATEGORIES` loop), so only that single category's labels are captured here.
# Confirm this is intended.
test_label = test[category].tolist()

In [None]:
# Pick one test comment at random and collect every category's predicted
# class for it into a single-row table.
index = np.random.randint(0, len(test_data))
data = test_data[index]
row = {'data': data}
for category in CATEGORIES:
    # First row of the predictor output as a plain list; argmax selects the
    # position of the highest value.
    scores = predictors[category].predict(data).numpy().tolist()[0]
    row[category] = np.argmax(scores)

pd.DataFrame([row])

#### True Ratings

Ground-truth labels for the comment sampled above.

In [None]:
# Keep only rows whose 'toxic' label is non-negative, then show the row at
# position `index` within that filtered frame.
# NOTE(review): assumes the filtered raw_test has the same row order as the
# frame `index` was drawn from -- confirm.
raw_test.loc[raw_test['toxic'] >= 0].iloc[[index]]

In [None]:
# Drop the reference to the test comment list so its memory can be reclaimed.
del test_data

### ROC

In [None]:
# Score the whole test set with each category's predictor and report ROC/AUC.
predictions = {}
for category in CATEGORIES:
    # [:, 1:2] selects column 1 for all rows (kept two-dimensional) before
    # converting the result to a NumPy array.
    predictions[category] = (
        predictors[category]
        .predict(test['comment_text'].tolist())[:, 1:2]
        .numpy()
    )
    labels = test[category].tolist()
    evaluate_roc(predictions[category], labels, category)

## Performance

In [None]:
# Restore the test data including -1 label values, using the loader that
# matches the CLEANING_FOR_TRAINING setting. Only the second returned item is
# kept; the first is discarded right away.
if CLEANING_FOR_TRAINING:
    _, submission = load_clean_data(train, test)
else:
    _, submission = load_raw_data(TEST_MODE)
del _

# Sanity check: the reloaded frame must contain at least one 'toxic' value.
assert submission['toxic'].count() > 0

# Start from the id and the comment text only; prediction columns are added
# one category at a time.
submission = submission.loc[:, ['id', 'comment_text']]
for category in CATEGORIES:
    # [:, 1:2] -> TensorFlow Tensor indices to select column 1 for all rows.
    # The result is stored in the `category` column as a NumPy array.
    submission[category] = (
        predictors[category]
        .predict(submission['comment_text'].tolist())[:, 1:2]
        .numpy()
    )

# The comment text is no longer needed once every category has been scored.
submission.drop(columns='comment_text', inplace=True)

In [None]:
# Preview the first three rows of the submission frame.
submission.head(3)

In [None]:
# Join labels and predictions on 'id'. Columns present in both frames get
# pandas' default suffixes: `_x` for the `test` side, `_y` for `submission`.
review = test.merge(submission, on='id')
review.head(3)

In [None]:
# Single cut-off separating positive from negative predictions. Using one
# value everywhere keeps PP/NP and TP/TN consistent, so that FP = PP - TP and
# FN = NP - TN are valid (a separate 0.3 cut-off for TN miscounted negatives
# scored in (0.3, 0.5] as false negatives).
DECISION_THRESHOLD = 0.5

for category in CATEGORIES:
    # In `review`, `<category>_x` is the label column and `<category>_y` the
    # predicted score (merge suffixes).
    actual = review[f'{category}_x']
    predicted_positive = review[f'{category}_y'] > DECISION_THRESHOLD
    predicted_negative = review[f'{category}_y'] <= DECISION_THRESHOLD
    ids = review['id']

    # NP: Negative Predictions
    # PP: Positive Predictions
    PP = ids[predicted_positive].count()
    NP = ids[predicted_negative].count()
    ALL = PP + NP

    # TP: True Positive Prediction
    # AP: Actual Positive
    # TN: True Negative Prediction
    # AN: Actual Negative
    TP = ids[(actual == 1) & predicted_positive].count()
    AP = ids[actual == 1].count()
    TN = ids[(actual == 0) & predicted_negative].count()
    AN = ids[actual == 0].count()

    # Rates are fractions of all predictions; FP and FN are derived by
    # subtraction from the prediction totals.
    print(f"[{category:13s}] TP {TP/ALL:0.3f} FP {(PP-TP)/ALL:0.3f} TN: {TN/ALL:0.3f} FN {(NP-TN)/ALL:0.3f}")
    # Recall is only reported when the class actually occurs.
    if AP > 0:
        print(f"[{category:13s}] Positive : Recall {TP/AP:0.3f}")
    if AN > 0:
        print(f"[{category:13s}] Negative : Recall {TN/AN:0.3f}")

---
# Submission

In [None]:
# Write the scored frame to submission.csv under RESULT_DIRECTORY, omitting
# the DataFrame index column.
submission.to_csv(f"{RESULT_DIRECTORY}/submission.csv", index=False)

In [None]:
# Drop the reference to the submission frame so its memory can be reclaimed.
del submission