diff --git a/README.md b/README.md
index dc3ddff..37e67ea 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
+[![Python Versions](https://img.shields.io/pypi/pyversions/bokbokbok)](#)
+[![PyPI](https://img.shields.io/pypi/v/bokbokbok)](#)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/bokbokbok)](#)
+
# bokbokbok
@@ -12,6 +16,7 @@ Main features:
- Focal Loss
- Squared Log Error
- Log Cosh Loss
+- F1 score
## Installation
diff --git a/bokbokbok/eval_metrics/classification/__init__.py b/bokbokbok/eval_metrics/classification/__init__.py
index 7752d34..f77d8b3 100644
--- a/bokbokbok/eval_metrics/classification/__init__.py
+++ b/bokbokbok/eval_metrics/classification/__init__.py
@@ -1,12 +1,14 @@
"""Import required metrics."""
-from .classification_eval_metrics import(
+from .classification_eval_metrics import (
WeightedCrossEntropyMetric,
FocalMetric,
+ F1_Score_Binary,
)
__all__ = [
"WeightedCrossEntropyMetric",
- "FocalMetric"
+ "FocalMetric",
+ "F1_Score_Binary"
]
\ No newline at end of file
diff --git a/bokbokbok/eval_metrics/classification/classification_eval_metrics.py b/bokbokbok/eval_metrics/classification/classification_eval_metrics.py
index 415fdcd..2527790 100644
--- a/bokbokbok/eval_metrics/classification/classification_eval_metrics.py
+++ b/bokbokbok/eval_metrics/classification/classification_eval_metrics.py
@@ -1,4 +1,5 @@
import numpy as np
+from sklearn.metrics import f1_score
from bokbokbok.utils import clip_sigmoid
@@ -69,11 +70,44 @@ def focal_metric(yhat, dtrain, alpha=alpha, gamma=gamma, XGBoost=XGBoost):
yhat = clip_sigmoid(yhat)
elements = (- alpha * y * np.log(yhat) * np.power(1 - yhat, gamma) -
- (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))
+ (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))
if XGBoost:
return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y))
else:
- return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements)/ len(y)), False
+ return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y)), False
return focal_metric
+
+
+def F1_Score_Binary(XGBoost=False, *args, **kwargs):
+ """
+ Implements the f1_score metric from scikit learn:
+ https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn-metrics-f1-score
+
+ Args:
+ XGBoost (Bool): Set to True if using XGBoost. We assume LightGBM as default use.
+ Note that you should also set `maximize=True` in the XGBoost train function
+ *args, **kwargs: Extra arguments forwarded unchanged to the scikit learn f1_score call.
+
+ """
+ def binary_f1_score(yhat, data, XGBoost=XGBoost):
+ """
+ F1 Score.
+
+ Args:
+ yhat: Predictions (rounded with np.round before being scored)
+ data: The XGBoost / LightGBM dataset; labels are read via get_label()
+ XGBoost (Bool): If XGBoost is to be implemented
+
+ Returns:
+ Name of the eval metric, Eval score, and (LightGBM only) Bool to maximise function
+ """
+ y_true = data.get_label()
+ yhat = np.round(yhat)
+ if XGBoost:
+ return 'F1', f1_score(y_true, yhat, *args, **kwargs)
+ else:
+ return 'F1', f1_score(y_true, yhat, *args, **kwargs), True
+
+ return binary_f1_score
diff --git a/bokbokbok/eval_metrics/regression/__init__.py b/bokbokbok/eval_metrics/regression/__init__.py
index e69de29..ead2072 100644
--- a/bokbokbok/eval_metrics/regression/__init__.py
+++ b/bokbokbok/eval_metrics/regression/__init__.py
@@ -0,0 +1,14 @@
+"""Import required metrics."""
+
+
+from .regression_eval_metrics import (
+ SquaredLogErrorMetric,
+ RootMeanSquaredLogErrorMetric,
+ LogCoshMetric,
+)
+
+__all__ = [
+ "SquaredLogErrorMetric",
+ "RootMeanSquaredLogErrorMetric",
+ "LogCoshMetric"
+]
diff --git a/bokbokbok/loss_functions/classification/classification_loss_functions.py b/bokbokbok/loss_functions/classification/classification_loss_functions.py
index 8d2e467..fa6d90f 100644
--- a/bokbokbok/loss_functions/classification/classification_loss_functions.py
+++ b/bokbokbok/loss_functions/classification/classification_loss_functions.py
@@ -23,7 +23,7 @@ def _gradient(yhat, dtrain, alpha):
yhat = clip_sigmoid(yhat)
- grad = y * yhat * (alpha - 1) + yhat - alpha * y
+ grad = (y * yhat * (alpha - 1)) + yhat - (alpha * y)
return grad
diff --git a/bokbokbok/loss_functions/regression/__init__.py b/bokbokbok/loss_functions/regression/__init__.py
index e69de29..0034830 100644
--- a/bokbokbok/loss_functions/regression/__init__.py
+++ b/bokbokbok/loss_functions/regression/__init__.py
@@ -0,0 +1,12 @@
+"""Import required losses."""
+
+
+from .regression_loss_functions import (
+ SquaredLogErrorLoss,
+ LogCoshLoss,
+)
+
+__all__ = [
+ "SquaredLogErrorLoss",
+ "LogCoshLoss"
+]
\ No newline at end of file
diff --git a/bokbokbok/loss_functions/regression/regression_loss_functions.py b/bokbokbok/loss_functions/regression/regression_loss_functions.py
index 803ebf7..886e388 100644
--- a/bokbokbok/loss_functions/regression/regression_loss_functions.py
+++ b/bokbokbok/loss_functions/regression/regression_loss_functions.py
@@ -62,6 +62,7 @@ def squared_log_loss(
def LogCoshLoss():
"""
+ An alternative to Mean Absolute Error.
"""
def _gradient(yhat, dtrain):
diff --git a/docs/derivations/note.md b/docs/derivations/note.md
index 191a45f..f4c3ce5 100644
--- a/docs/derivations/note.md
+++ b/docs/derivations/note.md
@@ -17,6 +17,6 @@ The Hessian is similarly calculated:
-We will make use of the following property for the calculations of the Losses and Hessians:
+We will make use of the following property for the calculations of the Gradients and Hessians:
\ No newline at end of file
diff --git a/docs/tutorials/F1_score.ipynb b/docs/tutorials/F1_score.ipynb
new file mode 100644
index 0000000..ef415a7
--- /dev/null
+++ b/docs/tutorials/F1_score.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.datasets import make_classification\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import roc_auc_score\n",
+ "from bokbokbok.eval_metrics.classification import F1_Score_Binary\n",
+ "from bokbokbok.utils import clip_sigmoid\n",
+ "\n",
+ "X, y = make_classification(n_samples=1000, \n",
+ " n_features=10, \n",
+ " random_state=41114)\n",
+ "\n",
+ "X_train, X_valid, y_train, y_valid = train_test_split(X, \n",
+ " y, \n",
+ " test_size=0.25, \n",
+ " random_state=41114)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Usage in LightGBM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import lightgbm as lgb\n",
+ "\n",
+ "train = lgb.Dataset(X_train, y_train)\n",
+ "valid = lgb.Dataset(X_valid, y_valid, reference=train)\n",
+ "params = {\n",
+ " 'n_estimators': 300,\n",
+ " 'objective': 'binary',\n",
+ " 'seed': 41114,\n",
+ " 'n_jobs': 8,\n",
+ " 'learning_rate': 0.1,\n",
+ " }\n",
+ "\n",
+ "clf = lgb.train(params=params,\n",
+ " train_set=train,\n",
+ " valid_sets=[train, valid],\n",
+ " valid_names=['train','valid'],\n",
+ " feval=F1_Score_Binary(average='micro'),\n",
+ " early_stopping_rounds=100)\n",
+ "\n",
+ "roc_auc_score(y_valid, clf.predict(X_valid))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Usage in XGBoost"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import xgboost as xgb\n",
+ "\n",
+ "dtrain = xgb.DMatrix(X_train, y_train)\n",
+ "dvalid = xgb.DMatrix(X_valid, y_valid)\n",
+ "\n",
+ "params = {\n",
+ " 'seed': 41114,\n",
+ " 'objective':'binary:logistic',\n",
+ " 'learning_rate': 0.1,\n",
+ " 'disable_default_eval_metric': 1\n",
+ " }\n",
+ "\n",
+ "bst = xgb.train(params,\n",
+ " dtrain=dtrain,\n",
+ " num_boost_round=300,\n",
+ " early_stopping_rounds=10,\n",
+ " verbose_eval=10,\n",
+ " maximize=True,\n",
+ " feval=F1_Score_Binary(average='micro', XGBoost=True),\n",
+ " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+ "\n",
+ "roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:skorecard_py37]",
+ "language": "python",
+ "name": "conda-env-skorecard_py37-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/tutorials/focal_loss.ipynb b/docs/tutorials/focal_loss.ipynb
index 06a50a1..934b39a 100644
--- a/docs/tutorials/focal_loss.ipynb
+++ b/docs/tutorials/focal_loss.ipynb
@@ -8,8 +8,10 @@
"source": [
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import roc_auc_score\n",
"from bokbokbok.loss_functions.classification import FocalLoss\n",
"from bokbokbok.eval_metrics.classification import FocalMetric\n",
+ "from bokbokbok.utils import clip_sigmoid\n",
"\n",
"X, y = make_classification(n_samples=1000, \n",
" n_features=10, \n",
@@ -54,7 +56,9 @@
" valid_names=['train','valid'],\n",
" fobj=FocalLoss(alpha=alpha, gamma=gamma),\n",
" feval=FocalMetric(alpha=alpha, gamma=gamma),\n",
- " early_stopping_rounds=100)"
+ " early_stopping_rounds=100)\n",
+ "\n",
+ "roc_auc_score(y_valid, clip_sigmoid(clf.predict(X_valid)))"
]
},
{
@@ -89,7 +93,9 @@
" obj=FocalLoss(alpha=alpha, gamma=gamma),\n",
" maximize=False,\n",
" feval=FocalMetric(alpha=alpha, gamma=gamma, XGBoost=True),\n",
- " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])"
+ " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+ "\n",
+ "roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
]
}
],
diff --git a/docs/tutorials/weighted_cross_entropy.ipynb b/docs/tutorials/weighted_cross_entropy.ipynb
index 249fc35..aa66fe6 100644
--- a/docs/tutorials/weighted_cross_entropy.ipynb
+++ b/docs/tutorials/weighted_cross_entropy.ipynb
@@ -8,8 +8,10 @@
"source": [
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import roc_auc_score\n",
"from bokbokbok.loss_functions.classification import WeightedCrossEntropyLoss\n",
"from bokbokbok.eval_metrics.classification import WeightedCrossEntropyMetric\n",
+ "from bokbokbok.utils import clip_sigmoid\n",
"\n",
"X, y = make_classification(n_samples=1000, \n",
" n_features=10, \n",
@@ -53,7 +55,9 @@
" valid_names=['train','valid'],\n",
" fobj=WeightedCrossEntropyLoss(alpha=alpha),\n",
" feval=WeightedCrossEntropyMetric(alpha=alpha),\n",
- " early_stopping_rounds=100)"
+ " early_stopping_rounds=100)\n",
+ "\n",
+ "roc_auc_score(y_valid, clip_sigmoid(clf.predict(X_valid)))"
]
},
{
@@ -88,7 +92,9 @@
" obj=WeightedCrossEntropyLoss(alpha=alpha),\n",
" maximize=False,\n",
" feval=WeightedCrossEntropyMetric(alpha=alpha, XGBoost=True),\n",
- " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])"
+ " evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+ "\n",
+ "roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
]
}
],
diff --git a/mkdocs.yml b/mkdocs.yml
index dfb98ee..ef6baec 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -12,6 +12,7 @@ nav:
- Tutorials:
- Weighted Cross Entropy: tutorials/weighted_cross_entropy.ipynb
- Focal Loss: tutorials/focal_loss.ipynb
+ - F1 Score: tutorials/F1_score.ipynb
- Derivations:
- General Remarks: derivations/note.md
- Weighted Cross Entropy: derivations/wce.md
diff --git a/setup.py b/setup.py
index 4c53bc8..27b219a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
-# with open("README.md", "r", encoding="UTF-8") as fh:
-# long_description = fh.read()
+with open("README.md", "r", encoding="UTF-8") as fh:
+ long_description = fh.read()
base_packages = [
"numpy>=1.19.2",
@@ -33,9 +33,9 @@
setup(
name="bokbokbok",
- version="0.1",
+ version="0.2",
description="Custom Losses and Metrics for XGBoost, LightGBM, CatBoost",
- #long_description=long_description,
+ long_description=long_description,
long_description_content_type="text/markdown",
author="Daniel Timbrell",
author_email="dantimbrell@gmail.com",