Merge pull request #56 from orchardbirds/add_binary_f1_eval
Add binary f1 eval
orchardbirds committed Mar 17, 2021
2 parents 472df0f + 46978fd commit d4c2a7a
Showing 13 changed files with 221 additions and 14 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -1,5 +1,9 @@
<img src="https://github.com/orchardbirds/bokbokbok/raw/main/docs/img/bokbokbok.png" width="120" align="right">

[![PyPi Version](https://img.shields.io/pypi/pyversions/bokbokbok)](#)
[![PyPI](https://img.shields.io/pypi/v/bokbokbok)](#)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/bokbokbok)](#)


# bokbokbok

@@ -12,6 +16,7 @@ Main features:
- Focal Loss
- Squared Log Error
- Log Cosh Loss
- F1 Score

## Installation

6 changes: 4 additions & 2 deletions bokbokbok/eval_metrics/classification/__init__.py
@@ -1,12 +1,14 @@
"""Import required metrics."""


-from .classification_eval_metrics import(
+from .classification_eval_metrics import (
WeightedCrossEntropyMetric,
FocalMetric,
+    F1_Score_Binary,
)

__all__ = [
    "WeightedCrossEntropyMetric",
-    "FocalMetric"
+    "FocalMetric",
+    "F1_Score_Binary"
]
bokbokbok/eval_metrics/classification/classification_eval_metrics.py
@@ -1,4 +1,5 @@
import numpy as np
from sklearn.metrics import f1_score
from bokbokbok.utils import clip_sigmoid


@@ -69,11 +70,44 @@ def focal_metric(yhat, dtrain, alpha=alpha, gamma=gamma, XGBoost=XGBoost):
        yhat = clip_sigmoid(yhat)

        elements = (- alpha * y * np.log(yhat) * np.power(1 - yhat, gamma) -
-                   (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))
+                    (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))

        if XGBoost:
            return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y))
        else:
-            return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements)/ len(y)), False
+            return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y)), False

    return focal_metric
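
For reference, the quantity `focal_metric` averages is the focal loss itself. Restated in LaTeX directly from the code above, with N samples and ŷ the clipped sigmoid output:

```latex
\mathrm{Focal}_{\alpha,\gamma}
  = \frac{1}{N} \sum_{i=1}^{N}
    \Big[ -\alpha \, y_i \log(\hat{y}_i) \, (1 - \hat{y}_i)^{\gamma}
          - (1 - y_i) \log(1 - \hat{y}_i) \, \hat{y}_i^{\gamma} \Big]
```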


def F1_Score_Binary(XGBoost=False, *args, **kwargs):
    """
    Implements the f1_score metric from scikit-learn:
    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn-metrics-f1-score

    Args:
        *args, **kwargs: Arguments passed through to the scikit-learn f1_score metric.
        XGBoost (Bool): Set to True if using XGBoost; LightGBM is assumed by default.
            Note that you should also set `maximize=True` in the XGBoost train function.
    """
    def binary_f1_score(yhat, data, XGBoost=XGBoost):
        """
        F1 Score.

        Args:
            yhat: Predictions
            data: The XGBoost / LightGBM dataset
            XGBoost (Bool): If XGBoost is to be implemented

        Returns:
            Name of the eval metric, the eval score, and (for LightGBM only)
            a Bool indicating that the metric should be maximised
        """
        y_true = data.get_label()
        yhat = np.round(yhat)  # threshold predicted probabilities at 0.5
        if XGBoost:
            return 'F1', f1_score(y_true, yhat, *args, **kwargs)
        else:
            return 'F1', f1_score(y_true, yhat, *args, **kwargs), True

    return binary_f1_score
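
A minimal sketch of the calling convention above, using a hypothetical `_StubData` class in place of a real LightGBM `Dataset` (only the `get_label` method is touched). The LightGBM-style closure returns a 3-tuple whose final `True` tells the booster that higher is better; the XGBoost variant drops it and relies on `maximize=True` instead:

```python
import numpy as np
from bokbokbok.eval_metrics.classification import F1_Score_Binary

class _StubData:
    """Hypothetical stand-in for a LightGBM Dataset; not part of bokbokbok."""
    def __init__(self, labels):
        self._labels = np.asarray(labels)

    def get_label(self):
        return self._labels

data = _StubData([0, 1, 1, 0])
preds = np.array([0.2, 0.8, 0.4, 0.1])  # probabilities; np.round thresholds at 0.5

lgb_metric = F1_Score_Binary(average='binary')
print(lgb_metric(preds, data))   # ('F1', 0.666..., True)

xgb_metric = F1_Score_Binary(average='binary', XGBoost=True)
print(xgb_metric(preds, data))   # ('F1', 0.666...)
```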
14 changes: 14 additions & 0 deletions bokbokbok/eval_metrics/regression/__init__.py
@@ -0,0 +1,14 @@
"""Import required metrics."""


from .regression_eval_metrics import (
SquaredLogErrorMetric,
RootMeanSquaredLogErrorMetric,
LogCoshMetric,
)

__all__ = [
"SquaredLogErrorMetric",
"RootMeanSquaredLogErrorMetric",
"LogCoshMetric"
]
bokbokbok/loss_functions/classification/classification_loss_functions.py
@@ -23,7 +23,7 @@ def _gradient(yhat, dtrain, alpha):

    yhat = clip_sigmoid(yhat)

-    grad = y * yhat * (alpha - 1) + yhat - alpha * y
+    grad = (y * yhat * (alpha - 1)) + yhat - (alpha * y)

    return grad
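
As a sanity check on the parenthesised form: assuming the weighted cross-entropy loss L = -αy·log(ŷ) - (1-y)·log(1-ŷ), the gradient with respect to the raw margin z follows from the sigmoid property quoted in `docs/derivations/note.md`:

```latex
\frac{\partial L}{\partial z}
  = \left( -\frac{\alpha y}{\hat{y}} + \frac{1 - y}{1 - \hat{y}} \right) \hat{y} (1 - \hat{y})
  = -\alpha y (1 - \hat{y}) + (1 - y) \hat{y}
  = y \hat{y} (\alpha - 1) + \hat{y} - \alpha y
```

which is exactly the expression `grad` computes.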

12 changes: 12 additions & 0 deletions bokbokbok/loss_functions/regression/__init__.py
@@ -0,0 +1,12 @@
"""Import required losses."""


from .regression_loss_functions import (
SquaredLogErrorLoss,
LogCoshLoss,
)

__all__ = [
"SquaredLogErrorLoss",
"LogCoshLoss"
]
bokbokbok/loss_functions/regression/regression_loss_functions.py
@@ -62,6 +62,7 @@ def squared_log_loss(

def LogCoshLoss():
    """
    A smooth alternative to Mean Absolute Error.
    """

    def _gradient(yhat, dtrain):
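The gradient body is truncated here; a sketch of the math it would implement, assuming the usual definition L = log(cosh(ŷ - y)) for regression on raw scores:

```latex
\frac{\partial L}{\partial \hat{y}} = \tanh(\hat{y} - y),
\qquad
\frac{\partial^2 L}{\partial \hat{y}^2} = 1 - \tanh^2(\hat{y} - y)
```

The Hessian is bounded in (0, 1], which is what makes log-cosh a smooth, second-order-friendly stand-in for MAE.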
2 changes: 1 addition & 1 deletion docs/derivations/note.md
@@ -17,6 +17,6 @@ The Hessian is similarly calculated:

<img src="https://latex.codecogs.com/svg.latex?\hat{y}&space;=&space;\sigma(z)&space;=&space;\frac{1}{1&space;&plus;&space;e^{-z}}" title="\hat{y} = \sigma(z) = \frac{1}{1 + e^{-z}}" />

-We will make use of the following property for the calculations of the Losses and Hessians:
+We will make use of the following property for the calculations of the Gradients and Hessians:

<img src="https://latex.codecogs.com/svg.latex?\frac{\partial&space;\hat{y}}{\partial&space;z}&space;=&space;\hat{y}&space;\cdot&space;(1&space;-&space;\hat{y})" title="\frac{\partial \hat{y}}{\partial z} = \hat{y} \cdot (1 - \hat{y})" />
126 changes: 126 additions & 0 deletions docs/tutorials/F1_score.ipynb
@@ -0,0 +1,126 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import roc_auc_score\n",
"from bokbokbok.eval_metrics.classification import F1_Score_Binary\n",
"from bokbokbok.utils import clip_sigmoid\n",
"\n",
"X, y = make_classification(n_samples=1000, \n",
" n_features=10, \n",
" random_state=41114)\n",
"\n",
"X_train, X_valid, y_train, y_valid = train_test_split(X, \n",
" y, \n",
" test_size=0.25, \n",
" random_state=41114)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Usage in LightGBM"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import lightgbm as lgb\n",
"\n",
"train = lgb.Dataset(X_train, y_train)\n",
"valid = lgb.Dataset(X_valid, y_valid, reference=train)\n",
"params = {\n",
" 'n_estimators': 300,\n",
" 'objective': 'binary',\n",
" 'seed': 41114,\n",
" 'n_jobs': 8,\n",
" 'learning_rate': 0.1,\n",
" }\n",
"\n",
"clf = lgb.train(params=params,\n",
" train_set=train,\n",
" valid_sets=[train, valid],\n",
" valid_names=['train','valid'],\n",
" feval=F1_Score_Binary(average='micro'),\n",
" early_stopping_rounds=100)\n",
"\n",
"roc_auc_score(y_valid, clf.predict(X_valid))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Usage in XGBoost"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import xgboost as xgb\n",
"\n",
"dtrain = xgb.DMatrix(X_train, y_train)\n",
"dvalid = xgb.DMatrix(X_valid, y_valid)\n",
"\n",
"params = {\n",
" 'seed': 41114,\n",
" 'objective':'binary:logistic',\n",
" 'learning_rate': 0.1,\n",
" 'disable_default_eval_metric': 1\n",
" }\n",
"\n",
"bst = xgb.train(params,\n",
" dtrain=dtrain,\n",
" num_boost_round=300,\n",
" early_stopping_rounds=10,\n",
" verbose_eval=10,\n",
" maximize=True,\n",
" feval=F1_Score_Binary(average='micro', XGBoost=True),\n",
" evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
"\n",
"roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:skorecard_py37]",
"language": "python",
"name": "conda-env-skorecard_py37-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
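
One caveat on the `average='micro'` setting used in both cells above: for single-label targets, micro-averaged F1 pools true positives, false positives and false negatives over both classes and collapses to plain accuracy, so `average='binary'` (the scikit-learn default) is usually the more informative choice for a binary problem. A quick self-contained check:

```python
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

y_true = np.array([0, 1, 1, 0, 1, 0])
y_pred = np.array([0, 1, 0, 0, 1, 1])

# Micro-averaged F1 pools TP/FP/FN across both classes, which for
# single-label predictions reduces to the accuracy score.
assert np.isclose(f1_score(y_true, y_pred, average='micro'),
                  accuracy_score(y_true, y_pred))
```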
10 changes: 8 additions & 2 deletions docs/tutorials/focal_loss.ipynb
@@ -8,8 +8,10 @@
"source": [
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import roc_auc_score\n",
"from bokbokbok.loss_functions.classification import FocalLoss\n",
"from bokbokbok.eval_metrics.classification import FocalMetric\n",
"from bokbokbok.utils import clip_sigmoid\n",
"\n",
"X, y = make_classification(n_samples=1000, \n",
" n_features=10, \n",
@@ -54,7 +56,9 @@
"                valid_names=['train','valid'],\n",
"                fobj=FocalLoss(alpha=alpha, gamma=gamma),\n",
"                feval=FocalMetric(alpha=alpha, gamma=gamma),\n",
-"                early_stopping_rounds=100)"
+"                early_stopping_rounds=100)\n",
+"\n",
+"roc_auc_score(y_valid, clip_sigmoid(clf.predict(X_valid)))"
]
},
{
@@ -89,7 +93,9 @@
"          obj=FocalLoss(alpha=alpha, gamma=gamma),\n",
"          maximize=False,\n",
"          feval=FocalMetric(alpha=alpha, gamma=gamma, XGBoost=True),\n",
-"          evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])"
+"          evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+"\n",
+"roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
]
}
],
10 changes: 8 additions & 2 deletions docs/tutorials/weighted_cross_entropy.ipynb
@@ -8,8 +8,10 @@
"source": [
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import roc_auc_score\n",
"from bokbokbok.loss_functions.classification import WeightedCrossEntropyLoss\n",
"from bokbokbok.eval_metrics.classification import WeightedCrossEntropyMetric\n",
"from bokbokbok.utils import clip_sigmoid\n",
"\n",
"X, y = make_classification(n_samples=1000, \n",
" n_features=10, \n",
@@ -53,7 +55,9 @@
"                valid_names=['train','valid'],\n",
"                fobj=WeightedCrossEntropyLoss(alpha=alpha),\n",
"                feval=WeightedCrossEntropyMetric(alpha=alpha),\n",
-"                early_stopping_rounds=100)"
+"                early_stopping_rounds=100)\n",
+"\n",
+"roc_auc_score(y_valid, clip_sigmoid(clf.predict(X_valid)))"
]
},
{
@@ -88,7 +92,9 @@
"          obj=WeightedCrossEntropyLoss(alpha=alpha),\n",
"          maximize=False,\n",
"          feval=WeightedCrossEntropyMetric(alpha=alpha, XGBoost=True),\n",
-"          evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])"
+"          evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+"\n",
+"roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
]
}
],
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -12,6 +12,7 @@ nav:
- Tutorials:
- Weighted Cross Entropy: tutorials/weighted_cross_entropy.ipynb
- Focal Loss: tutorials/focal_loss.ipynb
- F1 Score: tutorials/F1_score.ipynb
- Derivations:
- General Remarks: derivations/note.md
- Weighted Cross Entropy: derivations/wce.md
8 changes: 4 additions & 4 deletions setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages

-# with open("README.md", "r", encoding="UTF-8") as fh:
-#     long_description = fh.read()
+with open("README.md", "r", encoding="UTF-8") as fh:
+    long_description = fh.read()

base_packages = [
"numpy>=1.19.2",
@@ -33,9 +33,9 @@

setup(
name="bokbokbok",
-    version="0.1",
+    version="0.2",
description="Custom Losses and Metrics for XGBoost, LightGBM, CatBoost",
-    #long_description=long_description,
+    long_description=long_description,
long_description_content_type="text/markdown",
author="Daniel Timbrell",
author_email="dantimbrell@gmail.com",
