From 31cabd6484eff8bb3398b2e566123e01ae29367f Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 16:11:34 +0100
Subject: [PATCH 01/10] add long description, prepare bump

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index dc3ddff..a1b97fe 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
+[![PyPi Version](https://img.shields.io/pypi/pyversions/bokbokbok)](#)
+[![PyPI](https://img.shields.io/pypi/v/bokbokbok)](#)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/bokbokbok)](#)
+
 # bokbokbok
 
 
 
 

From aceab94021e285c4ab2681f69360e196bbbd98f1 Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 16:11:48 +0100
Subject: [PATCH 02/10] correct Gradint

---
 docs/derivations/note.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/derivations/note.md b/docs/derivations/note.md
index 191a45f..f4c3ce5 100644
--- a/docs/derivations/note.md
+++ b/docs/derivations/note.md
@@ -17,6 +17,6 @@ The Hessian is similarly calculated:
 
 
 
-We will make use of the following property for the calculations of the Losses and Hessians:
+We will make use of the following property for the calculations of the Gradients and Hessians:
 
 
\ No newline at end of file

From b3acd6fb17f0346dfe70cef157156cc9bf280372 Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 16:12:23 +0100
Subject: [PATCH 03/10] add working init

---
 bokbokbok/eval_metrics/regression/__init__.py   | 14 ++++++++++++++
 bokbokbok/loss_functions/regression/__init__.py | 12 ++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/bokbokbok/eval_metrics/regression/__init__.py b/bokbokbok/eval_metrics/regression/__init__.py
index e69de29..ead2072 100644
--- a/bokbokbok/eval_metrics/regression/__init__.py
+++ b/bokbokbok/eval_metrics/regression/__init__.py
@@ -0,0 +1,14 @@
+"""Import required metrics."""
+
+
+from .regression_eval_metrics import (
+    SquaredLogErrorMetric,
+    RootMeanSquaredLogErrorMetric,
+    LogCoshMetric,
+)
+
+__all__ = [
+    "SquaredLogErrorMetric",
+    "RootMeanSquaredLogErrorMetric",
+    "LogCoshMetric"
+]
diff --git a/bokbokbok/loss_functions/regression/__init__.py b/bokbokbok/loss_functions/regression/__init__.py
index e69de29..0034830 100644
--- a/bokbokbok/loss_functions/regression/__init__.py
+++ b/bokbokbok/loss_functions/regression/__init__.py
@@ -0,0 +1,12 @@
+"""Import required losses."""
+
+
+from .regression_loss_functions import (
+    SquaredLogErrorLoss,
+    LogCoshLoss,
+)
+
+__all__ = [
+    "SquaredLogErrorLoss",
+    "LogCoshLoss"
+]
\ No newline at end of file

From 8080f77b7228b4613f23bd1309bc4263e37cd2e9 Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 16:12:43 +0100
Subject: [PATCH 04/10] add docstring

---
 bokbokbok/loss_functions/regression/regression_loss_functions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bokbokbok/loss_functions/regression/regression_loss_functions.py b/bokbokbok/loss_functions/regression/regression_loss_functions.py
index 803ebf7..886e388 100644
--- a/bokbokbok/loss_functions/regression/regression_loss_functions.py
+++ b/bokbokbok/loss_functions/regression/regression_loss_functions.py
@@ -62,6 +62,7 @@ def squared_log_loss(
 
 def LogCoshLoss():
     """
+    An alternative to Mean Absolute Error.
     """
 
     def _gradient(yhat, dtrain):

From 363c9799f9f63c5abb29efbceeaeefacb232d82c Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 18:43:49 +0100
Subject: [PATCH 05/10] add f1score

---
 .../eval_metrics/classification/__init__.py |  6 ++-
 .../classification_eval_metrics.py          | 38 ++++++++++++++++++-
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/bokbokbok/eval_metrics/classification/__init__.py b/bokbokbok/eval_metrics/classification/__init__.py
index 7752d34..f77d8b3 100644
--- a/bokbokbok/eval_metrics/classification/__init__.py
+++ b/bokbokbok/eval_metrics/classification/__init__.py
@@ -1,12 +1,14 @@
 """Import required metrics."""
 
 
-from .classification_eval_metrics import(
+from .classification_eval_metrics import (
     WeightedCrossEntropyMetric,
     FocalMetric,
+    F1_Score_Binary,
 )
 
 __all__ = [
     "WeightedCrossEntropyMetric",
-    "FocalMetric"
+    "FocalMetric",
+    "F1_Score_Binary"
 ]
\ No newline at end of file
diff --git a/bokbokbok/eval_metrics/classification/classification_eval_metrics.py b/bokbokbok/eval_metrics/classification/classification_eval_metrics.py
index 415fdcd..2527790 100644
--- a/bokbokbok/eval_metrics/classification/classification_eval_metrics.py
+++ b/bokbokbok/eval_metrics/classification/classification_eval_metrics.py
@@ -1,4 +1,5 @@
 import numpy as np
+from sklearn.metrics import f1_score
 
 from bokbokbok.utils import clip_sigmoid
 
@@ -69,11 +70,44 @@ def focal_metric(yhat, dtrain, alpha=alpha, gamma=gamma, XGBoost=XGBoost):
 
         yhat = clip_sigmoid(yhat)
 
         elements = (- alpha * y * np.log(yhat) * np.power(1 - yhat, gamma) -
-                     (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))
+                    (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))
 
         if XGBoost:
             return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y))
         else:
-            return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements)/ len(y)), False
+            return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y)), False
 
     return focal_metric
+
+
+def F1_Score_Binary(XGBoost=False, *args, **kwargs):
+    """
+    Implements the f1_score metric from scikit learn:
+    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn-metrics-f1-score
+
+    Args:
+        *args: The arguments to be fed into the scikit learn metric.
+        XGBoost (Bool): Set to True if using XGBoost. We assume LightGBM as default use.
+                        Note that you should also set `maximize=True` in the XGBoost train function
+
+    """
+    def binary_f1_score(yhat, data, XGBoost=XGBoost):
+        """
+        F1 Score.
+
+        Args:
+            yhat: Predictions
+            dtrain: The XGBoost / LightGBM dataset
+            XGBoost (Bool): If XGBoost is to be implemented
+
+        Returns:
+            Name of the eval metric, Eval score, Bool to maximise function
+        """
+        y_true = data.get_label()
+        yhat = np.round(yhat)
+        if XGBoost:
+            return 'F1', f1_score(y_true, yhat, *args, **kwargs)
+        else:
+            return 'F1', f1_score(y_true, yhat, *args, **kwargs), True
+
+    return binary_f1_score

From 3cccb5885f58c08c7ceff3cadebde59a1b8956ed Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 18:44:13 +0100
Subject: [PATCH 06/10] clean

---
 .../classification/classification_loss_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bokbokbok/loss_functions/classification/classification_loss_functions.py b/bokbokbok/loss_functions/classification/classification_loss_functions.py
index 8d2e467..fa6d90f 100644
--- a/bokbokbok/loss_functions/classification/classification_loss_functions.py
+++ b/bokbokbok/loss_functions/classification/classification_loss_functions.py
@@ -23,7 +23,7 @@ def _gradient(yhat, dtrain, alpha):
 
         yhat = clip_sigmoid(yhat)
 
-        grad = y * yhat * (alpha - 1) + yhat - alpha * y
+        grad = (y * yhat * (alpha - 1)) + yhat - (alpha * y)
 
         return grad
 

From a4f7b0179f60fe5af0a2ecc3da9969c785044ca8 Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 18:44:57 +0100
Subject: [PATCH 07/10] add sigmoid score

---
 docs/tutorials/focal_loss.ipynb             | 10 ++++++++--
 docs/tutorials/weighted_cross_entropy.ipynb | 10 ++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/docs/tutorials/focal_loss.ipynb b/docs/tutorials/focal_loss.ipynb
index 06a50a1..934b39a 100644
--- a/docs/tutorials/focal_loss.ipynb
+++ b/docs/tutorials/focal_loss.ipynb
@@ -8,8 +8,10 @@ "source": [
     "from sklearn.datasets import make_classification\n",
     "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import roc_auc_score\n",
     "from bokbokbok.loss_functions.classification import FocalLoss\n",
     "from bokbokbok.eval_metrics.classification import FocalMetric\n",
+    "from bokbokbok.utils import clip_sigmoid\n",
     "\n",
     "X, y = make_classification(n_samples=1000, \n",
     "                            n_features=10, \n",
     "                            random_state=41114)\n",
@@ -54,7 +56,9 @@
     "                valid_names=['train','valid'],\n",
     "                fobj=FocalLoss(alpha=alpha, gamma=gamma),\n",
     "                feval=FocalMetric(alpha=alpha, gamma=gamma),\n",
-    "                early_stopping_rounds=100)"
+    "                early_stopping_rounds=100)\n",
+    "\n",
+    "roc_auc_score(y_valid, clip_sigmoid(clf.predict(X_valid)))"
    ]
   },
   {
@@ -89,7 +93,9 @@
     "                obj=FocalLoss(alpha=alpha, gamma=gamma),\n",
     "                maximize=False,\n",
     "                feval=FocalMetric(alpha=alpha, gamma=gamma, XGBoost=True),\n",
-    "                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])"
+    "                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+    "\n",
+    "roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
    ]
   }
  ],
diff --git a/docs/tutorials/weighted_cross_entropy.ipynb b/docs/tutorials/weighted_cross_entropy.ipynb
index 249fc35..aa66fe6 100644
--- a/docs/tutorials/weighted_cross_entropy.ipynb
+++ b/docs/tutorials/weighted_cross_entropy.ipynb
@@ -8,8 +8,10 @@ "source": [
     "from sklearn.datasets import make_classification\n",
     "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import roc_auc_score\n",
     "from bokbokbok.loss_functions.classification import WeightedCrossEntropyLoss\n",
     "from bokbokbok.eval_metrics.classification import WeightedCrossEntropyMetric\n",
+    "from bokbokbok.utils import clip_sigmoid\n",
     "\n",
     "X, y = make_classification(n_samples=1000, \n",
     "                            n_features=10, \n",
     "                            random_state=41114)\n",
@@ -53,7 +55,9 @@
     "                valid_names=['train','valid'],\n",
     "                fobj=WeightedCrossEntropyLoss(alpha=alpha),\n",
     "                feval=WeightedCrossEntropyMetric(alpha=alpha),\n",
-    "                early_stopping_rounds=100)"
+    "                early_stopping_rounds=100)\n",
+    "\n",
+    "roc_auc_score(y_valid, clip_sigmoid(clf.predict(X_valid)))"
    ]
   },
   {
@@ -88,7 +92,9 @@
     "                obj=WeightedCrossEntropyLoss(alpha=alpha),\n",
     "                maximize=False,\n",
     "                feval=WeightedCrossEntropyMetric(alpha=alpha, XGBoost=True),\n",
-    "                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])"
+    "                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+    "\n",
+    "roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
    ]
   }
  ],

From ba31867e50147c827e33b35b5ea916d4a8bee08f Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 18:45:28 +0100
Subject: [PATCH 08/10] add f1 score tutorial

---
 docs/tutorials/F1_score.ipynb | 126 ++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 docs/tutorials/F1_score.ipynb

diff --git a/docs/tutorials/F1_score.ipynb b/docs/tutorials/F1_score.ipynb
new file mode 100644
index 0000000..ef415a7
--- /dev/null
+++ b/docs/tutorials/F1_score.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import make_classification\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "from bokbokbok.eval_metrics.classification import F1_Score_Binary\n",
+    "from bokbokbok.utils import clip_sigmoid\n",
+    "\n",
+    "X, y = make_classification(n_samples=1000, \n",
+    "                            n_features=10, \n",
+    "                            random_state=41114)\n",
+    "\n",
+    "X_train, X_valid, y_train, y_valid = train_test_split(X, \n",
+    "                                                      y, \n",
+    "                                                      test_size=0.25, \n",
+    "                                                      random_state=41114)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Usage in LightGBM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import lightgbm as lgb\n",
+    "\n",
+    "train = lgb.Dataset(X_train, y_train)\n",
+    "valid = lgb.Dataset(X_valid, y_valid, reference=train)\n",
+    "params = {\n",
+    "     'n_estimators': 300,\n",
+    "     'objective': 'binary',\n",
+    "     'seed': 41114,\n",
+    "     'n_jobs': 8,\n",
+    "     'learning_rate': 0.1,\n",
+    "   }\n",
+    "\n",
+    "clf = lgb.train(params=params,\n",
+    "                train_set=train,\n",
+    "                valid_sets=[train, valid],\n",
+    "                valid_names=['train','valid'],\n",
+    "                feval=F1_Score_Binary(average='micro'),\n",
+    "                early_stopping_rounds=100)\n",
+    "\n",
+    "roc_auc_score(y_valid, clf.predict(X_valid))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Usage in XGBoost"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xgboost as xgb\n",
+    "\n",
+    "dtrain = xgb.DMatrix(X_train, y_train)\n",
+    "dvalid = xgb.DMatrix(X_valid, y_valid)\n",
+    "\n",
+    "params = {\n",
+    "     'seed': 41114,\n",
+    "     'objective':'binary:logistic',\n",
+    "     'learning_rate': 0.1,\n",
+    "     'disable_default_eval_metric': 1\n",
+    "   }\n",
+    "\n",
+    "bst = xgb.train(params,\n",
+    "                dtrain=dtrain,\n",
+    "                num_boost_round=300,\n",
+    "                early_stopping_rounds=10,\n",
+    "                verbose_eval=10,\n",
+    "                maximize=True,\n",
+    "                feval=F1_Score_Binary(average='micro', XGBoost=True),\n",
+    "                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
+    "\n",
+    "roc_auc_score(y_valid, clip_sigmoid(bst.predict(dvalid)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:skorecard_py37]",
+   "language": "python",
+   "name": "conda-env-skorecard_py37-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From a53feb8119d7ab94bcb9a3b0b66fc8416257ae2f Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 18:46:26 +0100
Subject: [PATCH 09/10] add f1 score

---
 README.md  | 1 +
 mkdocs.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/README.md b/README.md
index a1b97fe..37e67ea 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@ Main features:
 - Focal Loss
 - Squared Log Error
 - Log Cosh Loss
+- F1 score
 
 ## Installation
 
diff --git a/mkdocs.yml b/mkdocs.yml
index dfb98ee..ef6baec 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -12,6 +12,7 @@
   - Tutorials:
     - Weighted Cross Entropy: tutorials/weighted_cross_entropy.ipynb
     - Focal Loss: tutorials/focal_loss.ipynb
+    - F1 Score: tutorials/F1_score.ipynb
   - Derivations:
     - General Remarks: derivations/note.md
     - Weighted Cross Entropy: derivations/wce.md

From 46978fd54c011275757540fa24634804ec761d61 Mon Sep 17 00:00:00 2001
From: Dan
Date: Wed, 17 Mar 2021 18:46:37 +0100
Subject: [PATCH 10/10] bump and long description

---
 setup.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 4c53bc8..27b219a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 
-# with open("README.md", "r", encoding="UTF-8") as fh:
-#     long_description = fh.read()
+with open("README.md", "r", encoding="UTF-8") as fh:
+    long_description = fh.read()
 
 base_packages = [
     "numpy>=1.19.2",
@@ -33,9 +33,9 @@
 setup(
     name="bokbokbok",
-    version="0.1",
+    version="0.2",
     description="Custom Losses and Metrics for XGBoost, LightGBM, CatBoost",
-    #long_description=long_description,
+    long_description=long_description,
     long_description_content_type="text/markdown",
     author="Daniel Timbrell",
     author_email="dantimbrell@gmail.com",