UPD: change configuration project #363

Closed · wants to merge 13 commits · Changes from 4 commits
2 changes: 1 addition & 1 deletion .coveragerc
@@ -2,4 +2,4 @@
omit = mapie/_compatibility.py

[report]
omit = mapie/_compatibility.py
omit = mapie/_compatibility.py
7 changes: 7 additions & 0 deletions .flake8
@@ -0,0 +1,7 @@
[flake8]
exclude = .git, .github, __pycache__, .vscode, build
max-line-length = 99
ignore = E302,E305,W503,E203,E731,E402,E266,E712,F401,F821
thibaultcordier marked this conversation as resolved.
indent-size = 4
per-file-ignores =
*/__init__.py:F401
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/config.yml
@@ -1 +1 @@
blank_issues_enabled: false
blank_issues_enabled: false
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -28,4 +28,4 @@ Please describe the tests that you ran to verify your changes. Provide instructi
- [ ] Typing passes successfully : `make type-check`
- [ ] Unit tests pass successfully : `make tests`
- [ ] Coverage is 100% : `make coverage`
- [ ] Documentation builds successfully : `make doc`
- [ ] Documentation builds successfully : `make doc`
11 changes: 10 additions & 1 deletion .github/workflows/test.yml
@@ -1,6 +1,12 @@
name: Unit tests

on: [push, pull_request]
on:
  push:
    branches:
      - dev
      - main
      - master
  pull_request:

jobs:
build:
@@ -23,6 +29,9 @@ jobs:
          - os: windows-latest
            python-version: "3.10"
            numpy-version: 1.22.3
          - os: macos-latest
            python-version: "3.10"
            numpy-version: 1.22.3
    defaults:
      run:
        shell: bash -l {0}
25 changes: 25 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,25 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
        exclude: (docs/)
      - id: end-of-file-fixer
        exclude: (docs/)
      - id: trailing-whitespace
        exclude: (docs/)
  - repo: https://github.com/psf/black
    rev: 22.8.0
    hooks:
      - id: black
        args:
          - "-l 99"
  # Flake8
  - repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.1.1
    hooks:
      - id: mypy
2 changes: 1 addition & 1 deletion .readthedocs.yml
@@ -12,7 +12,7 @@ python:

conda:
environment: environment.doc.yml

sphinx:
builder: html
configuration: doc/conf.py
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -1,4 +1,4 @@
include LICENSE
include AUTHORS.rst
recursive-exclude doc *
recursive-include examples *.py
recursive-include examples *.py
4 changes: 2 additions & 2 deletions Makefile
@@ -1,7 +1,7 @@
.PHONY: tests doc build

lint:
	flake8 . --exclude=doc
lint:
	flake8 . --exclude=doc,build

type-check:
mypy mapie
4 changes: 2 additions & 2 deletions README.rst
@@ -68,7 +68,7 @@ Here's a quick instantiation of MAPIE models for regression and classification p

Implemented methods in **MAPIE** respect three fundamental pillars:

- They are **model and use case agnostic**,
- They are **model and use case agnostic**,
- They possess **theoretical guarantees** under minimal assumptions on the data and the model,
- They are based on **peer-reviewed algorithms** and respect programming standards.

@@ -168,7 +168,7 @@ For more information on the contribution process, please go `here <CONTRIBUTING.
MAPIE has been developed through a collaboration between Quantmetry, Michelin, ENS Paris-Saclay,
and with the financial support from Région Ile de France and Confiance.ai.

|Quantmetry|_ |Michelin|_ |ENS|_ |Confiance.ai|_ |IledeFrance|_
|Quantmetry|_ |Michelin|_ |ENS|_ |Confiance.ai|_ |IledeFrance|_

.. |Quantmetry| image:: https://www.quantmetry.com/wp-content/uploads/2020/08/08-Logo-quant-Texte-noir.svg
:width: 150
2 changes: 1 addition & 1 deletion doc/api.rst
@@ -48,7 +48,7 @@ Metrics
.. autosummary::
:toctree: generated/
:template: function.rst

metrics.classification_coverage_score
metrics.classification_coverage_score_v2
metrics.classification_mean_width_score
16 changes: 8 additions & 8 deletions doc/conf.py
@@ -80,8 +80,8 @@
master_doc = "index"

# General information about the project.
project = u"MAPIE"
copyright = u"2022, Quantmetry"
project = "MAPIE"
copyright = "2022, Quantmetry"

# The version info for the project you"re documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -234,7 +234,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
("index", "mapie.tex", u"MAPIE Documentation", u"Quantmetry", "manual"),
("index", "mapie.tex", "MAPIE Documentation", "Quantmetry", "manual"),
]

# The name of an image file (relative to this directory) to place at the top of
@@ -262,7 +262,7 @@

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [("index", "mapie", u"MAPIE Documentation", [u"Quantmetry"], 1)]
man_pages = [("index", "mapie", "MAPIE Documentation", ["Quantmetry"], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False
@@ -277,8 +277,8 @@
(
"index",
"mapie",
u"MAPIE Documentation",
u"Quantmetry",
"MAPIE Documentation",
"Quantmetry",
"MAPIE",
"One line description of project.",
"Miscellaneous",
@@ -316,13 +316,13 @@
"../examples/regression",
"../examples/classification",
"../examples/multilabel_classification",
"../examples/calibration"
"../examples/calibration",
],
"gallery_dirs": [
"examples_regression",
"examples_classification",
"examples_multilabel_classification",
"examples_calibration"
"examples_calibration",
],
"doc_module": "mapie",
"backreferences_dir": os.path.join("generated"),
2 changes: 1 addition & 1 deletion doc/images/comp-methods.csv
@@ -12,4 +12,4 @@
**Jackknife-aB+**,:math:`\geq 1-2\alpha`,:math:`\gtrsim 1-\alpha`,:math:`K`,:math:`K \times n_{test}`
**Jackknife-aB-minmax**,:math:`\geq 1-\alpha`,:math:`> 1-\alpha`,:math:`K`,:math:`K \times n_{test}`
**Conformalized quantile regressor**,:math:`\geq 1-\alpha`,:math:`\gtrsim 1-\alpha`,:math:`3`,:math:`3 \times n_{test}`
**EnbPI**,:math:`\geq 1-\alpha` (asymptotic),:math:`\gtrsim 1-\alpha`,:math:`K`,:math:`K \times n_{test}`
**EnbPI**,:math:`\geq 1-\alpha` (asymptotic),:math:`\gtrsim 1-\alpha`,:math:`K`,:math:`K \times n_{test}`
2 changes: 1 addition & 1 deletion doc/index.rst
@@ -1,4 +1,4 @@
.. include:: ../README.rst
.. include:: ../README.rst

.. toctree::
:maxdepth: 2
4 changes: 2 additions & 2 deletions doc/notebooks_multilabel_classification.rst
@@ -2,11 +2,11 @@ Multi-label Classification notebooks
====================================

The following examples present advanced analyses
on multi-label classification problems with different
on multi-label classification problems with different
methods proposed in MAPIE.

1. Overview of Recall Control for Multi-Label Classification : `notebook <https://github.com/scikit-learn-contrib/MAPIE/tree/master/notebooks/classification/tutorial_multilabel_classification_recall.ipynb>`_
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

2. Overview of Precision Control for Multi-Label Classification : `notebook <https://github.com/scikit-learn-contrib/MAPIE/tree/master/notebooks/classification/tutorial_multilabel_classification_precision.ipynb>`_
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2 changes: 0 additions & 2 deletions doc/notebooks_regression.rst
@@ -14,5 +14,3 @@ This section lists a series of Jupyter notebooks hosted on the MAPIE Github repo

3. Estimating prediction intervals for time series forecast with EnbPI : `notebook <https://github.com/scikit-learn-contrib/MAPIE/tree/master/notebooks/regression/ts-changepoint.ipynb>`_
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


6 changes: 3 additions & 3 deletions doc/quick_start.rst
@@ -35,7 +35,7 @@ To install directly from the GitHub repository:
2. Run MapieRegressor
---------------------

Let us start with a basic regression problem.
Let us start with a basic regression problem.
Here, we generate one-dimensional noisy data that we fit with a linear model.

.. code:: python
Expand Down Expand Up @@ -71,12 +71,12 @@ for each desired alpha value.
You can compute the coverage of your prediction intervals.

.. code:: python

    from mapie.metrics import regression_coverage_score_v2

    coverage_scores = regression_coverage_score_v2(y_test, y_pis)

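For intuition, the coverage score simply measures the fraction of test targets that fall inside their predicted interval. A minimal numpy-only sketch (the helper name is ours, not MAPIE's implementation):

```python
import numpy as np

def empirical_coverage(y_true, y_low, y_high):
    """Fraction of targets y_true falling inside [y_low, y_high]."""
    y_true = np.asarray(y_true, dtype=float)
    inside = (np.asarray(y_low) <= y_true) & (y_true <= np.asarray(y_high))
    return float(inside.mean())

# Three of the four targets fall inside their interval.
y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_low = np.array([0.5, 1.5, 2.5, 4.5])
y_high = np.array([1.5, 2.5, 3.5, 5.0])
print(empirical_coverage(y_true, y_low, y_high))  # 0.75
```

With a well-calibrated method and ``alpha=0.05``, this fraction should be close to 0.95 on held-out data.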
The estimated prediction intervals can then be plotted as follows.
The estimated prediction intervals can then be plotted as follows.

.. code:: python

8 changes: 4 additions & 4 deletions doc/theoretical_description_binary_classification.rst
@@ -9,7 +9,7 @@ Theoretical Description
There are mainly three different ways to handle uncertainty quantification in binary classification:
calibration (see :doc:`theoretical_description_calibration`), confidence interval (CI) for the probability
:math:`P(Y \vert \hat{\mu}(X))` and prediction sets (see :doc:`theoretical_description_classification`).
These three notions are tightly related for score-based classifiers, as shown in [1].
These three notions are tightly related for score-based classifiers, as shown in [1].

Prediction sets can be computed in the same way for multiclass and binary classification with
:class:`~mapie.classification.MapieClassifier`, and the same theoretical guarantees hold.
@@ -40,7 +40,7 @@ Definition 1 (Prediction Set (PS) w.r.t :math:`f`) [1].
Define the set of all subsets of :math:`\mathcal{Y}`, :math:`\mathcal{L} = \{\{0\}, \{1\}, \{0, 1\}, \emptyset\}`.
A function :math:`S:[0,1]\to\mathcal{L}` is said to be :math:`(1-\alpha)`-PS with respect to :math:`\hat{\mu}` if:

.. math::
.. math::
P(Y\in S(\hat{\mu}(X))) \geq 1 - \alpha

PSs are typically studied for larger output sets, such as :math:`\mathcal{Y}_{regression}=\mathbb{R}` or
@@ -57,7 +57,7 @@ Definition 2 (Confidence Interval (CI) w.r.t :math:`\hat{\mu}`) [1].
Let :math:`\mathcal{I}` denote the set of all subintervals of :math:`[0,1]`.
A function :math:`C:[0,1]\to\mathcal{I}` is said to be :math:`(1-\alpha)`-CI with respect to :math:`\hat{\mu}` if:

.. math::
.. math::
P(\mathbb{E}[Y|\hat{\mu}(X)]\in C(\hat{\mu}(X))) \geq 1 - \alpha

In the framework of conformal prediction, the Venn predictor has this property.
@@ -74,7 +74,7 @@ Definition 3 (Approximate calibration) [1].
The predictor :math:`\hat{\mu}:\mathcal{X} \to [0, 1]` is :math:`(\epsilon,\alpha)`-calibrated
for some :math:`\epsilon,\alpha\in[0, 1]` if with probability at least :math:`1-\alpha`:

.. math::
.. math::
|\mathbb{E}[Y|\hat{\mu}(X)] - \hat{\mu}(X)| \leq \epsilon

See :class:`~sklearn.calibration.CalibratedClassifierCV` or :class:`~mapie.calibration.MapieCalibrator`
20 changes: 10 additions & 10 deletions doc/theoretical_description_calibration.rst
@@ -25,7 +25,7 @@ true probability compared to the original output.
First, we introduce binary calibration. We denote by :math:`(h(X), y)` the score and the ground truth for an object; hence, :math:`y`
values are in :math:`\{0, 1\}`. The model is calibrated if for every output :math:`q \in [0, 1]`, we have:

.. math::
.. math::
Pr(Y = 1 \mid h(X) = q) = q

where :math:`h()` is the score predictor.
@@ -46,7 +46,7 @@ highest score and the corresponding class, whereas confidence calibration only c
Let :math:`c` be the classifier and :math:`h` be the maximum score from the classifier. The couple :math:`(c, h)` is calibrated
according to Top-Label calibration if:

.. math::
.. math::
Pr(Y = c(X) \mid h(X), c(X)) = h(X)


@@ -70,7 +70,7 @@ The ECE combines these two metrics.
\text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| acc(B_m) - conf(B_m) \right|

In simple terms, once the bins over the confidence scores have been created, we compare the mean accuracy of each bin with its mean confidence.
The weighted average of their absolute differences is the ECE. Hence, the lower the ECE, the better the calibration.
The weighted average of their absolute differences is the ECE. Hence, the lower the ECE, the better the calibration.
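As an illustrative sketch of the binning procedure, assuming equal-width bins and binary labels (the helper below is ours, not MAPIE's API):

```python
import numpy as np

def expected_calibration_error(y_true, y_prob, n_bins=10):
    """Weighted average of |accuracy - confidence| over equal-width bins."""
    y_true = np.asarray(y_true, dtype=float)
    y_prob = np.asarray(y_prob, dtype=float)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    n = len(y_prob)
    ece = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        # right-closed bins; the first bin also includes scores equal to 0.0
        in_bin = (y_prob > lo) & (y_prob <= hi)
        if lo == 0.0:
            in_bin |= (y_prob == 0.0)
        if in_bin.any():
            acc = y_true[in_bin].mean()   # empirical accuracy in the bin
            conf = y_prob[in_bin].mean()  # mean predicted confidence
            ece += (in_bin.sum() / n) * abs(acc - conf)
    return ece

# Slightly miscalibrated toy scores: accuracy 0.0 vs confidence 0.1 in the
# low bin, and accuracy 1.0 vs confidence 0.9 in the high bin.
scores = np.array([0.1, 0.1, 0.9, 0.9])
labels = np.array([0, 0, 1, 1])
print(expected_calibration_error(labels, scores, n_bins=2))  # ≈ 0.1
```

Binning conventions (bin edges, open vs closed boundaries) vary between implementations, so exact values may differ from a given library's ECE.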

**Top-Label ECE**

@@ -84,7 +84,7 @@ of the accuracy and confidence based on the top label and take the average ECE f

The Kolmogorov-Smirnov test was derived in [2, 3, 4]. The idea is to consider the cumulative differences between sorted scores :math:`s_i`
and their corresponding labels :math:`y_i` and to compare their properties to those of a standard Brownian motion. Let us consider the
cumulative differences on sorted scores:
cumulative differences on sorted scores:

.. math::
C_k = \frac{1}{N}\sum_{i=1}^k (s_i - y_i)
@@ -94,13 +94,13 @@ We also introduce a typical normalization scale :math:`\sigma`:
.. math::
\sigma = \frac{1}{N}\sqrt{\sum_{i=1}^N s_i(1 - s_i)}

The Kolmogorov-Smirnov statistic is then defined as:
The Kolmogorov-Smirnov statistic is then defined as:

.. math::
G = \max|C_k|/\sigma

It can be shown [2] that, under the null hypothesis of well calibrated scores, this quantity asymptotically (i.e. when N goes to infinity)
converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form
converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form
formulas for the cumulative distribution function (CDF) of the maximum absolute value of such a standard Brownian motion.
So we state the p-value associated with the statistical test of well calibration as:
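A hedged numpy-only sketch of the statistic :math:`G` defined above (helper and variable names are ours; the p-value itself, which requires the CDF of the Brownian-motion maximum, is not computed here):

```python
import numpy as np

def ks_calibration_statistic(scores, labels):
    """G = max_k |C_k| / sigma, computed on scores sorted in increasing order."""
    scores = np.asarray(scores, dtype=float)
    labels = np.asarray(labels, dtype=float)
    order = np.argsort(scores)
    s, y = scores[order], labels[order]
    n = len(s)
    c = np.cumsum(s - y) / n                    # cumulative differences C_k
    sigma = np.sqrt(np.sum(s * (1.0 - s))) / n  # normalization scale
    return float(np.max(np.abs(c)) / sigma)
```

Under the null hypothesis of well-calibrated scores, large values of :math:`G` are evidence against calibration.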

@@ -115,7 +115,7 @@ Kuiper test was derived in [2, 3, 4] and is very similar to Kolmogorov-Smirnov.
H = (\max_k|C_k| - \min_k|C_k|)/\sigma

It can be shown [2] that, under the null hypothesis of well calibrated scores, this quantity asymptotically (i.e. when N goes to infinity)
converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form
converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form
formulas for the cumulative distribution function (CDF) of the range of such a standard Brownian motion.
So we state the p-value associated with the statistical test of well calibration as:

@@ -124,7 +124,7 @@ So we state the p-value associated to the statistical test of well calibration a

**Spiegelhalter test**

The Spiegelhalter test was derived in [6]. It is based on a decomposition of the Brier score:
The Spiegelhalter test was derived in [6]. It is based on a decomposition of the Brier score:

.. math::
B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)^2
Expand All @@ -141,7 +141,7 @@ computed as:
.. math::
Var(B) = \frac{1}{N^2}\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)

So we can build a Z-score as follows:
So we can build a Z-score as follows:

.. math::
Z = \frac{B - E(B)}{\sqrt{Var(B)}} = \frac{\sum_{i=1}^N(y_i - s_i)(1 - 2s_i)}{\sqrt{\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)}}
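The closed-form Z-score above can be sketched with plain numpy (the helper name is ours, not a MAPIE function):

```python
import numpy as np

def spiegelhalter_z(labels, scores):
    """Z = sum((y - s)(1 - 2s)) / sqrt(sum((1 - 2s)^2 * s * (1 - s)))."""
    y = np.asarray(labels, dtype=float)
    s = np.asarray(scores, dtype=float)
    numerator = np.sum((y - s) * (1.0 - 2.0 * s))
    denominator = np.sqrt(np.sum((1.0 - 2.0 * s) ** 2 * s * (1.0 - s)))
    return float(numerator / denominator)
```

Under the null hypothesis of calibration, :math:`Z` is approximately standard normal, so a large :math:`|Z|` is evidence of miscalibration.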
@@ -183,4 +183,4 @@ September, 1951.
[6] Spiegelhalter DJ.
Probabilistic prediction in patient management and clinical trials.
Statistics in medicine.
1986 Sep;5(5):421-33.
1986 Sep;5(5):421-33.