Merge pull request #141 from quantumblacklabs/release/0.11.0

Release/0.11.0
mckinsey · Nov 11, 2021 · aa39d8a · aa39d8a
2 parents b6a399f + b4566ee
commit aa39d8a
Show file tree

Hide file tree

Showing 97 changed files with 7,397 additions and 2,849 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -44,15 +44,14 @@ utils:
       pre-commit install --install-hooks
       pre-commit install --hook-type pre-push
   linters: &linters
-    name: Run pylint and flake8
+    name: Run linters and auto-formatters
     command: make lint
 
   unit_tests: &unit_tests
     name: Run tests
     command: make test
 
   build_docs: &build_docs
-    # NOTE: doesn't work on python 3.5
     name: Build documentation
     command: make build-docs
 

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,11 +1,3 @@
-## Notice
-
-- [ ] I acknowledge and agree that, by checking this box and clicking "Submit Pull Request":
-
-- I submit this contribution under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0.txt) and represent that I am entitled to do so on behalf of myself, my employer, or relevant third parties, as applicable.
-- I certify that (a) this contribution is my original creation and / or (b) to the extent it is not my original creation, I am authorised to submit this contribution on behalf of the original creator(s) or their licensees.
-- I certify that the use of this contribution as authorised by the Apache 2.0 license does not violate the intellectual property rights of anyone else.
-
 ## Motivation and Context
 Why was this PR created?
 
@@ -14,9 +6,17 @@ What testing strategies have you used?
 
 ## Checklist
 
-- [ ] Read the [contributing](/CONTRIBUTING.md) guidelines
+- [ ] Read the [contributing](https://github.com/quantumblacklabs/causalnex/blob/develop/CONTRIBUTING.md) guidelines
 - [ ] Opened this PR as a 'Draft Pull Request' if it is work-in-progress
 - [ ] Updated the documentation to reflect the code changes
-- [ ] Added a description of this change and added my name to the list of supporting contributions in the [`RELEASE.md`](/RELEASE.md) file
+- [ ] Added a description of this change and added my name to the list of supporting contributions in the [`RELEASE.md`](https://github.com/quantumblacklabs/causalnex/blob/develop/RELEASE.md) file
 - [ ] Added tests to cover my changes
 - [ ] Assigned myself to the PR
+
+## Notice
+
+- [ ] I acknowledge and agree that, by checking this box and clicking "Submit Pull Request":
+
+- I submit this contribution under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0.txt) and represent that I am entitled to do so on behalf of myself, my employer, or relevant third parties, as applicable.
+- I certify that (a) this contribution is my original creation and / or (b) to the extent it is not my original creation, I am authorised to submit this contribution on behalf of the original creator(s) or their licensees.
+- I certify that the use of this contribution as authorised by the Apache 2.0 license does not violate the intellectual property rights of anyone else.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,17 +8,15 @@ repos:
     rev: v2.2.3
     hooks:
     -   id: trailing-whitespace
-        stages: [commit, manual]
     -   id: end-of-file-fixer
-        stages: [commit, manual]
+        exclude: ^docs/source/03_tutorial
     -   id: check-yaml  # Checks yaml files for parseable syntax.
-#        exclude:
     -   id: check-json  # Checks json files for parseable syntax.
     -   id: check-added-large-files
+        exclude: ^docs/source/03_tutorial
     -   id: check-case-conflict  # Check for files that would conflict in case-insensitive filesystems
     -   id: check-merge-conflict  # Check for files that contain merge conflict strings.
     -   id: debug-statements  # Check for debugger imports and py37+ `breakpoint()` calls in python source.
-#        exclude:
     -   id: detect-private-key  # Detects the presence of private keys
     -   id: requirements-txt-fixer  # Sorts entries in requirements.txt
     -   id: flake8
@@ -41,15 +39,15 @@ repos:
     # https://github.com/PyCQA/pylint/issues/618
     # The first set of pylint checks is for local pre-commit; it only runs on the files changed.
     -   id: pylint-quick-causalnex
-        name: "Quick PyLint on causalnex/*"
+        name: "Quick Pylint on causalnex/*"
         language: system
         types: [file, python]
         files: ^causalnex/
         exclude: ^causalnex/ebaybbn
         entry: pylint --disable=unnecessary-pass,cyclic-import --ignore=ebaybbn
         stages: [commit]
     -   id: pylint-quick-tests
-        name: "Quick PyLint on tests/*"
+        name: "Quick Pylint on tests/*"
         language: system
         types: [file, python]
         files: ^tests/
@@ -58,14 +56,14 @@ repos:
 
     # The same pylint checks, but running on all files. It's for manual run with `make lint`
     -   id: pylint-causalnex
-        name: "PyLint on causalnex/*"
+        name: "Pylint on causalnex/*"
         language: system
         pass_filenames: false
         stages: [manual]
         entry: pylint --disable=unnecessary-pass,cyclic-import --ignore=ebaybbn causalnex
         exclude: ^causalnex/ebaybbn
     -   id: pylint-tests
-        name: "PyLint on tests/*"
+        name: "Pylint on tests/*"
         language: system
         pass_filenames: false
         stages: [manual]

diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,26 @@
+cff-version: 1.2.0
+message: "If you'd like to cite CausalNex, please use the following metadata"
+authors:
+- family-names: "Beaumont"
+  given-names: "Paul"
+- family-names: "Horsburgh"
+  given-names: "Ben"
+- family-names: "Pilgerstorfer"
+  given-names: "Philip"
+- family-names: "Droth"
+  given-names: "Angel"
+- family-names: "Oentaryo"
+  given-names: "Richard"
+- family-names: "Ler"
+  given-names: "Steven"
+- family-names: "Nguyen"
+  given-names: "Hiep"
+- family-names: "Ferreira"
+  given-names: "Gabriel Azevedo"
+- family-names: "Patel"
+  given-names: "Zain"
+- family-names: "Leong"
+  given-names: "Wesley"
+title: "CausalNex"
+date-released: 2021-10-15
+url: "https://github.com/quantumblacklabs/causalnex"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -120,7 +120,7 @@ make install-pre-commit
 
 All checks run by our CI / CD servers can be run locally on your computer.
 
-#### PEP-8 Standards (`pylint` and `flake8`)
+#### Linters and auto-formatters
 
 ```bash
 make lint

diff --git a/README.md b/README.md
@@ -64,13 +64,13 @@ Use `all` for a full installation of dependencies (only the plotting right now):
 pip install "causalnex[all]"
 ```
 
-See more detailed installation instructions, including how to setup Python virtual environments, in our [installation guide](https://causalnex.readthedocs.io/en/latest/02_getting_started/02_install.html) and get started with our [tutorial](https://causalnex.readthedocs.io/en/latest/03_tutorial/03_tutorial.html).
+See more detailed installation instructions, including how to set up Python virtual environments, in our [installation guide](https://causalnex.readthedocs.io/en/latest/02_getting_started/02_install.html) and get started with our [tutorial](https://causalnex.readthedocs.io/en/latest/03_tutorial/01_first_tutorial.html).
 
 ## How do I use CausalNex?
 
 You can find the documentation for the latest stable release [here](https://causalnex.readthedocs.io/en/latest/). It explains:
 
-- An end-to-end [tutorial on how to use CausalNex](https://causalnex.readthedocs.io/en/latest/03_tutorial/03_tutorial.html)
+- An end-to-end [tutorial on how to use CausalNex](https://causalnex.readthedocs.io/en/latest/03_tutorial/01_first_tutorial.html)
 - The [main concepts and methods](https://causalnex.readthedocs.io/en/latest/04_user_guide/04_user_guide.html) in using Bayesian Networks for Causal Inference
 
 > Note: You can find the notebook and markdown files used to build the docs in [`docs/source`](docs/source).
@@ -83,6 +83,10 @@ Yes! We'd love you to join us and help us build CausalNex. Check out our [contri
 
 We use [SemVer](http://semver.org/) for versioning. The best way to upgrade safely is to check our [release notes](RELEASE.md) for any notable breaking changes.
 
+## How do I cite CausalNex?
+
+You may click "Cite this repository" under the "About" section of this repository to get the citation information in APA and BibTeX formats.
+
 ## What licence do you use?
 
 See our [LICENSE](LICENSE.md) for more detail.

diff --git a/RELEASE.md b/RELEASE.md
@@ -1,9 +1,22 @@
-# Upcoming release
+# Release 0.11.0
+* Add expectation-maximisation (EM) algorithm to learn with latent variables
+* Add a new tutorial on adding latent variables as well as identifying their candidate locations
+* Allow users to provide self-defined CPD, as per #18 and #99
+* Generalise the utility function to get Markov blanket and incorporate it within `StructureModel` (cf. #136)
+* Add a link to `PyGraphviz` installation guide under the installation prerequisites
+* Add GPU support to PyTorch implementation, as requested in #56 and #114 (some issues remain)
+* Add an example for structure model exporting into first causalnex tutorial, as per #124 and #129
+* Fix infinite loop when querying `InferenceEngine` after a do-intervention that splits
+  the graph into two or more subgraphs, as per #45 and #100
+* Fix decision tree and MDLP discretisation bug when input data is shuffled
+* Fix broken URLs in FAQ documentation, as per #113 and #125
+* Fix integer index type checking for timeseries data, as per #74 and #86
+* Fix bug where inputs to the DAGRegressor/Classifier yielded different predictions between float and int dtypes, as per #140
 
 # Release 0.10.0
-* Add supervised discretisation strategies using Decision Tree and MDLP algorithms.
-* Add `BayesianNetworkClassifier` an sklearn compatible class for fitting and predicting probabilities in a BN.
-* Fixes cyclical import of `causalnex.plots`, as per #106.
+* Add supervised discretisation strategies using Decision Tree and MDLP algorithms
+* Add `BayesianNetworkClassifier`, an sklearn-compatible class for fitting and predicting probabilities in a BN
+* Fixes cyclical import of `causalnex.plots`, as per #106
 * Add utility function to extract Markov blanket from a Bayesian Network
 * Support receiving a list of inputs for `InferenceEngine` with a multiprocessing option
 * Add supervised discretisation strategies using Decision Tree and MDLP algorithms
@@ -97,7 +110,28 @@ Bugfix to address readthedocs issue.
 The initial release of CausalNex.
 
 ## Thanks for supporting contributions
-CausalNex was originally designed by [Paul Beaumont](https://www.linkedin.com/in/pbeaumont/) and [Ben Horsburgh](https://www.linkedin.com/in/benhorsburgh/) to solve challenges they faced in inferencing causality in their project work. This work was later turned into a product thanks to the following contributors:
-[Yetunde Dada](https://github.com/yetudada), [Wesley Leong](https://www.linkedin.com/in/wesleyleong/), [Steve Ler](https://www.linkedin.com/in/song-lim-steve-ler-380366106/), [Viktoriia Oliinyk](https://www.linkedin.com/in/victoria-oleynik/), [Roxana Pamfil](https://www.linkedin.com/in/roxana-pamfil-1192053b/), [Nisara Sriwattanaworachai](https://www.linkedin.com/in/nisara-sriwattanaworachai-795b357/), [Nikolaos Tsaousis](https://www.linkedin.com/in/ntsaousis/), [Angel Droth](https://www.linkedin.com/in/angeldroth/), [Zain Patel](https://www.linkedin.com/in/zain-patel/), and [Shuhei Ishida](https://www.linkedin.com/in/shuhei-i/).
 
-CausalNex would also not be possible without the generous sharing from leading researches in the field of causal inference and we are grateful to everyone who advised and supported us, filed issues or helped resolve them, asked and answered questions or simply be part of inspiring discussions.
+CausalNex was originally designed by [Paul Beaumont](https://www.linkedin.com/in/pbeaumont/) and
+[Ben Horsburgh](https://www.linkedin.com/in/benhorsburgh/) to solve challenges they faced in
+inferring causality in their project work.
+This work was later turned into a product thanks to the following contributors:
+[Philip Pilgerstorfer](https://uk.linkedin.com/in/philippilgerstorfer)
+, [Angel Droth](https://www.linkedin.com/in/angeldroth/)
+, [Richard Oentaryo](https://www.linkedin.com/in/oentaryo/)
+, [Steve Ler](https://www.linkedin.com/in/song-lim-steve-ler-380366106/)
+, [Hiep Nguyen](https://vn.linkedin.com/in/hiep-nguyen-87b772105)
+, [Gabriel Azevedo Ferreira](https://sg.linkedin.com/in/gabriel-azevedo-ferreira-82415810b)
+, [Zain Patel](https://www.linkedin.com/in/zain-patel/)
+, [Wesley Leong](https://www.linkedin.com/in/wesleyleong/)
+, [Yetunde Dada](https://github.com/yetudada)
+, [Viktoriia Oliinyk](https://www.linkedin.com/in/victoria-oleynik/)
+, [Roxana Pamfil](https://www.linkedin.com/in/roxana-pamfil-1192053b/)
+, [Nisara Sriwattanaworachai](https://www.linkedin.com/in/nisara-sriwattanaworachai-795b357/)
+, [Nikolaos Tsaousis](https://www.linkedin.com/in/ntsaousis/)
+, [Shuhei Ishida](https://www.linkedin.com/in/shuhei-i/)
+, [Francesca Sogaro](https://www.linkedin.com/in/francesca-sogaro/)
+, [Deepyaman Datta](https://www.linkedin.com/in/deepyaman/).
+
+CausalNex would also not be possible without the generous sharing from leading researchers in the field of causal inference
+and we are grateful to everyone who advised and supported us, filed issues or helped resolve them, asked and answered
+questions or simply took part in inspiring discussions.
diff --git a/causalnex/__init__.py b/causalnex/__init__.py
@@ -30,6 +30,6 @@
 causalnex toolkit for causal reasoning (Bayesian Networks / Inference)
 """
 
-__version__ = "0.10.0"
+__version__ = "0.11.0"
 
 __all__ = ["structure", "discretiser", "evaluation", "inference", "network", "plots"]
diff --git a/causalnex/discretiser/abstract_discretiser.py b/causalnex/discretiser/abstract_discretiser.py
@@ -102,7 +102,8 @@ def transform(self, data: pd.DataFrame) -> np.array:
             outputs[col] = self._transform_one_column(data[[col]])
 
         transformed_df = pd.DataFrame.from_dict(outputs)
-        return transformed_df
+
+        return transformed_df.set_index(data.index)
 
     def fit_transform(self, *args, **kwargs):
         """

diff --git a/causalnex/discretiser/discretiser.py b/causalnex/discretiser/discretiser.py
@@ -101,57 +101,40 @@ def __init__(
 
         if self.method not in allowed_methods:
             raise ValueError(
-                "{0} is not a recognised method. Use one of: {1}".format(
-                    self.method, " ".join(allowed_methods)
-                )
+                f"{self.method} is not a recognised method. "
+                f"Use one of: {' '.join(allowed_methods)}"
             )
         if self.method in {"uniform", "quantile"} and num_buckets is None:
-            raise ValueError(
-                "{0} method expects {1}".format(self.method, "num_buckets")
-            )
+            raise ValueError(f"{self.method} method expects num_buckets")
 
         if self.method == "outlier" and outlier_percentile is None:
-            raise ValueError(
-                "{0} method expects {1}".format(self.method, "outlier_percentile")
-            )
+            raise ValueError(f"{self.method} method expects outlier_percentile")
 
         if outlier_percentile is not None and not 0 <= outlier_percentile < 0.5:
-            raise ValueError(
-                "{0} must be between 0 and 0.5".format("outlier_percentile")
-            )
+            raise ValueError("outlier_percentile must be between 0 and 0.5")
 
         if self.method == "fixed" and numeric_split_points is None:
-            raise ValueError(
-                "{0} method expects {1}".format(self.method, "numeric_split_points")
-            )
+            raise ValueError(f"{self.method} method expects numeric_split_points")
 
         if (
             numeric_split_points is not None
             and sorted(numeric_split_points) != numeric_split_points
         ):
-            raise ValueError(
-                "{0} must be monotonically increasing".format("numeric_split_points")
-            )
+            raise ValueError("numeric_split_points must be monotonically increasing")
 
         if self.method == "percentiles" and percentile_split_points is None:
-            raise ValueError(
-                "{0} method expects {1}".format(self.method, "percentile_split_points")
-            )
+            raise ValueError(f"{self.method} method expects percentile_split_points")
 
         if percentile_split_points is not None and not all(
             0 <= p <= 1 for p in percentile_split_points
         ):
-            raise ValueError(
-                "{0} must be between 0 and 1".format("percentile_split_points")
-            )
+            raise ValueError("percentile_split_points must be between 0 and 1")
 
         if (
             percentile_split_points is not None
             and sorted(percentile_split_points) != percentile_split_points
         ):
-            raise ValueError(
-                "{0} must be monotonically increasing".format("percentile_split_points")
-            )
+            raise ValueError("percentile_split_points must be monotonically increasing")
 
         if self.method == "fixed":
             self.numeric_split_points = numeric_split_points

diff --git a/causalnex/discretiser/discretiser_strategy.py b/causalnex/discretiser/discretiser_strategy.py
@@ -177,8 +177,8 @@ def fit(
                 self.map_thresholds[feat] = threshold
 
             if self.split_unselected_feat:
-                for feat in self.map_thresholds:
-                    if self.map_thresholds[feat].size == 0:
+                for feat, thres in self.map_thresholds.items():
+                    if thres.size == 0:
                         dtree = deepcopy(dtree)
                         dtree.fit(dataframe[[feat]], dataframe[[target]])
                         thresholds = extract_thresholds_from_dtree(dtree, 1)[0]