From bf21bdad4f659b25e3b6274a67f7ead41f5ffb1f Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Wed, 8 Nov 2023 16:33:37 +0100 Subject: [PATCH 01/16] add makefile and version --- MAKEFILE | 122 +++++++++++++++++++++++++++++++++++++++++ plotsandgraphs/VERSION | 1 + 2 files changed, 123 insertions(+) create mode 100644 MAKEFILE create mode 100644 plotsandgraphs/VERSION diff --git a/MAKEFILE b/MAKEFILE new file mode 100644 index 0000000..e0641ae --- /dev/null +++ b/MAKEFILE @@ -0,0 +1,122 @@ +.ONESHELL: +ENV_PREFIX=$(shell python -c "if __import__('pathlib').Path('.venv/bin/pip').exists(): print('.venv/bin/')") +USING_POETRY=$(shell grep "tool.poetry" pyproject.toml && echo "yes") + +.PHONY: help +help: ## Show the help. + @echo "Usage: make " + @echo "" + @echo "Targets:" + @fgrep "##" Makefile | fgrep -v fgrep + + +.PHONY: show +show: ## Show the current environment. + @echo "Current environment:" + @if [ "$(USING_POETRY)" ]; then poetry env info && exit; fi + @echo "Running using $(ENV_PREFIX)" + @$(ENV_PREFIX)python -V + @$(ENV_PREFIX)python -m site + +.PHONY: install +install: ## Install the project in dev mode. + @if [ "$(USING_POETRY)" ]; then poetry install && exit; fi + @echo "Don't forget to run 'make virtualenv' if you got errors." + $(ENV_PREFIX)pip install -e .[test] + +.PHONY: fmt +fmt: ## Format code using black & isort. + $(ENV_PREFIX)isort plotsandgraphs/ + $(ENV_PREFIX)black -l 79 plotsandgraphs/ + $(ENV_PREFIX)black -l 79 tests/ + +.PHONY: lint +lint: ## Run pep8, black, mypy linters. + $(ENV_PREFIX)flake8 plotsandgraphs/ + $(ENV_PREFIX)black -l 79 --check plotsandgraphs/ + $(ENV_PREFIX)black -l 79 --check tests/ + $(ENV_PREFIX)mypy --ignore-missing-imports plotsandgraphs/ + +.PHONY: test +test: lint ## Run tests and generate coverage report. 
+ $(ENV_PREFIX)pytest -v --cov-config .coveragerc --cov=plotsandgraphs -l --tb=short --maxfail=1 tests/ + $(ENV_PREFIX)coverage xml + $(ENV_PREFIX)coverage html + +.PHONY: watch +watch: ## Run tests on every change. + ls **/**.py | entr $(ENV_PREFIX)pytest -s -vvv -l --tb=long --maxfail=1 tests/ + +.PHONY: clean +clean: ## Clean unused files. + @find ./ -name '*.pyc' -exec rm -f {} \; + @find ./ -name '__pycache__' -exec rm -rf {} \; + @find ./ -name 'Thumbs.db' -exec rm -f {} \; + @find ./ -name '*~' -exec rm -f {} \; + @rm -rf .cache + @rm -rf .pytest_cache + @rm -rf .mypy_cache + @rm -rf build + @rm -rf dist + @rm -rf *.egg-info + @rm -rf htmlcov + @rm -rf .tox/ + @rm -rf docs/_build + +.PHONY: virtualenv +virtualenv: ## Create a virtual environment. + @if [ "$(USING_POETRY)" ]; then poetry install && exit; fi + @echo "creating virtualenv ..." + @rm -rf .venv + @python3 -m venv .venv + @./.venv/bin/pip install -U pip + @./.venv/bin/pip install -e .[test] + @echo + @echo "!!! Please run 'source .venv/bin/activate' to enable the environment !!!" + +.PHONY: release +release: ## Create a new tag for release. + @echo "WARNING: This operation will create s version tag and push to github" + @read -p "Version? (provide the next x.y.z semver) : " TAG + @echo "$${TAG}" > plotsandgraphs/VERSION + @$(ENV_PREFIX)gitchangelog > HISTORY.md + @git add plotsandgraphs/VERSION HISTORY.md + @git commit -m "release: version $${TAG} 🚀" + @echo "creating git tag : $${TAG}" + @git tag $${TAG} + @git push -u origin HEAD --tags + @echo "Github Actions will detect the new tag and release the new version." + +.PHONY: docs +docs: ## Build the documentation. + @echo "building documentation ..." + @$(ENV_PREFIX)mkdocs build + URL="site/index.html"; xdg-open $$URL || sensible-browser $$URL || x-www-browser $$URL || gnome-open $$URL || open $$URL + +.PHONY: switch-to-poetry +switch-to-poetry: ## Switch to poetry package manager. + @echo "Switching to poetry ..." + @if ! 
poetry --version > /dev/null; then echo 'poetry is required, install from https://python-poetry.org/'; exit 1; fi + @rm -rf .venv + @poetry init --no-interaction --name=a_flask_test --author=rochacbruno + @echo "" >> pyproject.toml + @echo "[tool.poetry.scripts]" >> pyproject.toml + @echo "plotsandgraphs = 'plotsandgraphs.__main__:main'" >> pyproject.toml + @cat requirements.txt | while read in; do poetry add --no-interaction "$${in}"; done + @cat requirements-test.txt | while read in; do poetry add --no-interaction "$${in}" --dev; done + @poetry install --no-interaction + @mkdir -p .github/backup + @mv requirements* .github/backup + @mv setup.py .github/backup + @echo "You have switched to https://python-poetry.org/ package manager." + @echo "Please run 'poetry shell' or 'poetry run plotsandgraphs'" + +.PHONY: init +init: ## Initialize the project based on an application template. + @./.github/init.sh + + +# This project has been generated from rochacbruno/python-project-template +# __author__ = 'rochacbruno' +# __repo__ = https://github.com/rochacbruno/python-project-template +# __sponsor__ = https://github.com/sponsors/rochacbruno/ \ No newline at end of file diff --git a/plotsandgraphs/VERSION b/plotsandgraphs/VERSION new file mode 100644 index 0000000..6c6aa7c --- /dev/null +++ b/plotsandgraphs/VERSION @@ -0,0 +1 @@ +0.1.0 \ No newline at end of file From 439543a6ae6dedc78e275d0980aca09dd389a9fd Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Wed, 8 Nov 2023 17:10:21 +0100 Subject: [PATCH 02/16] Add initial unit test --- tests/__init__.py | 0 tests/test_test.py | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_test.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_test.py b/tests/test_test.py new file mode 100644 index 0000000..b2820fc --- /dev/null +++ b/tests/test_test.py @@ -0,0 +1,4 @@ +# This is just a test for a test + 
+def test_test(): + assert True \ No newline at end of file From 32162a1a182fab11aa8653f83da65a08c5b1b781 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Wed, 8 Nov 2023 17:15:23 +0100 Subject: [PATCH 03/16] add requirements-test.txt --- requirements-text.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 requirements-text.txt diff --git a/requirements-text.txt b/requirements-text.txt new file mode 100644 index 0000000..a7baf9d --- /dev/null +++ b/requirements-text.txt @@ -0,0 +1,6 @@ +matplotlib +numpy +pandas +seaborn +scikit-learn +tqdm \ No newline at end of file From d0ab1f03c9a1b51e44d5d7fc19676bbd36c5e5a5 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Wed, 8 Nov 2023 17:23:12 +0100 Subject: [PATCH 04/16] update setup.py --- setup.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index dcc5f73..823dfc7 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,68 @@ +"""Python setup.py for plotsandgraphs package""" +import io +import os from setuptools import setup, find_packages +# setup( +# name='plotsandgraphs', +# version='0.1.0', +# packages=find_packages(include=['plotsandgraphs', 'plotsandgraphs.*']) +# ) + +PROJECT_NAME = 'plotsandgraphs' + + + + +def read(*paths, **kwargs): + """Read the contents of a text file safely. + >>> read("project_name", "VERSION") + '0.1.0' + >>> read("README.md") + ... 
+ """ + + content = "" + with io.open( + os.path.join(os.path.dirname(__file__), *paths), + encoding=kwargs.get("encoding", "utf8"), + ) as open_file: + content = open_file.read().strip() + return content + + +def read_requirements(path): + return [ + line.strip() + for line in read(path).split("\n") + if not line.startswith(('"', "#", "-", "git+")) + ] + + setup( - name='plotsandgraphs', - version='0.1.0', - packages=find_packages(include=['plotsandgraphs', 'plotsandgraphs.*']) + name=PROJECT_NAME, + version=read(PROJECT_NAME, "VERSION"), + description="Create plots and graphs for your Machine Learning projects.", + url="https://github.com/joshuawe/plots_and_graphs", + long_description=read("README.md"), + long_description_content_type="text/markdown", + author="Joshua Wendland and Fabian Krüger", + packages=find_packages(exclude=["tests", ".github"]), + install_requires=read_requirements("requirements.txt"), + entry_points={ + "console_scripts": ["project_name = project_name.__main__:main"] + }, + extras_require={"test": read_requirements("requirements-test.txt")}, + license='GNU General Public License v3.0', + keywords=['plots', 'graphs', 'machine learning', 'data science', 'data visualization', 'data analysis', 'matplotlib'], + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Environment :: Console', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Topic :: Scientific/Engineering :: Artificial Intelligence' + ], ) \ No newline at end of file From 2a5d01f8956c948daa8237ed0cc0cc906054e1c9 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Wed, 8 Nov 2023 18:21:02 +0100 Subject: [PATCH 05/16] add the actual source --- plotsandgraphs/__init__.py | 0 plotsandgraphs/binary_classifier.py | 448 ++++++++++++++++++ plotsandgraphs/compare_distributions.py | 103 ++++ 
...irements-text.txt => requirements-test.txt | 0 4 files changed, 551 insertions(+) create mode 100644 plotsandgraphs/__init__.py create mode 100644 plotsandgraphs/binary_classifier.py create mode 100644 plotsandgraphs/compare_distributions.py rename requirements-text.txt => requirements-test.txt (100%) diff --git a/plotsandgraphs/__init__.py b/plotsandgraphs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plotsandgraphs/binary_classifier.py b/plotsandgraphs/binary_classifier.py new file mode 100644 index 0000000..70e5069 --- /dev/null +++ b/plotsandgraphs/binary_classifier.py @@ -0,0 +1,448 @@ +import matplotlib.pyplot as plt +from matplotlib.colors import to_rgba +from matplotlib.figure import Figure +import seaborn as sns +import numpy as np +import pandas as pd +from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, roc_curve, auc, accuracy_score, precision_recall_curve +from sklearn.calibration import calibration_curve +from sklearn.utils import resample +from pathlib import Path +from tqdm import tqdm +from typing import Optional + + +def plot_accuracy(y_true, y_pred, name='', save_fig_path=None) -> Figure: + """ Really ugly plot, I am not sure if the scalar value for accuracy should receive an entire plot.""" + accuracy = accuracy_score(y_true, y_pred) + + # accuracy = 0 + # for t in range(max_seq_len): + # accuracy += accuracy_score( y[:,t,0].round() , y_pred[:,t] ) + # accuracy = accuracy / max_seq_len + fig= plt.figure( figsize=(4,5)) + plt.bar( np.array([0]), np.array([ accuracy ])) + # axs[0].set_xticks(ticks=range(2)) + # axs[0].set_xticklabels(["train", "test"]) + plt.ylabel('Accuracy') + plt.ylim([0,1]) + # axs[0].set_xlabel('Features') + title = "Predictor model: {}".format(name ) + plt.title(title) + plt.tight_layout() + + if (save_fig_path != None): + path = Path(save_fig_path) + path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(save_fig_path, bbox_inches='tight') + return fig, 
accuracy + +def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, save_fig_path=None) -> Figure: + import matplotlib.colors as colors + + # Compute the confusion matrix + cm = confusion_matrix(y_true, y_pred.round()) + # normalize the confusion matrix + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + + # Create the ConfusionMatrixDisplay instance and plot it + cmd = ConfusionMatrixDisplay(cm, display_labels=['class 0\nnegative', 'class 1\npositive']) + fig, ax = plt.subplots(figsize=(4,4)) + cmd.plot(cmap='YlOrRd', values_format='', colorbar=False, ax=ax, text_kw={'visible':False}) + cmd.texts_ = [] + cmd.text_ = [] + + text_labels = ['TN', 'FP', 'FN', 'TP'] + cmap_min, cmap_max = cmd.im_.cmap(0), cmd.im_.cmap(1.0) + for i in range(2): + for j in range(2): + ax.text(j, i, f"{text_labels[i * 2 + j]}\n{cmd.im_.get_array()[i, j]:.2%}", + ha="center", va="center", color=cmap_min if cmd.im_.get_array()[i, j] > 0.5 else cmap_max) + + ax.vlines([0.5], *ax.get_ylim(), color='white', linewidth=1) + ax.hlines([0.49], *ax.get_xlim(), color='white', linewidth=1) + ax.spines[:].set_visible(False) + + + bounds = np.linspace(0, 1, 11) + cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1) + norm = colors.BoundaryNorm(bounds, cmap.N) + cbar = ax.figure.colorbar(cmd.im_, ax=ax, cmap=cmap, norm=norm, boundaries=bounds, ticks=bounds[::2], location="right", shrink=0.8) + # cbar.set_ticks(np.arange(0,1.1,0.1)) + cbar.ax.yaxis.set_ticks_position('both') + cbar.outline.set_visible(False) + plt.tight_layout() + + if (save_fig_path != None): + path = Path(save_fig_path) + path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(save_fig_path, bbox_inches='tight') + + return fig + + + + +def plot_classification_report(y_test: np.ndarray, + y_pred: np.ndarray, + title='Classification Report', + figsize=(8, 4), + save_fig_path=None, **kwargs): + """ + TODO: save all these plots + Plot the classification report of sklearn + + Parameters + ---------- + y_test : 
pandas.Series of shape (n_samples,) + Targets. + y_pred : pandas.Series of shape (n_samples,) + Predictions. + title : str, default = 'Classification Report' + Plot title. + fig_size : tuple, default = (8, 6) + Size (inches) of the plot. + dpi : int, default = 70 + Image DPI. + save_fig_path : str, defaut=None + Full path where to save the plot. Will generate the folders if they don't exist already. + **kwargs : attributes of classification_report class of sklearn + + Returns + ------- + fig : Matplotlib.pyplot.Figure + Figure from matplotlib + ax : Matplotlib.pyplot.Axe + Axe object from matplotlib + """ + import matplotlib as mpl + import matplotlib.colors as colors + import seaborn as sns + import pathlib + + fig, ax = plt.subplots(figsize=figsize) + + cmap = 'YlOrRd' + + clf_report = classification_report(y_test, y_pred, output_dict=True, **kwargs) + keys_to_plot = [key for key in clf_report.keys() if key not in ('accuracy', 'macro avg', 'weighted avg')] + df = pd.DataFrame(clf_report, columns=keys_to_plot).T + #the following line ensures that dataframe are sorted from the majority classes to the minority classes + df.sort_values(by=['support'], inplace=True) + + #first, let's plot the heatmap by masking the 'support' column + rows, cols = df.shape + mask = np.zeros(df.shape) + mask[:,cols-1] = True + + bounds = np.linspace(0, 1, 11) + cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1) + norm = colors.BoundaryNorm(bounds, cmap.N) + + ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, fmt='.3g', + cbar_kws={'ticks':bounds[::2], 'norm':norm, 'boundaries':bounds}, + vmin=0.0, + vmax=1.0, + linewidths=2, linecolor='white' + ) + cbar = ax.collections[0].colorbar + cbar.ax.yaxis.set_ticks_position('both') + + cmap_min, cmap_max = cbar.cmap(0), cbar.cmap(1.0) + + # add text annotation to heatmap + dx, dy = 0.5, 0.5 + for i in range(rows): + for j in range(cols-1): + text = f"{df.iloc[i, j]:.2%}" #if (j 0.5 else cmap_max) + + #then, let's add the support column by 
normalizing the colors in this column + mask = np.zeros(df.shape) + mask[:,:cols-1] = True + + ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, cbar=False, + linewidths=2, linecolor='white', fmt='.0f', + vmin=df['support'].min(), + vmax=df['support'].sum(), + norm=mpl.colors.Normalize(vmin=df['support'].min(), + vmax=df['support'].sum()) + ) + + cmap_min, cmap_max = cbar.cmap(0), cbar.cmap(1.0) + for i in range(rows): + j = cols-1 + text = f"{df.iloc[i, j]:.0f}" #if (j 0.5 else cmap_max) + + plt.title(title) + plt.xticks(rotation = 45) + plt.yticks(rotation = 360) + plt.tight_layout() + + if (save_fig_path != None): + path = Path(save_fig_path) + path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(save_fig_path, bbox_inches='tight') + + return fig, ax + + + + + +def plot_roc_curve( + y_true: np.ndarray, + y_score: np.ndarray, + figsize=(5,5), + save_fig_path=None, + confidence_interval: float=0.95, + highlight_roc_area=True, + n_bootstraps=None) -> Figure: + """ + Creates a ROC curve for a binary classifier. Includes the option for bootstrapping. + + Parameters + ---------- + y_true : np.ndarray + The actual labels of the data. Either 0 or 1. + y_score : np.ndarray + The output scores of the classifier. Between 0 and 1. + figsize : tuple, optional + The size of the figure. By default (5,5). + save_fig_path : str, optional + Path to folder where the figure should be saved. If None then plot is not saved, by default None. E.g. 'figures/roc_curve.png'. + confidence_interval : float, optional + The confidence interval to use for the calibration plot. By default 0.95. Between 0 and 1. Has no effect when not using n_bootstraps. + highlight_roc_area : bool, optional + Whether to highlight the area under the ROC curve. By default True. Has no effect when using n_bootstraps. + n_bootstraps : int, optional + Number of bootstrap samples to use for the calibration plot. Recommended minimum: 1000, moderate: 5000-10000, high: 50000-100000. 
+ If None, then no bootstrapping is done. By default None. + + Returns + ------- + fig : matplotlib.pyplot figure + The figure of the calibration plot + """ + + # create figure + fig = plt.figure(figsize=figsize) + ax = fig.add_subplot(111) + + if n_bootstraps is None: + base_fpr, mean_tprs, thresholds = roc_curve(y_true, y_score) + mean_auc = auc(base_fpr, mean_tprs) + if highlight_roc_area is True: + plt.fill_between(base_fpr, 0, mean_tprs, alpha=0.2, zorder=2) + if confidence_interval is not None: + print('Warning: confidence_intervals is not None, but n_bootstraps is None. Confidence intervals will not be plotted.') + else: + # Bootstrapping for AUROC + bootstrap_aucs, bootstrap_tprs = [], [] + base_fpr = np.linspace(0, 1, 101) + for _ in tqdm(range(n_bootstraps), desc='Bootstrapping'): + indices = resample(np.arange(len(y_true)), replace=True) + fpr_i, tpr_i, _ = roc_curve(y_true[indices], y_score[indices]) + roc_auc_i = auc(fpr_i, tpr_i) + bootstrap_aucs.append(roc_auc_i) + + # Interpolate tpr_i to base_fpr, so we have the tpr for the same fpr values for each bootstrap iteration + tpr_i_interp = np.interp(base_fpr, fpr_i, tpr_i) + tpr_i_interp[0] = 0.0 + bootstrap_tprs.append(tpr_i_interp) + + mean_auc = np.mean(bootstrap_aucs) + tprs = np.array(bootstrap_tprs) + mean_tprs = tprs.mean(axis=0) + + # visualize confidence intervals + if confidence_interval is not None: + CI_upper = confidence_interval + (1-confidence_interval)/2 + CI_lower = (1-confidence_interval)/2 + tprs_upper = np.quantile(tprs, CI_upper, axis=0) + tprs_lower = np.quantile(tprs, CI_lower, axis=0) + auc_upper = np.quantile(bootstrap_aucs, CI_upper) + auc_lower = np.quantile(bootstrap_aucs, CI_lower) + label = f'{confidence_interval:.0%} CI: [{auc_lower:.2f}, {auc_upper:.2f}]' + plt.fill_between(base_fpr, tprs_lower, tprs_upper, alpha=0.3, label=label, zorder=2) + + if highlight_roc_area is True: + print('Warning: highlight_roc_area is True, but n_bootstraps is not None. 
The area under the ROC curve will not be highlighted.') + + plt.plot(base_fpr, mean_tprs, label=f'ROC curve (AUROC = {mean_auc:.2f})', zorder=3) + plt.plot([0, 1], [0, 1], 'k--', label='Random classifier') + plt.xlim([0.0, 1.01]) + plt.ylim([-0.01, 1.01]) + plt.xlabel('False Positive Rate') + plt.ylabel('True Positive Rate') + plt.title('Receiver Operating Characteristic (ROC)') + # reverse legend entry order + handles, labels = plt.gca().get_legend_handles_labels() + handles = handles[::-1] + labels = labels[::-1] + plt.legend(handles, labels, loc="lower right", frameon=False) + ax.spines[:].set_visible(False) + ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5) + ax.set_yticks(np.arange(0, 1.1, 0.2)) + plt.tight_layout() + + if save_fig_path: + path = Path(save_fig_path) + path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(save_fig_path, bbox_inches='tight') + + return fig + + + +def plot_calibration_curve(y_prob: np.ndarray, y_true: np.ndarray, save_fig_path=None): + """ + Creates calibration plot for a binary classifier. + + Parameters + ---------- + y_prob : np.ndarray + The output probabilities of the classifier. Between 0 and 1. + y_true : np.ndarray + The actual labels of the data. Either 0 or 1. + save_fig_path : _type_, optional + Path to folder where the figure should be saved. 
If None then plot is not saved, by default None + + Returns + ------- + fig : matplotlib.pyplot figure + The figure of the calibration plot + """ + prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=10, strategy='uniform') + expected_cal_error = np.abs(prob_pred-prob_true).mean().round(2) + fig = plt.figure(figsize=(5,5)) + ax = fig.add_subplot(111) + + # Calculate bar width + bar_width = (prob_pred[1:] - prob_pred[:-1]).mean() * 0.75 + + # Plotting + ax.bar(prob_pred, prob_true, width=bar_width, zorder=3, facecolor=to_rgba('C0',0.75), edgecolor='midnightblue', linewidth=2, label=f'True Calibration') + ax.bar(prob_pred, prob_pred - prob_true, bottom=prob_true, width=bar_width, zorder=3, alpha=0.5, edgecolor='red', fill=False, linewidth=2, label=f'Mean ECE = {expected_cal_error}', hatch='//') + ax.plot([0, 1], [0, 1], linestyle='--', color='grey', zorder=3, label='Perfect Calibration') + + # Labels and titles + ax.set(xlabel='Predicted probability', ylabel='True probability') + plt.xlim([0.0, 1.005]) + plt.ylim([-0.01, 1.0]) + ax.legend(loc='upper left', frameon=False) + + # show y-grid + ax.spines[:].set_visible(False) + ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5) + ax.set_yticks(np.arange(0, 1.1, 0.2)) + ax.set_xticks(np.arange(0, 1.1, 0.2)) + plt.tight_layout() + + # save plot + if (save_fig_path != None): + path = Path(save_fig_path) + path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(save_fig_path, bbox_inches='tight') + + return fig + + +def plot_y_prob_histogram(y_prob: np.ndarray, save_fig_path=None) -> Figure: + fig = plt.figure(figsize=(5,5)) + ax = fig.add_subplot(111) + ax.hist(y_prob, bins=10, alpha=0.9, edgecolor='midnightblue', linewidth=2, rwidth=1) + # same histogram as above, but with border lines + # ax.hist(y_prob, bins=10, alpha=0.5, edgecolor='black', linewidth=1.2) + ax.set(xlabel='Predicted probability [-]', ylabel='Count [-]', xlim=(-0.01, 1.0)) + ax.set_title('Histogram of predicted 
probabilities') + + ax.spines[:].set_visible(False) + ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5) + ax.set_xticks(np.arange(0, 1.1, 0.2)) + plt.tight_layout() + + # save plot + if (save_fig_path != None): + path = Path(save_fig_path) + path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(save_fig_path, bbox_inches='tight') + + return fig + + + +def plot_pr_curve( + y_true: np.ndarray, + y_score: np.ndarray, + figsize=(5,5), + save_fig_path: Optional[str]=None, + color: Optional[str]= None, + label: Optional[str]=None, + title: Optional[str]=None + ) -> Figure: + """ + Visualize the Precision-Recall curve for a binary classifier. + + Parameters + ---------- + y_true : np.ndarray + The actual labels of the data. Either 0 or 1. + y_score : np.ndarray + The output scores of the classifier. Between 0 and 1. + figsize : tuple, optional + The size of the figure. By default (5,5). + save_fig_path : str, optional + Path to folder where the figure should be saved. If None then plot is not saved, by default None. E.g. 'figures/pr_curve.png'. + color : str, optional + Color of the PR curve, by default None. + label : str, optional + Custom label for the plot. If None, a default label is used. By default None. 
+ + Returns + ------- + fig : matplotlib.pyplot figure + The figure of the PR curve + """ + + # Create a new figure + fig = plt.figure(figsize=figsize) + ax = fig.add_subplot(111) + + # Compute Precision-Recall curve and area for each class + precision, recall, _ = precision_recall_curve(y_true, y_score) + + pr_auc = auc(recall, precision) + + if label is None: + # Use a default label if none is provided + label = 'PR curve' + + label += f' (area = {pr_auc:.3f})' + + # Plot Precision-Recall curve + ax.plot(recall, precision, label=label, color=color) + ax.set_xlim([0.0, 1.01]) + ax.set_ylim([-0.01, 1.01]) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') + if title is not None: + ax.set_title(title) + ax.legend(loc="lower right") + ax.spines[:].set_visible(False) + ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5) + ax.set_yticks(np.arange(0, 1.1, 0.2)) + plt.tight_layout() + + # Save the figure if save_fig_path is specified + if save_fig_path: + plt.savefig(save_fig_path, bbox_inches='tight') + + return fig + diff --git a/plotsandgraphs/compare_distributions.py b/plotsandgraphs/compare_distributions.py new file mode 100644 index 0000000..943c3e0 --- /dev/null +++ b/plotsandgraphs/compare_distributions.py @@ -0,0 +1,103 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +import pandas as pd +from typing import List, Tuple + + +def plot_raincloud(df: pd.DataFrame, + x_col: str, + y_col: str, + colors: List[str] = None, + order: List[str] = None, + title: str = None, + x_label: str = None, + x_range: Tuple[float, float] = None, + show_violin = True, + show_scatter = True, + show_boxplot = True): + + """ + Generate a raincloud plot using Pandas DataFrame. + + Parameters: + - df (pd.DataFrame): The data frame containing the data. + - x_col (str): The column name for the x-axis data. + - y_col (str): The column name for the y-axis categories. + - colors (List[str], optional): List of colors for each category. 
Defaults to tab10 cmap. + - order (List[str], optional): Order of categories on y-axis. Defaults to unique values in y_col. + - title (str, optional): Title of the plot. + - x_label (str, optional): Label for the x-axis. + - x_range (Tuple[float, float], optional): Range for the x-axis. + - show_violin (bool, optional): Whether to show violin plot. Defaults to True. + - show_scatter (bool, optional): Whether to show scatter plot. Defaults to True. + - show_boxplot (bool, optional): Whether to show boxplot. Defaults to True. + + Returns: + - matplotlib.figure.Figure: The generated plot figure. + """ + + fig, ax = plt.subplots(figsize=(16, 8)) + offset = 0.2 # Offset value to move plots + + if order is None: + order = df[y_col].unique() + + # if colors are none, use distinct colors for each group + if colors is None: + cmap = plt.get_cmap('tab10') + colors = [mpl.colors.to_hex(cmap(i)) for i in np.linspace(0, 1, len(order))] + else: + assert len(colors) == len(order), 'colors and order must be the same length' + colors = colors + + # Boxplot + if show_boxplot: + bp = ax.boxplot([df[df[y_col] == grp][x_col].values for grp in order], + patch_artist=True, vert=False, positions=np.arange(1 + offset, len(order) + 1 + offset), widths=0.2) + + # Customize boxplot colors + for patch, color in zip(bp['boxes'], colors): + patch.set_facecolor(color) + patch.set_alpha(0.8) + + # Set median line color to black + for median in bp['medians']: + median.set_color('black') + + # Violinplot + if show_violin: + vp = ax.violinplot([df[df[y_col] == grp][x_col].values for grp in order], + positions=np.arange(1 + offset, len(order) + 1 + offset), showmeans=False, showextrema=False, showmedians=False, vert=False) + + # Customize violinplot colors + for idx, b in enumerate(vp['bodies']): + b.get_paths()[0].vertices[:, 1] = np.clip(b.get_paths()[0].vertices[:, 1], idx + 1 + offset, idx + 2 + offset) + b.set_color(colors[idx]) + + # Scatterplot with jitter + if show_scatter: + for idx, grp in 
enumerate(order): + features = df[df[y_col] == grp][x_col].values + y = np.full(len(features), idx + 1 - offset) + jitter_amount = 0.12 + y += np.random.uniform(low=-jitter_amount, high=jitter_amount, size=len(y)) + plt.scatter(features, y, s=10, c=colors[idx], alpha=0.3, facecolors='none') + + # Labels + plt.yticks(np.arange(1, len(order) + 1), order) + + if x_label is None: + x_label = x_col + plt.xlabel(x_label) + if title: + plt.title(title + '\n') + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['left'].set_visible(False) + ax.xaxis.grid(True) + + if x_range: + plt.xlim(x_range) + + return fig diff --git a/requirements-text.txt b/requirements-test.txt similarity index 100% rename from requirements-text.txt rename to requirements-test.txt From 9defcfd570b954291c3777d7a206b5ce2fada2d0 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 18:40:48 +0100 Subject: [PATCH 06/16] update github workflows --- .github/release_message.sh | 3 + .github/workflows/main.yml | 138 ++++++++++++++++++++++++++++------ .github/workflows/pylint.yml | 2 +- .github/workflows/release.yml | 50 ++++++++++++ plotsandgraphs/VERSION | 2 +- plotsandgraphs/__init__.py | 2 + 6 files changed, 174 insertions(+), 23 deletions(-) create mode 100644 .github/release_message.sh create mode 100644 .github/workflows/release.yml diff --git a/.github/release_message.sh b/.github/release_message.sh new file mode 100644 index 0000000..cf04476 --- /dev/null +++ b/.github/release_message.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +previous_tag=$(git tag --sort=-creatordate | sed -n 2p) +git shortlog "${previous_tag}.." 
| s \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0f1ba9c..8cd38d7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,25 +1,121 @@ -name: API workflow +# name: API workflow -on: [push, pull_request] +# on: [push, pull_request] + +# jobs: +# build: +# runs-on: ubuntu-latest +# name: Test python API +# steps: +# - uses: actions/checkout@v1 +# - name: Install requirements +# run: pip install -r requirements.txt +# - name: Run tests and collect coverage +# run: pytest --cov . +# - name: Upload coverage reports to Codecov +# run: | +# # Replace `linux` below with the appropriate OS +# # Options are `alpine`, `linux`, `macos`, `windows` +# curl -Os https://uploader.codecov.io/latest/linux/codecov +# chmod +x codecov +# ./codecov -t ${CODECOV_TOKEN} +# - name: Upload coverage reports to Codecov +# uses: codecov/codecov-action@v3 +# env: +# CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + + + +# This is a basic workflow to help you get started with Actions + +name: CI + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the main branch + push: + branches: [ main ] + pull_request: + branches: [ main ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: - build: - runs-on: ubuntu-latest - name: Test python API + linter: + strategy: + fail-fast: false + matrix: + python-version: [3.9] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install project + run: make install + - name: Run linter + run: make lint + + tests_linux: + needs: linter + strategy: + fail-fast: false + matrix: + python-version: [3.9] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ 
matrix.python-version }} + - name: Install project + run: make install + - name: Run tests + run: make test + - name: "Upload coverage to Codecov" + uses: codecov/codecov-action@v3 + # with: + # fail_ci_if_error: true + + tests_mac: + needs: linter + strategy: + fail-fast: false + matrix: + python-version: [3.9] + os: [macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install project + run: make install + - name: Run tests + run: make test + + tests_win: + needs: linter + strategy: + fail-fast: false + matrix: + python-version: [3.9] + os: [windows-latest] + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 - - name: Install requirements - run: pip install -r requirements.txt - - name: Run tests and collect coverage - run: pytest --cov . - - name: Upload coverage reports to Codecov - run: | - # Replace `linux` below with the appropriate OS - # Options are `alpine`, `linux`, `macos`, `windows` - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov -t ${CODECOV_TOKEN} - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v3 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Pip + run: pip install --user --upgrade pip + - name: Install project + run: pip install -e .[test] + - name: run tests + run: pytest -s -vvvv -l --tb=long tests \ No newline at end of file diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 7a88e1d..867fd32 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,6 +1,6 @@ name: Pylint -on: [push] +on: [push, pull_request] jobs: build: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..06ea5bc --- 
/dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,50 @@ +name: Upload Python Package + +on: + push: + # Sequence of patterns matched against refs/tags + tags: + - '*' # Push events to matching v*, i.e. v1.0, v20.15.10 + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + release: + name: Create Release + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + with: + # by default, it uses a depth of 1 + # this fetches all history so that we can read each commit + fetch-depth: 0 + - name: Generate Changelog + run: .github/release_message.sh > release_message.md + - name: Release + uses: softprops/action-gh-release@v1 + with: + body_path: release_message.md + + deploy: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* \ No newline at end of file diff --git a/plotsandgraphs/VERSION b/plotsandgraphs/VERSION index 6c6aa7c..9a26661 100644 --- a/plotsandgraphs/VERSION +++ b/plotsandgraphs/VERSION @@ -1 +1 @@ -0.1.0 \ No newline at end of file +0.1.01 \ No newline at end of file diff --git a/plotsandgraphs/__init__.py b/plotsandgraphs/__init__.py index e69de29..e7f5024 100644 --- a/plotsandgraphs/__init__.py +++ b/plotsandgraphs/__init__.py @@ -0,0 +1,2 @@ +from . import binary_classifier +from . 
import compare_distributions \ No newline at end of file From 5293e2edc2d1b0668f316ceffcda016b29a05f77 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 19:14:53 +0100 Subject: [PATCH 07/16] fix linter workflow by installing linter --- .github/workflows/main.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8cd38d7..ca34fcf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -56,7 +56,10 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install project - run: make install + run: | + make virtualenv + source .venv/bin/activate + make install - name: Run linter run: make lint From bc0094de193410a92422e937541c2584147f3524 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 19:21:36 +0100 Subject: [PATCH 08/16] debugging --- .github/workflows/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ca34fcf..9055674 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -55,6 +55,10 @@ jobs: - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Debugging + run: | + ls -la + cat Makefile - name: Install project run: | make virtualenv From f1c0fccf015d848a1ff25d8f42b16f65aa4c9dd1 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 19:44:54 +0100 Subject: [PATCH 09/16] bug fix? 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9055674..6751f6f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -58,7 +58,7 @@ jobs: - name: Debugging run: | ls -la - cat Makefile + cat MAKEFILE - name: Install project run: | make virtualenv From 6b8a6b7e1ab827a44005fc56420aa5ac29c0feb3 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 19:47:52 +0100 Subject: [PATCH 10/16] bug fix again? --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6751f6f..63c25c0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,6 +59,7 @@ jobs: run: | ls -la cat MAKEFILE + make -f MAKEFILE virtualenv - name: Install project run: | make virtualenv From 15d984e5d2d254af1d1f284d46b5f114c88db6bb Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 19:56:08 +0100 Subject: [PATCH 11/16] renamed makefile --- .github/workflows/main.yml | 74 +++++++++++++++++++------------------- MAKEFILE => Makefile | 0 2 files changed, 37 insertions(+), 37 deletions(-) rename MAKEFILE => Makefile (100%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 63c25c0..a71ff32 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,7 +59,7 @@ jobs: run: | ls -la cat MAKEFILE - make -f MAKEFILE virtualenv + make virtualenv - name: Install project run: | make virtualenv @@ -90,40 +90,40 @@ jobs: # with: # fail_ci_if_error: true - tests_mac: - needs: linter - strategy: - fail-fast: false - matrix: - python-version: [3.9] - os: [macos-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install project - run: make install - - name: Run tests - run: make test + # 
tests_mac: + # needs: linter + # strategy: + # fail-fast: false + # matrix: + # python-version: [3.9] + # os: [macos-latest] + # runs-on: ${{ matrix.os }} + # steps: + # - uses: actions/checkout@v3 + # - uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + # - name: Install project + # run: make install + # - name: Run tests + # run: make test - tests_win: - needs: linter - strategy: - fail-fast: false - matrix: - python-version: [3.9] - os: [windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Pip - run: pip install --user --upgrade pip - - name: Install project - run: pip install -e .[test] - - name: run tests - run: pytest -s -vvvv -l --tb=long tests \ No newline at end of file + # tests_win: + # needs: linter + # strategy: + # fail-fast: false + # matrix: + # python-version: [3.9] + # os: [windows-latest] + # runs-on: ${{ matrix.os }} + # steps: + # - uses: actions/checkout@v3 + # - uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + # - name: Install Pip + # run: pip install --user --upgrade pip + # - name: Install project + # run: pip install -e .[test] + # - name: run tests + # run: pytest -s -vvvv -l --tb=long tests \ No newline at end of file diff --git a/MAKEFILE b/Makefile similarity index 100% rename from MAKEFILE rename to Makefile From ec29e4e553ace404408ca13eae08d7f60555eb81 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 19:57:06 +0100 Subject: [PATCH 12/16] bug fix --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a71ff32..1dfcdd8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -58,7 +58,7 @@ jobs: - name: Debugging run: | ls -la - cat MAKEFILE + cat Makefile make virtualenv - name: 
Install project run: | From 9b25ec65a6ed554a253f8189601b8e26e809529a Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Thu, 9 Nov 2023 20:00:09 +0100 Subject: [PATCH 13/16] add requirements-test.txt --- requirements-test.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/requirements-test.txt b/requirements-test.txt index a7baf9d..f660f6a 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,10 @@ -matplotlib -numpy -pandas -seaborn -scikit-learn -tqdm \ No newline at end of file +# These requirements are for development and testing only, not for production. +pytest +coverage +flake8 +black +isort +pytest-cov +mypy +gitchangelog +mkdocs \ No newline at end of file From 68181b86271557195d9b61d153226ce8f2cfd028 Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Fri, 10 Nov 2023 09:57:05 +0100 Subject: [PATCH 14/16] bugfixes mypy --- plotsandgraphs/binary_classifier.py | 2 +- plotsandgraphs/compare_distributions.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/plotsandgraphs/binary_classifier.py b/plotsandgraphs/binary_classifier.py index 70e5069..e6fd62f 100644 --- a/plotsandgraphs/binary_classifier.py +++ b/plotsandgraphs/binary_classifier.py @@ -137,7 +137,7 @@ def plot_classification_report(y_test: np.ndarray, bounds = np.linspace(0, 1, 11) cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1) - norm = colors.BoundaryNorm(bounds, cmap.N) + norm = colors.BoundaryNorm(bounds, cmap.N) # type: ignore[attr-defined] ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, fmt='.3g', cbar_kws={'ticks':bounds[::2], 'norm':norm, 'boundaries':bounds}, diff --git a/plotsandgraphs/compare_distributions.py b/plotsandgraphs/compare_distributions.py index 943c3e0..c9cbda2 100644 --- a/plotsandgraphs/compare_distributions.py +++ b/plotsandgraphs/compare_distributions.py @@ -2,17 +2,17 @@ import matplotlib.pyplot as plt import matplotlib as mpl import pandas as pd -from typing import List, 
Tuple +from typing import List, Tuple, Optional def plot_raincloud(df: pd.DataFrame, x_col: str, y_col: str, - colors: List[str] = None, - order: List[str] = None, - title: str = None, - x_label: str = None, - x_range: Tuple[float, float] = None, + colors: Optional[List[str]] = None, + order: Optional[List[str]] = None, + title: Optional[str] = None, + x_label: Optional[str] = None, + x_range: Optional[Tuple[float, float]] = None, show_violin = True, show_scatter = True, show_boxplot = True): From dcdf1a23fbfdc18a0ce5385c2d6df2be9556628f Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Fri, 10 Nov 2023 10:11:05 +0100 Subject: [PATCH 15/16] small bug fixes --- Makefile | 4 ++++ plotsandgraphs/binary_classifier.py | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e0641ae..b541671 100644 --- a/Makefile +++ b/Makefile @@ -32,9 +32,13 @@ fmt: ## Format code using black & isort. .PHONY: lint lint: ## Run pep8, black, mypy linters. + @echo "Running linters ..." 
+ @echo "--- Running flake8 ---" $(ENV_PREFIX)flake8 plotsandgraphs/ + @echo "--- Running black ---" $(ENV_PREFIX)black -l 79 --check plotsandgraphs/ $(ENV_PREFIX)black -l 79 --check tests/ + @echo "--- Running mypy ---" $(ENV_PREFIX)mypy --ignore-missing-imports plotsandgraphs/ .PHONY: test diff --git a/plotsandgraphs/binary_classifier.py b/plotsandgraphs/binary_classifier.py index e6fd62f..3c69486 100644 --- a/plotsandgraphs/binary_classifier.py +++ b/plotsandgraphs/binary_classifier.py @@ -35,7 +35,7 @@ def plot_accuracy(y_true, y_pred, name='', save_fig_path=None) -> Figure: path = Path(save_fig_path) path.parent.mkdir(parents=True, exist_ok=True) fig.savefig(save_fig_path, bbox_inches='tight') - return fig, accuracy + return fig def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, save_fig_path=None) -> Figure: import matplotlib.colors as colors @@ -136,7 +136,7 @@ def plot_classification_report(y_test: np.ndarray, mask[:,cols-1] = True bounds = np.linspace(0, 1, 11) - cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1) + cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1) # type: ignore norm = colors.BoundaryNorm(bounds, cmap.N) # type: ignore[attr-defined] ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, fmt='.3g', @@ -428,8 +428,8 @@ def plot_pr_curve( # Plot Precision-Recall curve ax.plot(recall, precision, label=label, color=color) - ax.set_xlim([0.0, 1.01]) - ax.set_ylim([-0.01, 1.01]) + ax.set_xlim((0.0, 1.01)) + ax.set_ylim((-0.01, 1.01)) ax.set_xlabel('Recall') ax.set_ylabel('Precision') if title is not None: From 66a59e0488ab74894c2ae7744998ef70ea02ef4b Mon Sep 17 00:00:00 2001 From: Joshua Wendland Date: Fri, 10 Nov 2023 10:48:20 +0100 Subject: [PATCH 16/16] add pyproject.toml file --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d390ec0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[tool.pylint."FORMAT"] 
+max-line-length = 120 + +[tool.pylint."BASIC"] +variable-rgx = "[a-z_][a-z0-9_]{0,30}$|[a-z0-9_]+([A-Z][a-z0-9_]+)*$" # Allow snake case and camel case for variable names