From bf21bdad4f659b25e3b6274a67f7ead41f5ffb1f Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Wed, 8 Nov 2023 16:33:37 +0100
Subject: [PATCH 01/16] add makefile and version

---
 MAKEFILE               | 122 +++++++++++++++++++++++++++++++++++++++++
 plotsandgraphs/VERSION |   1 +
 2 files changed, 123 insertions(+)
 create mode 100644 MAKEFILE
 create mode 100644 plotsandgraphs/VERSION
diff --git a/MAKEFILE b/MAKEFILE
new file mode 100644
index 0000000..e0641ae
--- /dev/null
+++ b/MAKEFILE
@@ -0,0 +1,122 @@
+.ONESHELL:
+ENV_PREFIX=$(shell test -e .venv/bin/pip && echo .venv/bin/)
+USING_POETRY=$(shell grep -qs "tool.poetry" pyproject.toml && echo "yes")
+
+.PHONY: help
+help:             ## Show the help.
+	@echo "Usage: make <target>"
+	@echo ""
+	@echo "Targets:"
+	@grep -F "##" $(MAKEFILE_LIST) | grep -Fv "grep -F"
+
+
+.PHONY: show
+show:             ## Show the current environment.
+	@echo "Current environment:"
+	@if [ "$(USING_POETRY)" ]; then poetry env info && exit; fi
+	@echo "Running using $(ENV_PREFIX)"
+	@$(ENV_PREFIX)python -V
+	@$(ENV_PREFIX)python -m site
+
+.PHONY: install
+install:          ## Install the project in dev mode.
+	@if [ "$(USING_POETRY)" ]; then poetry install && exit; fi
+	@echo "Don't forget to run 'make virtualenv' if you got errors."
+	$(ENV_PREFIX)pip install -e .[test]
+
+.PHONY: fmt
+fmt:              ## Format code using black & isort.
+	$(ENV_PREFIX)isort plotsandgraphs/
+	$(ENV_PREFIX)black -l 79 plotsandgraphs/
+	$(ENV_PREFIX)black -l 79 tests/
+
+.PHONY: lint
+lint:             ## Run pep8, black, mypy linters.
+	$(ENV_PREFIX)flake8 plotsandgraphs/
+	$(ENV_PREFIX)black -l 79 --check plotsandgraphs/
+	$(ENV_PREFIX)black -l 79 --check tests/
+	$(ENV_PREFIX)mypy --ignore-missing-imports plotsandgraphs/
+
+.PHONY: test
+test: lint        ## Run tests and generate coverage report.
+	$(ENV_PREFIX)pytest -v --cov-config .coveragerc --cov=plotsandgraphs -l --tb=short --maxfail=1 tests/
+	$(ENV_PREFIX)coverage xml
+	$(ENV_PREFIX)coverage html
+
+.PHONY: watch
+watch:            ## Run tests on every change.
+	ls **/**.py | entr $(ENV_PREFIX)pytest -s -vvv -l --tb=long --maxfail=1 tests/
+
+.PHONY: clean
+clean:            ## Clean unused files.
+	@find ./ -name '*.pyc' -exec rm -f {} \;
+	@find ./ -name '__pycache__' -exec rm -rf {} \;
+	@find ./ -name 'Thumbs.db' -exec rm -f {} \;
+	@find ./ -name '*~' -exec rm -f {} \;
+	@rm -rf .cache
+	@rm -rf .pytest_cache
+	@rm -rf .mypy_cache
+	@rm -rf build
+	@rm -rf dist
+	@rm -rf *.egg-info
+	@rm -rf htmlcov
+	@rm -rf .tox/
+	@rm -rf docs/_build
+
+.PHONY: virtualenv
+virtualenv:       ## Create a virtual environment.
+	@if [ "$(USING_POETRY)" ]; then poetry install && exit; fi
+	@echo "creating virtualenv ..."
+	@rm -rf .venv
+	@python3 -m venv .venv
+	@./.venv/bin/pip install -U pip
+	@./.venv/bin/pip install -e .[test]
+	@echo
+	@echo "!!! Please run 'source .venv/bin/activate' to enable the environment !!!"
+
+.PHONY: release
+release:          ## Create a new tag for release.
+	@set -e; echo "WARNING: This operation will create a version tag and push to github"
+	@read -p "Version? (provide the next x.y.z semver) : " TAG
+	@echo "$${TAG}" > plotsandgraphs/VERSION
+	@$(ENV_PREFIX)gitchangelog > HISTORY.md
+	@git add plotsandgraphs/VERSION HISTORY.md
+	@git commit -m "release: version $${TAG} 🚀"
+	@echo "creating git tag : $${TAG}"
+	@git tag "$${TAG}"
+	@git push -u origin HEAD --tags
+	@echo "Github Actions will detect the new tag and release the new version."
+
+.PHONY: docs
+docs:             ## Build the documentation.
+	@echo "building documentation ..."
+	@$(ENV_PREFIX)mkdocs build
+	URL="site/index.html"; xdg-open $$URL || sensible-browser $$URL || x-www-browser $$URL || gnome-open $$URL || open $$URL
+
+.PHONY: switch-to-poetry
+switch-to-poetry: ## Switch to poetry package manager.
+	@echo "Switching to poetry ..."
+	@if ! poetry --version > /dev/null; then echo 'poetry is required, install from https://python-poetry.org/'; exit 1; fi
+	@rm -rf .venv
+	@poetry init --no-interaction --name=plotsandgraphs --author=rochacbruno
+	@echo "" >> pyproject.toml
+	@echo "[tool.poetry.scripts]" >> pyproject.toml
+	@echo "plotsandgraphs = 'plotsandgraphs.__main__:main'" >> pyproject.toml
+	@cat requirements.txt | while read in; do poetry add --no-interaction "$${in}"; done
+	@cat requirements-test.txt | while read in; do poetry add --no-interaction "$${in}" --dev; done
+	@poetry install --no-interaction
+	@mkdir -p .github/backup
+	@mv requirements* .github/backup
+	@mv setup.py .github/backup
+	@echo "You have switched to https://python-poetry.org/ package manager."
+	@echo "Please run 'poetry shell' or 'poetry run plotsandgraphs'"
+
+.PHONY: init
+init:             ## Initialize the project based on an application template.
+	@./.github/init.sh
+
+
+# This project has been generated from rochacbruno/python-project-template
+# __author__ = 'rochacbruno'
+# __repo__ = https://github.com/rochacbruno/python-project-template
+# __sponsor__ = https://github.com/sponsors/rochacbruno/
\ No newline at end of file
diff --git a/plotsandgraphs/VERSION b/plotsandgraphs/VERSION
new file mode 100644
index 0000000..6c6aa7c
--- /dev/null
+++ b/plotsandgraphs/VERSION
@@ -0,0 +1 @@
+0.1.0
\ No newline at end of file

From 439543a6ae6dedc78e275d0980aca09dd389a9fd Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Wed, 8 Nov 2023 17:10:21 +0100
Subject: [PATCH 02/16] Add initial unit test

---
 tests/__init__.py  | 0
 tests/test_test.py | 4 ++++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_test.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_test.py b/tests/test_test.py
new file mode 100644
index 0000000..b2820fc
--- /dev/null
+++ b/tests/test_test.py
@@ -0,0 +1,4 @@
+# This is just a test for a test
+
+def test_test():
+    assert True
\ No newline at end of file

From 32162a1a182fab11aa8653f83da65a08c5b1b781 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Wed, 8 Nov 2023 17:15:23 +0100
Subject: [PATCH 03/16] add requirements-test.txt

---
 requirements-text.txt | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 requirements-text.txt

diff --git a/requirements-text.txt b/requirements-text.txt
new file mode 100644
index 0000000..a7baf9d
--- /dev/null
+++ b/requirements-text.txt
@@ -0,0 +1,6 @@
+matplotlib
+numpy
+pandas
+seaborn
+scikit-learn
+tqdm
\ No newline at end of file

From d0ab1f03c9a1b51e44d5d7fc19676bbd36c5e5a5 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Wed, 8 Nov 2023 17:23:12 +0100
Subject: [PATCH 04/16] update setup.py

---
 setup.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 64 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index dcc5f73..823dfc7 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,68 @@
+"""Python setup.py for plotsandgraphs package"""
+import io
+import os
 from setuptools import setup, find_packages
 
+# setup(
+#     name='plotsandgraphs',
+#     version='0.1.0',
+#     packages=find_packages(include=['plotsandgraphs', 'plotsandgraphs.*'])
+# )
+
+PROJECT_NAME = 'plotsandgraphs'
+
+
+
+
+def read(*paths, **kwargs):
+    """Read a text file located relative to this setup.py and return its stripped content.
+    ``paths`` are joined onto the directory of this file.
+    ``encoding`` (keyword argument) selects the text encoding; it defaults to "utf8".
+    >>> read("plotsandgraphs", "VERSION")
+    '0.1.0'
+    """
+
+    content = ""
+    with io.open(
+        os.path.join(os.path.dirname(__file__), *paths),
+        encoding=kwargs.get("encoding", "utf8"),
+    ) as open_file:
+        content = open_file.read().strip()
+    return content
+
+
+def read_requirements(path):
+    """List the requirement specifiers in ``path``; blank lines, comments, pip options and git+ URLs are skipped."""
+    return [
+        line for line in map(str.strip, read(path).split("\n"))  # strip first so indented lines are filtered too
+        if line and not line.startswith(('"', "#", "-", "git+"))  # `line and` drops empty entries
+    ]
+
+
 setup(
-    name='plotsandgraphs',
-    version='0.1.0',
-    packages=find_packages(include=['plotsandgraphs', 'plotsandgraphs.*'])
+    name=PROJECT_NAME,
+    version=read(PROJECT_NAME, "VERSION"),
+    description="Create plots and graphs for your Machine Learning projects.",
+    url="https://github.com/joshuawe/plots_and_graphs",
+    long_description=read("README.md"),
+    long_description_content_type="text/markdown",
+    author="Joshua Wendland and Fabian Krüger",
+    packages=find_packages(exclude=["tests", ".github"]),
+    install_requires=read_requirements("requirements.txt"),
+    entry_points={  # NOTE(review): needs a main() in plotsandgraphs/__main__.py - confirm it exists
+        "console_scripts": [f"{PROJECT_NAME} = {PROJECT_NAME}.__main__:main"]
+    },
+    extras_require={"test": read_requirements("requirements-test.txt")},
+    license='GNU General Public License v3.0',
+    keywords=['plots', 'graphs', 'machine learning', 'data science', 'data visualization', 'data analysis', 'matplotlib'],
+    classifiers=[
+        'Development Status :: 2 - Pre-Alpha',
+        'Environment :: Console',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+        'Natural Language :: English',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python :: 3',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence'
+    ],
 )
\ No newline at end of file

From 2a5d01f8956c948daa8237ed0cc0cc906054e1c9 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Wed, 8 Nov 2023 18:21:02 +0100
Subject: [PATCH 05/16] add the actual source

---
 plotsandgraphs/__init__.py                    |   0
 plotsandgraphs/binary_classifier.py           | 448 ++++++++++++++++++
 plotsandgraphs/compare_distributions.py       | 103 ++++
 ...irements-text.txt => requirements-test.txt |   0
 4 files changed, 551 insertions(+)
 create mode 100644 plotsandgraphs/__init__.py
 create mode 100644 plotsandgraphs/binary_classifier.py
 create mode 100644 plotsandgraphs/compare_distributions.py
 rename requirements-text.txt => requirements-test.txt (100%)

diff --git a/plotsandgraphs/__init__.py b/plotsandgraphs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/plotsandgraphs/binary_classifier.py b/plotsandgraphs/binary_classifier.py
new file mode 100644
index 0000000..70e5069
--- /dev/null
+++ b/plotsandgraphs/binary_classifier.py
@@ -0,0 +1,448 @@
+import matplotlib.pyplot as plt
+from matplotlib.colors import to_rgba
+from matplotlib.figure import Figure
+import seaborn as sns
+import numpy as np
+import pandas as pd
+from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, roc_curve, auc, accuracy_score, precision_recall_curve
+from sklearn.calibration import calibration_curve
+from sklearn.utils import resample
+from pathlib import Path
+from tqdm import tqdm
+from typing import Optional
+
+
+def plot_accuracy(y_true, y_pred, name='', save_fig_path=None) -> Figure:
+    """ Really ugly plot, I am not sure if the scalar value for accuracy should receive an entire plot."""
+    accuracy = accuracy_score(y_true, y_pred)
+        
+    # accuracy = 0
+    # for t in range(max_seq_len): 
+    #     accuracy += accuracy_score( y[:,t,0].round()  , y_pred[:,t] )
+    # accuracy = accuracy / max_seq_len
+    fig= plt.figure( figsize=(4,5))
+    plt.bar( np.array([0]), np.array([  accuracy  ]))
+    # axs[0].set_xticks(ticks=range(2))
+    # axs[0].set_xticklabels(["train", "test"])
+    plt.ylabel('Accuracy')
+    plt.ylim([0,1])
+    # axs[0].set_xlabel('Features')
+    title = "Predictor model: {}".format(name )
+    plt.title(title)
+    plt.tight_layout()
+    
+    if (save_fig_path != None):
+        path = Path(save_fig_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+    return fig, accuracy
+
+def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, save_fig_path=None) -> Figure:
+    """Plot a row-normalized 2x2 confusion matrix (TN/FP/FN/TP shown as percentages) and return the figure.
+    y_pred is rounded before being compared with y_true; if save_fig_path is given, the figure is saved
+    there and missing parent folders are created."""
+    import matplotlib.colors as colors
+    # Compute the confusion matrix
+    cm = confusion_matrix(y_true, y_pred.round())
+    # normalize the confusion matrix
+    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
+
+    # Create the ConfusionMatrixDisplay instance and plot it
+    cmd = ConfusionMatrixDisplay(cm, display_labels=['class 0\nnegative', 'class 1\npositive'])
+    fig, ax = plt.subplots(figsize=(4,4))
+    cmd.plot(cmap='YlOrRd', values_format='', colorbar=False, ax=ax, text_kw={'visible':False})
+    cmd.texts_ = []
+    cmd.text_ = []
+
+    text_labels = ['TN', 'FP', 'FN', 'TP']
+    cmap_min, cmap_max = cmd.im_.cmap(0), cmd.im_.cmap(1.0)
+    for i in range(2):
+        for j in range(2):
+            ax.text(j, i, f"{text_labels[i * 2 + j]}\n{cmd.im_.get_array()[i, j]:.2%}",
+                    ha="center", va="center", color=cmap_min if cmd.im_.get_array()[i, j] > 0.5 else cmap_max)
+
+    ax.vlines([0.5], *ax.get_ylim(), color='white', linewidth=1)
+    ax.hlines([0.49], *ax.get_xlim(), color='white', linewidth=1)
+    ax.spines[:].set_visible(False)
+
+    bounds = np.linspace(0, 1, 11)
+    cmap = plt.get_cmap('YlOrRd', len(bounds)+1)  # plt.cm.get_cmap was removed in Matplotlib 3.9
+    norm = colors.BoundaryNorm(bounds, cmap.N)
+    cbar = ax.figure.colorbar(cmd.im_, ax=ax, cmap=cmap, norm=norm, boundaries=bounds, ticks=bounds[::2], location="right", shrink=0.8)
+    cbar.ax.yaxis.set_ticks_position('both')
+    cbar.outline.set_visible(False)
+    plt.tight_layout()
+
+    if save_fig_path is not None:
+        path = Path(save_fig_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+
+    return fig
+
+
+
+
+def plot_classification_report(y_test: np.ndarray,
+                               y_pred: np.ndarray,
+                               title='Classification Report',
+                               figsize=(8, 4),
+                               save_fig_path=None, **kwargs):
+    """
+    TODO: save all these plots
+    Plot the classification report of sklearn
+
+    Parameters
+    ----------
+    y_test : np.ndarray of shape (n_samples,)
+        Targets.
+    y_pred : np.ndarray of shape (n_samples,)
+        Predictions.
+    title : str, default = 'Classification Report'
+        Plot title.
+    figsize : tuple, default = (8, 4)
+        Size (inches) of the plot.
+    save_fig_path : str, default=None
+        Full path where to save the plot. Will generate the folders if they don't exist already.
+    **kwargs
+        Extra keyword arguments forwarded to sklearn's classification_report.
+
+    Returns
+    -------
+        fig : matplotlib.figure.Figure
+            Figure from matplotlib
+        ax : matplotlib.axes.Axes
+            Axes object from matplotlib
+    """
+    import matplotlib as mpl
+    import matplotlib.colors as colors
+    import seaborn as sns
+
+    fig, ax = plt.subplots(figsize=figsize)
+
+    # keep the per-class rows only; 'accuracy', 'macro avg' and 'weighted avg' are not plotted
+    clf_report = classification_report(y_test, y_pred, output_dict=True, **kwargs)
+    keys_to_plot = [key for key in clf_report.keys() if key not in ('accuracy', 'macro avg', 'weighted avg')]
+    df = pd.DataFrame(clf_report, columns=keys_to_plot).T
+    # sort the rows by ascending support, i.e. the class with the fewest samples comes first
+    df.sort_values(by=['support'], inplace=True)
+
+    #first, let's plot the heatmap by masking the 'support' column
+    rows, cols = df.shape
+    mask = np.zeros(df.shape)
+    mask[:,cols-1] = True
+
+    # colormap resampled to len(bounds)+1 discrete colors; norm/boundaries are only handed to the colorbar
+    bounds = np.linspace(0, 1, 11)
+    cmap = plt.get_cmap('YlOrRd', len(bounds)+1)  # plt.cm.get_cmap was removed in Matplotlib 3.9
+    norm = colors.BoundaryNorm(bounds, cmap.N)
+
+    ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, fmt='.3g',
+            cbar_kws={'ticks':bounds[::2], 'norm':norm, 'boundaries':bounds},
+            vmin=0.0,
+            vmax=1.0,
+            linewidths=2, linecolor='white'
+                    )
+    cbar = ax.collections[0].colorbar
+    cbar.ax.yaxis.set_ticks_position('both')
+
+    cmap_min, cmap_max = cbar.cmap(0), cbar.cmap(1.0)
+
+    # add text annotation to heatmap
+    dx, dy = 0.5, 0.5
+    for i in range(rows):
+        for j in range(cols-1):
+            text = f"{df.iloc[i, j]:.2%}" #if (j<cols) else f"{df.iloc[i, j]:.0f}"
+            ax.text(j + dx , i + dy, text,
+                    # ha="center", va="center", color='black')
+                    ha="center", va="center", color=cmap_min if df.iloc[i, j] > 0.5 else cmap_max)
+
+    #then, let's add the support column by normalizing the colors in this column
+    mask = np.zeros(df.shape)
+    mask[:,:cols-1] = True
+
+    ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, cbar=False,
+            linewidths=2, linecolor='white', fmt='.0f',
+            vmin=df['support'].min(),
+            vmax=df['support'].sum(),
+            norm=mpl.colors.Normalize(vmin=df['support'].min(),
+                                      vmax=df['support'].sum())
+                )
+
+    # annotate the support column; the text color depends on each row's share of the total support
+    cmap_min, cmap_max = cbar.cmap(0), cbar.cmap(1.0)
+    for i in range(rows):
+        j = cols-1
+        text = f"{df.iloc[i, j]:.0f}" #if (j<cols) else f"{df.iloc[i, j]:.0f}"
+        color = (df.iloc[i, j]) / (df['support'].sum())
+        ax.text(j + dx , i + dy, text,
+                # ha="center", va="center", color='black')
+                ha="center", va="center", color=cmap_min if color > 0.5 else cmap_max)
+
+    plt.title(title)
+    plt.xticks(rotation = 45)
+    plt.yticks(rotation = 360)
+    plt.tight_layout()
+
+    # save plot
+    if save_fig_path is not None:
+        path = Path(save_fig_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+
+    return fig, ax
+
+
+
+
+
+def plot_roc_curve(
+        y_true: np.ndarray, 
+        y_score: np.ndarray, 
+        figsize=(5,5), 
+        save_fig_path=None, 
+        confidence_interval: float=0.95, 
+        highlight_roc_area=True, 
+        n_bootstraps=None) -> Figure:
+    """
+    Creates a ROC curve for a binary classifier. Includes the option for bootstrapping.
+
+    Parameters
+    ----------
+    y_true : np.ndarray
+        The actual labels of the data. Either 0 or 1.
+    y_score : np.ndarray
+        The output scores of the classifier. Between 0 and 1.
+    figsize : tuple, optional
+        The size of the figure. By default (5,5).
+    save_fig_path : str, optional
+        Path to folder where the figure should be saved. If None then plot is not saved, by default None. E.g. 'figures/roc_curve.png'.
+    confidence_interval : float, optional
+        The confidence interval to use for the calibration plot. By default 0.95. Between 0 and 1. Has no effect when not using n_bootstraps.
+    highlight_roc_area : bool, optional
+        Whether to highlight the area under the ROC curve. By default True. Has no effect when using n_bootstraps.
+    n_bootstraps : int, optional
+        Number of bootstrap samples to use for the calibration plot. Recommended minimum: 1000, moderate: 5000-10000, high: 50000-100000.
+        If None, then no bootstrapping is done. By default None.
+
+    Returns
+    -------
+    fig : matplotlib.pyplot figure
+        The figure of the calibration plot
+    """
+    
+    # create figure
+    fig = plt.figure(figsize=figsize)
+    ax = fig.add_subplot(111)
+    
+    if n_bootstraps is None:
+        base_fpr, mean_tprs, thresholds = roc_curve(y_true, y_score)
+        mean_auc = auc(base_fpr, mean_tprs)
+        if highlight_roc_area is True:
+            plt.fill_between(base_fpr, 0, mean_tprs, alpha=0.2, zorder=2)
+        if confidence_interval is not None:
+            print('Warning: confidence_intervals is not None, but n_bootstraps is None. Confidence intervals will not be plotted.')
+    else:
+        # Bootstrapping for AUROC
+        bootstrap_aucs, bootstrap_tprs = [], []
+        base_fpr = np.linspace(0, 1, 101)
+        for _ in tqdm(range(n_bootstraps), desc='Bootstrapping'):
+            indices = resample(np.arange(len(y_true)), replace=True)
+            fpr_i, tpr_i, _ = roc_curve(y_true[indices], y_score[indices])
+            roc_auc_i = auc(fpr_i, tpr_i)
+            bootstrap_aucs.append(roc_auc_i)
+            
+            # Interpolate tpr_i to base_fpr, so we have the tpr for the same fpr values for each bootstrap iteration
+            tpr_i_interp = np.interp(base_fpr, fpr_i, tpr_i)
+            tpr_i_interp[0] = 0.0
+            bootstrap_tprs.append(tpr_i_interp)
+
+        mean_auc = np.mean(bootstrap_aucs)
+        tprs = np.array(bootstrap_tprs)
+        mean_tprs = tprs.mean(axis=0)
+
+        # visualize confidence intervals
+        if confidence_interval is not None:
+            CI_upper = confidence_interval + (1-confidence_interval)/2
+            CI_lower = (1-confidence_interval)/2
+            tprs_upper = np.quantile(tprs, CI_upper, axis=0)
+            tprs_lower = np.quantile(tprs, CI_lower, axis=0)
+            auc_upper = np.quantile(bootstrap_aucs, CI_upper)
+            auc_lower = np.quantile(bootstrap_aucs, CI_lower)
+            label = f'{confidence_interval:.0%} CI: [{auc_lower:.2f}, {auc_upper:.2f}]'
+            plt.fill_between(base_fpr, tprs_lower, tprs_upper, alpha=0.3, label=label, zorder=2)
+            
+        if highlight_roc_area is True:
+            print('Warning: highlight_roc_area is True, but n_bootstraps is not None. The area under the ROC curve will not be highlighted.')
+    
+    plt.plot(base_fpr, mean_tprs, label=f'ROC curve (AUROC = {mean_auc:.2f})', zorder=3)
+    plt.plot([0, 1], [0, 1], 'k--', label='Random classifier')
+    plt.xlim([0.0, 1.01])
+    plt.ylim([-0.01, 1.01])
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title('Receiver Operating Characteristic (ROC)')
+    # reverse legend entry order
+    handles, labels = plt.gca().get_legend_handles_labels()
+    handles = handles[::-1]
+    labels = labels[::-1]
+    plt.legend(handles, labels, loc="lower right", frameon=False)
+    ax.spines[:].set_visible(False)
+    ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5)
+    ax.set_yticks(np.arange(0, 1.1, 0.2))
+    plt.tight_layout()
+    
+    if save_fig_path:
+        path = Path(save_fig_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+    
+    return fig
+
+
+
+def plot_calibration_curve(y_prob: np.ndarray, y_true: np.ndarray, save_fig_path=None):
+    """
+    Creates calibration plot for a binary classifier.
+
+    Parameters
+    ----------
+    y_prob : np.ndarray
+        The output probabilities of the classifier. Between 0 and 1.
+    y_true : np.ndarray
+        The actual labels of the data. Either 0 or 1.
+    save_fig_path : _type_, optional
+        Path to folder where the figure should be saved. If None then plot is not saved, by default None
+
+    Returns
+    -------
+    fig : matplotlib.pyplot figure
+        The figure of the calibration plot
+    """
+    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=10, strategy='uniform')
+    expected_cal_error = np.abs(prob_pred-prob_true).mean().round(2)
+    fig = plt.figure(figsize=(5,5))
+    ax = fig.add_subplot(111)
+    
+    # Calculate bar width
+    bar_width = (prob_pred[1:] - prob_pred[:-1]).mean() * 0.75
+    
+    # Plotting
+    ax.bar(prob_pred, prob_true, width=bar_width, zorder=3, facecolor=to_rgba('C0',0.75), edgecolor='midnightblue', linewidth=2, label=f'True Calibration')
+    ax.bar(prob_pred, prob_pred - prob_true, bottom=prob_true, width=bar_width, zorder=3, alpha=0.5, edgecolor='red', fill=False, linewidth=2, label=f'Mean ECE = {expected_cal_error}', hatch='//')
+    ax.plot([0, 1], [0, 1], linestyle='--', color='grey', zorder=3, label='Perfect Calibration')
+        
+    # Labels and titles
+    ax.set(xlabel='Predicted probability', ylabel='True probability')
+    plt.xlim([0.0, 1.005])
+    plt.ylim([-0.01, 1.0])
+    ax.legend(loc='upper left', frameon=False)
+    
+    # show y-grid
+    ax.spines[:].set_visible(False)
+    ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5)
+    ax.set_yticks(np.arange(0, 1.1, 0.2))
+    ax.set_xticks(np.arange(0, 1.1, 0.2))
+    plt.tight_layout()
+    
+    # save plot
+    if (save_fig_path != None):
+        path = Path(save_fig_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+    
+    return fig
+
+
+def plot_y_prob_histogram(y_prob: np.ndarray, save_fig_path=None) -> Figure:
+    fig = plt.figure(figsize=(5,5))
+    ax = fig.add_subplot(111)
+    ax.hist(y_prob, bins=10, alpha=0.9, edgecolor='midnightblue', linewidth=2, rwidth=1)
+    # same histogram as above, but with border lines
+    # ax.hist(y_prob, bins=10, alpha=0.5, edgecolor='black', linewidth=1.2)
+    ax.set(xlabel='Predicted probability [-]', ylabel='Count [-]', xlim=(-0.01, 1.0))
+    ax.set_title('Histogram of predicted probabilities')
+    
+    ax.spines[:].set_visible(False)
+    ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5)
+    ax.set_xticks(np.arange(0, 1.1, 0.2))
+    plt.tight_layout()
+    
+    # save plot
+    if (save_fig_path != None):
+        path = Path(save_fig_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+    
+    return fig
+
+
+
+def plot_pr_curve(
+        y_true: np.ndarray,
+        y_score: np.ndarray,
+        figsize=(5,5),
+        save_fig_path: Optional[str]=None,
+        color: Optional[str]= None,
+        label: Optional[str]=None,
+        title: Optional[str]=None
+    ) -> Figure:
+    """
+    Visualize the Precision-Recall curve for a binary classifier.
+
+    Parameters
+    ----------
+    y_true : np.ndarray
+        The actual labels of the data. Either 0 or 1.
+    y_score : np.ndarray
+        The output scores of the classifier. Between 0 and 1.
+    figsize : tuple, optional
+        The size of the figure. By default (5,5).
+    save_fig_path : str, optional
+        File path the figure is saved to; missing parent folders are created. If None then plot is not saved, by default None. E.g. 'figures/pr_curve.png'.
+    color : str, optional
+        Color of the PR curve, by default None.
+    label : str, optional
+        Custom label for the plot. If None, a default label is used. By default None.
+    title : str, optional
+        Title of the plot. If None, no title is set. By default None.
+
+    Returns
+    -------
+    fig : matplotlib.pyplot figure
+        The figure of the PR curve
+    """
+    # Create a new figure
+    fig = plt.figure(figsize=figsize)
+    ax = fig.add_subplot(111)
+
+    # Compute Precision-Recall curve and area for each class
+    precision, recall, _ = precision_recall_curve(y_true, y_score)
+
+    pr_auc = auc(recall, precision)
+    if label is None:
+        # Use a default label if none is provided
+        label = 'PR curve'
+    label += f' (area = {pr_auc:.3f})'
+
+    # Plot Precision-Recall curve
+    ax.plot(recall, precision, label=label, color=color)
+    ax.set_xlim([0.0, 1.01])
+    ax.set_ylim([-0.01, 1.01])
+    ax.set_xlabel('Recall')
+    ax.set_ylabel('Precision')
+    if title is not None:
+        ax.set_title(title)
+    ax.legend(loc="lower right")
+    ax.spines[:].set_visible(False)
+    ax.grid(True, linestyle='-', linewidth=0.5, color='grey', alpha=0.5)
+    ax.set_yticks(np.arange(0, 1.1, 0.2))
+    plt.tight_layout()
+
+    # Save the figure if save_fig_path is specified, creating parent folders like the other plot functions
+    if save_fig_path:
+        Path(save_fig_path).parent.mkdir(parents=True, exist_ok=True)
+        fig.savefig(save_fig_path, bbox_inches='tight')
+
+    return fig
+
diff --git a/plotsandgraphs/compare_distributions.py b/plotsandgraphs/compare_distributions.py
new file mode 100644
index 0000000..943c3e0
--- /dev/null
+++ b/plotsandgraphs/compare_distributions.py
@@ -0,0 +1,103 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import pandas as pd
+from typing import List, Tuple
+
+
+def plot_raincloud(df: pd.DataFrame,
+                   x_col: str,
+                   y_col: str, 
+                   colors: List[str] = None, 
+                   order: List[str] = None, 
+                   title: str = None, 
+                   x_label: str = None, 
+                   x_range: Tuple[float, float] = None, 
+                   show_violin = True, 
+                   show_scatter = True, 
+                   show_boxplot = True):
+    
+    """
+    Generate a raincloud plot using Pandas DataFrame.
+    
+    Parameters:
+    - df (pd.DataFrame): The data frame containing the data.
+    - x_col (str): The column name for the x-axis data.
+    - y_col (str): The column name for the y-axis categories.
+    - colors (List[str], optional): List of colors for each category. Defaults to tab10 cmap.
+    - order (List[str], optional): Order of categories on y-axis. Defaults to unique values in y_col.
+    - title (str, optional): Title of the plot.
+    - x_label (str, optional): Label for the x-axis.
+    - x_range (Tuple[float, float], optional): Range for the x-axis.
+    - show_violin (bool, optional): Whether to show violin plot. Defaults to True.
+    - show_scatter (bool, optional): Whether to show scatter plot. Defaults to True.
+    - show_boxplot (bool, optional): Whether to show boxplot. Defaults to True.
+
+    Returns:
+    - matplotlib.figure.Figure: The generated plot figure.
+    """
+        
+    fig, ax = plt.subplots(figsize=(16, 8))
+    offset = 0.2  # Offset value to move plots
+
+    if order is None:
+        order = df[y_col].unique()
+
+    # if colors are none, use distinct colors for each group
+    if colors is None:
+        cmap = plt.get_cmap('tab10')
+        colors = [mpl.colors.to_hex(cmap(i)) for i in np.linspace(0, 1, len(order))]
+    else:
+        assert len(colors) == len(order), 'colors and order must be the same length'
+        colors = colors
+        
+    # Boxplot
+    if show_boxplot:
+        bp = ax.boxplot([df[df[y_col] == grp][x_col].values for grp in order],
+                        patch_artist=True, vert=False, positions=np.arange(1 + offset, len(order) + 1 + offset), widths=0.2)
+
+        # Customize boxplot colors
+        for patch, color in zip(bp['boxes'], colors):
+            patch.set_facecolor(color)
+            patch.set_alpha(0.8)
+
+        # Set median line color to black
+        for median in bp['medians']:
+            median.set_color('black')
+
+    # Violinplot
+    if show_violin:
+        vp = ax.violinplot([df[df[y_col] == grp][x_col].values for grp in order],
+                        positions=np.arange(1 + offset, len(order) + 1 + offset), showmeans=False, showextrema=False, showmedians=False, vert=False)
+
+        # Customize violinplot colors
+        for idx, b in enumerate(vp['bodies']):
+            b.get_paths()[0].vertices[:, 1] = np.clip(b.get_paths()[0].vertices[:, 1], idx + 1 + offset, idx + 2 + offset)
+            b.set_color(colors[idx])
+
+    # Scatterplot with jitter
+    if show_scatter:
+        for idx, grp in enumerate(order):
+            features = df[df[y_col] == grp][x_col].values
+            y = np.full(len(features), idx + 1 - offset)
+            jitter_amount = 0.12
+            y += np.random.uniform(low=-jitter_amount, high=jitter_amount, size=len(y))
+            plt.scatter(features, y, s=10, c=colors[idx], alpha=0.3, facecolors='none')
+
+    # Labels
+    plt.yticks(np.arange(1, len(order) + 1), order)
+
+    if x_label is None:
+        x_label = x_col
+    plt.xlabel(x_label)
+    if title:
+        plt.title(title + '\n')
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    ax.spines['left'].set_visible(False)
+    ax.xaxis.grid(True)
+
+    if x_range:
+        plt.xlim(x_range)
+        
+    return fig
diff --git a/requirements-text.txt b/requirements-test.txt
similarity index 100%
rename from requirements-text.txt
rename to requirements-test.txt

From 9defcfd570b954291c3777d7a206b5ce2fada2d0 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 18:40:48 +0100
Subject: [PATCH 06/16] update github workflows

---
 .github/release_message.sh    |   3 +
 .github/workflows/main.yml    | 138 ++++++++++++++++++++++++++++------
 .github/workflows/pylint.yml  |   2 +-
 .github/workflows/release.yml |  50 ++++++++++++
 plotsandgraphs/VERSION        |   2 +-
 plotsandgraphs/__init__.py    |   2 +
 6 files changed, 174 insertions(+), 23 deletions(-)
 create mode 100644 .github/release_message.sh
 create mode 100644 .github/workflows/release.yml

diff --git a/.github/release_message.sh b/.github/release_message.sh
new file mode 100644
index 0000000..cf04476
--- /dev/null
+++ b/.github/release_message.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+previous_tag=$(git tag --sort=-creatordate | sed -n 2p)  # second-newest tag by creation date
+git shortlog "${previous_tag}.." | sed 's/^./    &/'  # indent every non-empty shortlog line by four spaces
\ No newline at end of file
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0f1ba9c..8cd38d7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,25 +1,121 @@
-name: API workflow
+# name: API workflow
 
-on: [push, pull_request]
+# on: [push, pull_request]
+
+# jobs:
+#   build:
+#     runs-on: ubuntu-latest
+#     name: Test python API
+#     steps:
+#     - uses: actions/checkout@v1
+#     - name: Install requirements
+#       run: pip install -r requirements.txt
+#     - name: Run tests and collect coverage
+#       run: pytest --cov .
+#     - name: Upload coverage reports to Codecov
+#       run: |
+#         # Replace `linux` below with the appropriate OS
+#         # Options are `alpine`, `linux`, `macos`, `windows`
+#         curl -Os https://uploader.codecov.io/latest/linux/codecov
+#         chmod +x codecov
+#         ./codecov -t ${CODECOV_TOKEN}
+#     - name: Upload coverage reports to Codecov
+#       uses: codecov/codecov-action@v3
+#       env:
+#         CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+
+
+
+
+# This is a basic workflow to help you get started with Actions
+
+name: CI
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push or pull request events but only for the main branch
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    name: Test python API
+  linter:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install project
+        run: make install
+      - name: Run linter
+        run: make lint
+
+  tests_linux:
+    needs: linter
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install project
+        run: make install
+      - name: Run tests
+        run: make test
+      - name: "Upload coverage to Codecov"
+        uses: codecov/codecov-action@v3
+        # with:
+        #   fail_ci_if_error: true
+
+  tests_mac:
+    needs: linter
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install project
+        run: make install
+      - name: Run tests
+        run: make test
+
+  tests_win:
+    needs: linter
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [windows-latest]
+    runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v1
-    - name: Install requirements
-      run: pip install -r requirements.txt
-    - name: Run tests and collect coverage
-      run: pytest --cov .
-    - name: Upload coverage reports to Codecov
-      run: |
-        # Replace `linux` below with the appropriate OS
-        # Options are `alpine`, `linux`, `macos`, `windows`
-        curl -Os https://uploader.codecov.io/latest/linux/codecov
-        chmod +x codecov
-        ./codecov -t ${CODECOV_TOKEN}
-    - name: Upload coverage reports to Codecov
-      uses: codecov/codecov-action@v3
-      env:
-        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install Pip
+        run: pip install --user --upgrade pip
+      - name: Install project
+        run: pip install -e .[test]
+      - name: run tests
+        run: pytest -s -vvvv -l --tb=long tests
\ No newline at end of file
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 7a88e1d..867fd32 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -1,6 +1,6 @@
 name: Pylint
 
-on: [push]
+on: [push, pull_request]
 
 jobs:
   build:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..06ea5bc
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,50 @@
+name: Upload Python Package
+
+on:
+  push:
+    # Sequence of patterns matched against refs/tags
+    tags:
+      - '*' # Matches every pushed tag (not only v*), e.g. v1.0, v20.15.10
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          # by default, it uses a depth of 1
+          # this fetches all history so that we can read each commit
+          fetch-depth: 0
+      - name: Generate Changelog
+        run: bash .github/release_message.sh > release_message.md  # invoke via bash: the script is committed with mode 100644 (not executable)
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          body_path: release_message.md
+
+  deploy:
+    needs: release
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
\ No newline at end of file
diff --git a/plotsandgraphs/VERSION b/plotsandgraphs/VERSION
index 6c6aa7c..9a26661 100644
--- a/plotsandgraphs/VERSION
+++ b/plotsandgraphs/VERSION
@@ -1 +1 @@
-0.1.0
\ No newline at end of file
+0.1.01
\ No newline at end of file
diff --git a/plotsandgraphs/__init__.py b/plotsandgraphs/__init__.py
index e69de29..e7f5024 100644
--- a/plotsandgraphs/__init__.py
+++ b/plotsandgraphs/__init__.py
@@ -0,0 +1,2 @@
+from . import binary_classifier
+from . import compare_distributions
\ No newline at end of file

From 5293e2edc2d1b0668f316ceffcda016b29a05f77 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 19:14:53 +0100
Subject: [PATCH 07/16] fix linter workflow by installing linter

---
 .github/workflows/main.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8cd38d7..ca34fcf 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -56,7 +56,10 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install project
-        run: make install
+        run: |
+          make virtualenv
+          source .venv/bin/activate
+          make install
       - name: Run linter
         run: make lint
 

From bc0094de193410a92422e937541c2584147f3524 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 19:21:36 +0100
Subject: [PATCH 08/16] debugging

---
 .github/workflows/main.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ca34fcf..9055674 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -55,6 +55,10 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
+      - name: Debugging
+        run: |
+          ls -la
+          cat Makefile
       - name: Install project
         run: |
           make virtualenv

From f1c0fccf015d848a1ff25d8f42b16f65aa4c9dd1 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 19:44:54 +0100
Subject: [PATCH 09/16] bug fix?

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 9055674..6751f6f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -58,7 +58,7 @@ jobs:
       - name: Debugging
         run: |
           ls -la
-          cat Makefile
+          cat MAKEFILE
       - name: Install project
         run: |
           make virtualenv

From 6b8a6b7e1ab827a44005fc56420aa5ac29c0feb3 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 19:47:52 +0100
Subject: [PATCH 10/16] bug fix again?

---
 .github/workflows/main.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 6751f6f..63c25c0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -59,6 +59,7 @@ jobs:
         run: |
           ls -la
           cat MAKEFILE
+          make -f MAKEFILE virtualenv
       - name: Install project
         run: |
           make virtualenv

From 15d984e5d2d254af1d1f284d46b5f114c88db6bb Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 19:56:08 +0100
Subject: [PATCH 11/16] renamed makefile

---
 .github/workflows/main.yml | 74 +++++++++++++++++++-------------------
 MAKEFILE => Makefile       |  0
 2 files changed, 37 insertions(+), 37 deletions(-)
 rename MAKEFILE => Makefile (100%)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 63c25c0..a71ff32 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -59,7 +59,7 @@ jobs:
         run: |
           ls -la
           cat MAKEFILE
-          make -f MAKEFILE virtualenv
+          make virtualenv
       - name: Install project
         run: |
           make virtualenv
@@ -90,40 +90,40 @@ jobs:
         # with:
         #   fail_ci_if_error: true
 
-  tests_mac:
-    needs: linter
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [3.9]
-        os: [macos-latest]
-    runs-on: ${{ matrix.os }}
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install project
-        run: make install
-      - name: Run tests
-        run: make test
+  # tests_mac:
+  #   needs: linter
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       python-version: [3.9]
+  #       os: [macos-latest]
+  #   runs-on: ${{ matrix.os }}
+  #   steps:
+  #     - uses: actions/checkout@v3
+  #     - uses: actions/setup-python@v4
+  #       with:
+  #         python-version: ${{ matrix.python-version }}
+  #     - name: Install project
+  #       run: make install
+  #     - name: Run tests
+  #       run: make test
 
-  tests_win:
-    needs: linter
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [3.9]
-        os: [windows-latest]
-    runs-on: ${{ matrix.os }}
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install Pip
-        run: pip install --user --upgrade pip
-      - name: Install project
-        run: pip install -e .[test]
-      - name: run tests
-        run: pytest -s -vvvv -l --tb=long tests
\ No newline at end of file
+  # tests_win:
+  #   needs: linter
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       python-version: [3.9]
+  #       os: [windows-latest]
+  #   runs-on: ${{ matrix.os }}
+  #   steps:
+  #     - uses: actions/checkout@v3
+  #     - uses: actions/setup-python@v4
+  #       with:
+  #         python-version: ${{ matrix.python-version }}
+  #     - name: Install Pip
+  #       run: pip install --user --upgrade pip
+  #     - name: Install project
+  #       run: pip install -e .[test]
+  #     - name: run tests
+  #       run: pytest -s -vvvv -l --tb=long tests
\ No newline at end of file
diff --git a/MAKEFILE b/Makefile
similarity index 100%
rename from MAKEFILE
rename to Makefile

From ec29e4e553ace404408ca13eae08d7f60555eb81 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 19:57:06 +0100
Subject: [PATCH 12/16] bug fix

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a71ff32..1dfcdd8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -58,7 +58,7 @@ jobs:
       - name: Debugging
         run: |
           ls -la
-          cat MAKEFILE
+          cat Makefile
           make virtualenv
       - name: Install project
         run: |

From 9b25ec65a6ed554a253f8189601b8e26e809529a Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Thu, 9 Nov 2023 20:00:09 +0100
Subject: [PATCH 13/16] add requirements-test.txt

---
 requirements-test.txt | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/requirements-test.txt b/requirements-test.txt
index a7baf9d..f660f6a 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,10 @@
-matplotlib
-numpy
-pandas
-seaborn
-scikit-learn
-tqdm
\ No newline at end of file
+# These requirements are for development and testing only, not for production.
+pytest
+coverage
+flake8
+black
+isort
+pytest-cov
+mypy
+gitchangelog
+mkdocs
\ No newline at end of file

From 68181b86271557195d9b61d153226ce8f2cfd028 Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Fri, 10 Nov 2023 09:57:05 +0100
Subject: [PATCH 14/16] bugfixes mypy

---
 plotsandgraphs/binary_classifier.py     |  2 +-
 plotsandgraphs/compare_distributions.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/plotsandgraphs/binary_classifier.py b/plotsandgraphs/binary_classifier.py
index 70e5069..e6fd62f 100644
--- a/plotsandgraphs/binary_classifier.py
+++ b/plotsandgraphs/binary_classifier.py
@@ -137,7 +137,7 @@ def plot_classification_report(y_test: np.ndarray,
     
     bounds = np.linspace(0, 1, 11)
     cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1)
-    norm = colors.BoundaryNorm(bounds, cmap.N)
+    norm = colors.BoundaryNorm(bounds, cmap.N) # type: ignore[attr-defined]
     
     ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, fmt='.3g',
             cbar_kws={'ticks':bounds[::2], 'norm':norm, 'boundaries':bounds},
diff --git a/plotsandgraphs/compare_distributions.py b/plotsandgraphs/compare_distributions.py
index 943c3e0..c9cbda2 100644
--- a/plotsandgraphs/compare_distributions.py
+++ b/plotsandgraphs/compare_distributions.py
@@ -2,17 +2,17 @@
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 import pandas as pd
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
 
 def plot_raincloud(df: pd.DataFrame,
                    x_col: str,
                    y_col: str, 
-                   colors: List[str] = None, 
-                   order: List[str] = None, 
-                   title: str = None, 
-                   x_label: str = None, 
-                   x_range: Tuple[float, float] = None, 
+                   colors: Optional[List[str]] = None, 
+                   order: Optional[List[str]] = None, 
+                   title: Optional[str] = None, 
+                   x_label: Optional[str] = None, 
+                   x_range: Optional[Tuple[float, float]] = None, 
                    show_violin = True, 
                    show_scatter = True, 
                    show_boxplot = True):

From dcdf1a23fbfdc18a0ce5385c2d6df2be9556628f Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Fri, 10 Nov 2023 10:11:05 +0100
Subject: [PATCH 15/16] small bug fixes

---
 Makefile                            | 4 ++++
 plotsandgraphs/binary_classifier.py | 8 ++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index e0641ae..b541671 100644
--- a/Makefile
+++ b/Makefile
@@ -32,9 +32,13 @@ fmt:              ## Format code using black & isort.
 
 .PHONY: lint
 lint:             ## Run pep8, black, mypy linters.
+	@echo "Running linters ..."
+	@echo "--- Running flake8 ---"
 	$(ENV_PREFIX)flake8 plotsandgraphs/
+	@echo "--- Running black ---"
 	$(ENV_PREFIX)black -l 79 --check plotsandgraphs/
 	$(ENV_PREFIX)black -l 79 --check tests/
+	@echo "--- Running mypy ---"
 	$(ENV_PREFIX)mypy --ignore-missing-imports plotsandgraphs/
 
 .PHONY: test
diff --git a/plotsandgraphs/binary_classifier.py b/plotsandgraphs/binary_classifier.py
index e6fd62f..3c69486 100644
--- a/plotsandgraphs/binary_classifier.py
+++ b/plotsandgraphs/binary_classifier.py
@@ -35,7 +35,7 @@ def plot_accuracy(y_true, y_pred, name='', save_fig_path=None) -> Figure:
         path = Path(save_fig_path)
         path.parent.mkdir(parents=True, exist_ok=True)
         fig.savefig(save_fig_path, bbox_inches='tight')
-    return fig, accuracy
+    return fig
 
 def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, save_fig_path=None) -> Figure:
     import matplotlib.colors as colors
@@ -136,7 +136,7 @@ def plot_classification_report(y_test: np.ndarray,
     mask[:,cols-1] = True
     
     bounds = np.linspace(0, 1, 11)
-    cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1)
+    cmap = plt.cm.get_cmap('YlOrRd', len(bounds)+1) # type: ignore
     norm = colors.BoundaryNorm(bounds, cmap.N) # type: ignore[attr-defined]
     
     ax = sns.heatmap(df, mask=mask, annot=False, cmap=cmap, fmt='.3g',
@@ -428,8 +428,8 @@ def plot_pr_curve(
     
     # Plot Precision-Recall curve
     ax.plot(recall, precision, label=label, color=color)
-    ax.set_xlim([0.0, 1.01])
-    ax.set_ylim([-0.01, 1.01])
+    ax.set_xlim((0.0, 1.01))
+    ax.set_ylim((-0.01, 1.01))
     ax.set_xlabel('Recall')
     ax.set_ylabel('Precision')
     if title is not None:

From 66a59e0488ab74894c2ae7744998ef70ea02ef4b Mon Sep 17 00:00:00 2001
From: Joshua Wendland <joshua.wendland@tum.de>
Date: Fri, 10 Nov 2023 10:48:20 +0100
Subject: [PATCH 16/16] add pyproject.toml file

---
 pyproject.toml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 pyproject.toml

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d390ec0
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,5 @@
+[tool.pylint."FORMAT"]
+max-line-length = 120
+
+[tool.pylint."BASIC"]
+variable-rgx = "[a-z_][a-z0-9_]{0,30}$|[a-z0-9_]+([A-Z][a-z0-9_]+)*$"  # Allow snake case and camel case for variable names