In [7]:
!conda info -e && python -V

# conda environments:
#
base                     /opt/conda
py311                 *  /opt/conda/envs/py311

Python 3.11.12


In [1]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC

import numpy as np
np.random.seed(0)
# importing pylab or pyplot
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 5)

# Import scikit-plot
import scikitplot as sp

sp.__version__

'0.5.dev0+git.20250811.21598aa'

In [2]:
from scikitplot.stats import histogram

histogram?

Signature:
histogram(
    a: 'ArrayLike',
    bins: "int | list[float] | Literal['blocks', 'knuth', 'scott', 'freedman'] | None" = 10,
    range: 'tuple[float, float] | None' = None,
    weights: 'ArrayLike | None' = None,
    **kwargs,
) -> 'tuple[NDArray, NDArray]'
Docstring:
Enhanced histogram function, providing adaptive binnings.

This is a histogram function that enables the use of more sophisticated
algorithms for determining bins.  Aside from the ``bins`` argument allowing
a string specified how bins are computed, the parameters are the same
as `numpy.histogram`.

Parameters
----------
a : array-like
    array of data to be histogrammed

bins : int, list, or str, optional
    If bins is a string, then it must be one of:

    - 'blocks' : use bayesian blocks for dynamic bin widths

    - 'knuth' : use Knuth's rule to determine bins

    - 'scott' : use Scott'

In [3]:
# Generate a synthetic two-class dataset: one million samples with five features
# (two informative, two redundant, none repeated), reproducible via random_state.
X, y = make_classification(
    n_samples=1_000_000,
    n_features=5,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    random_state=0,
)

# The first 1000 rows form the training split; every remaining row is held out
# as the validation split.
X_train, X_val = X[:1000], X[1000:]
y_train, y_val = y[:1000], y[1000:]

# Show both feature-matrix shapes as the cell output.
(X_train.shape, X_val.shape)

((1000, 5), (999000, 5))

In [4]:
# First feature column of the training split; this is the 1-D sample that gets binned below.
t = X_train[:, 0]
# Independent copy of that column, so later changes to one do not affect the other.
# NOTE(review): `tr` is not used in the visible part of the notebook; confirm it is needed.
tr = t.copy()

In [5]:
# Run every string-valued binning rule on the same sample and print, per rule:
# the rule name, the number of bins, the per-bin counts and the bin edges.
for bins in ('blocks', 'freedman', 'knuth', 'scott'):
    # Announce the rule before computing, so the label precedes anything the call emits.
    print(f"{bins=}")
    hist, bin_edges = histogram(t, bins=bins)
    # One item per line, followed by a blank line separating the rules.
    print(f"size: {len(hist)}", hist, bin_edges, sep="\n", end="\n\n")

bins='blocks'
size: 7
[ 10  33 134 676  80  46  21]
[-3.28014194 -2.41568203 -2.02193243 -1.28908697  1.24124305  1.87244844
  2.65264082  3.89811045]

bins='freedman'
size: 20
[  3   5  24  43  82  78  85  92  98 109 114  93  55  46  29  17  11   8
   7   1]
[-3.28014194 -2.91403872 -2.5479355  -2.18183227 -1.81572905 -1.44962582
 -1.0835226  -0.71741938 -0.35131615  0.01478707  0.3808903   0.74699352
  1.11309674  1.47919997  1.84530319  2.21140642  2.57750964  2.94361286
  3.30971609  3.67581931  4.04192253]

bins='knuth'
size: 24
[ 3  3  7 25 44 70 55 77 65 83 76 91 93 89 61 42 39 27 17 13  6  6  6  2]
[-3.28014194 -2.98104809 -2.68195424 -2.38286039 -2.08376655 -1.7846727
 -1.48557885 -1.186485   -0.88739115 -0.5882973  -0.28920345  0.0098904
  0.30898425  0.6080781   0.90717195  1.2062658   1.50535965  1.8044535
  2.10354735  2.4026412   2.70173505  3.0008289   3.29992275  3.5990166
  3.89811045]

bins='scott'
size: 17
[  5   7  45  92  92 104 117 129 136 109  62  48  24  14   9 