In [1]:
!conda info -e && python -V

# conda environments:
#
base                     /opt/conda
py311                 *  /opt/conda/envs/py311

Python 3.11.12


In [1]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC

import numpy as np
np.random.seed(0)
# importing pylab or pyplot
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 5)

# Import scikit-plot
import scikitplot as sp

sp.__version__

'0.5.dev0+git.20250811.21598aa'

In [None]:
from scikitplot.cexternals import _astropy

dir(_astropy.stats)

['Events',
 'FitnessFunc',
 'PointMeasures',
 'RegularEvents',
 'RipleysKEstimator',
 'SigmaClip',
 'SigmaClippedStats',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_bb',
 '_fast_sigma_clip',
 '_hist',
 '_stats',
 'akaike_info_criterion',
 'akaike_info_criterion_lsq',
 'bayesian_blocks',
 'bayesian_info_criterion',
 'bayesian_info_criterion_lsq',
 'binned_binom_proportion',
 'binom_conf_interval',
 'biweight',
 'biweight_location',
 'biweight_midcorrelation',
 'biweight_midcovariance',
 'biweight_midvariance',
 'biweight_scale',
 'bootstrap',
 'calculate_bin_edges',
 'cdf_from_intervals',
 'circcorrcoef',
 'circmean',
 'circmoment',
 'circstats',
 'circstd',
 'circvar',
 'fold_intervals',
 'freedman_bin_width',
 'funcs',
 'gaussian_fwhm_to_sigma',
 'gaussian_sigma_to_fwhm',
 'histogram',
 'histogram_intervals',
 'info_theory',
 'interval_overlap_length',
 'jackknife',
 'jackknife_resampling

In [3]:
from scikitplot.stats import bayesian_blocks

bayesian_blocks?

[31mSignature:[39m
bayesian_blocks(
    t: [33m'ArrayLike'[39m,
    x: [33m'ArrayLike | None'[39m = [38;5;28;01mNone[39;00m,
    sigma: [33m'ArrayLike | float | None'[39m = [38;5;28;01mNone[39;00m,
    fitness: [33m"Literal['events', 'regular_events', 'measures'] | FitnessFunc"[39m = [33m'events'[39m,
    **kwargs,
) -> [33m'NDArray[float]'[39m
[31mDocstring:[39m
Compute optimal segmentation of data with Scargle's Bayesian Blocks.

This is a flexible implementation of the Bayesian Blocks algorithm
described in Scargle 2013 [1]_.

Parameters
----------
t : array-like
    data times (one dimensional, length N)
x : array-like, optional
    data values
sigma : array-like or float, optional
    data errors
fitness : str or object
    the fitness function to use for the model.
    If a string, the following options are supported:

    - 'events' : binned or unbinned event data.  Arguments are ``gamma``,
      which gives the slope of the prior on the number of bins, or
  

In [4]:
# Load the data
X, y = make_classification(
	n_samples=int(1e6),
	n_features=5,
	n_informative=2,
	n_redundant=2,
	n_repeated=0,
	n_classes=2,
	n_clusters_per_class=2,
	random_state=0,
)
X_train, y_train, X_val, y_val = X[:1000], y[:1000], X[1000:], y[1000:]

X_train.shape, X_val.shape, 

((1000, 5), (999000, 5))

In [5]:
t = X_train[:,0]
tr = t.copy()

In [6]:
for f in ['events', 'regular_events', 'measures']:
    edges = bayesian_blocks(t, fitness='events', p0=0.01)
    print(f'\n{f=}\n', edges, '\nmean:', edges.mean())


f='events'
 [-3.28014194 -2.41568203 -2.02193243 -1.28908697  1.24124305  1.87244844
  2.65264082  3.89811045] 
mean: 0.08219992231151574

f='regular_events'
 [-3.28014194 -2.41568203 -2.02193243 -1.28908697  1.24124305  1.87244844
  2.65264082  3.89811045] 
mean: 0.08219992231151574

f='measures'
 [-3.28014194 -2.41568203 -2.02193243 -1.28908697  1.24124305  1.87244844
  2.65264082  3.89811045] 
mean: 0.08219992231151574


In [7]:
tr[tr.size//2:] = tr[:tr.size//2]
edges = bayesian_blocks(t, fitness='events', p0=0.01)
edges

array([-3.28014194, -2.41568203, -2.02193243, -1.28908697,  1.24124305,
        1.87244844,  2.65264082,  3.89811045])