# Test of histogram variable binning

- author : Sylvie Dagoret-Campagne
- creation date : 2025-02-07

## Import

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
# Box style properties: rounded, white, nearly transparent.
# NOTE(review): presumably meant as the ``bbox`` of text annotations; not used in the visible cells.
# Alternative kept from the original: dict(boxstyle='round')
props = {"boxstyle": "round", "facecolor": "white", "alpha": 0.1}

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,AutoMinorLocator)
  

# Default figure size and extra-large fonts for labels, titles and ticks.
plt.rcParams.update({
    "figure.figsize": (4, 3),
    "axes.labelsize": 'xx-large',
    'axes.titlesize': 'xx-large',
    'xtick.labelsize': 'xx-large',
    'ytick.labelsize': 'xx-large',
})

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from astropy.coordinates.earth import EarthLocation
from datetime import datetime
from pytz import timezone
import pandas as pd

## Configuration

In [None]:
# Directory where the figures are stored; it is created if it does not exist yet.
# `os` is imported here because the import cell above does not provide it.
import os

pathfigs = "figsTestVariableBinning"
os.makedirs(pathfigs, exist_ok=True)
# NOTE(review): presumably the file extension used when saving figures;
# no savefig call is visible in this notebook.
figtype = ".png"

In [None]:
# Directory prefix used to build the input CSV file names.
pathdata = "dataHoloCorrelationPWVTime-v3"

In [None]:
# Input CSV files, one per sky condition, both located under `pathdata`.
input_filename_pairs_clearsky = f"{pathdata}/pairs_tpwv_vs_dt_clearsky_nobinning.csv"
input_filename_pairs_clouddy = f"{pathdata}/pairs_tpwv_vs_dt_clouddy_nobinning.csv"

## Initialisation

In [None]:
# Load the two pair tables (clearsky first, then clouddy) from their CSV files.
df_clearsky, df_clouddy = (
    pd.read_csv(csv_path)
    for csv_path in (input_filename_pairs_clearsky, input_filename_pairs_clouddy)
)

## Histograms

In [None]:
# Display the clearsky table.
df_clearsky 

In [None]:
# Side-by-side 50-bin histograms of the 'dt' column for both samples.
fig, axs = plt.subplots(1, 2, figsize=(16, 4))
ax1, ax2 = axs.flatten()
for panel, frame, sample in ((ax1, df_clearsky, "clearsky"), (ax2, df_clouddy, "clouddy")):
    frame['dt'].hist(bins=50, ax=panel)
    panel.set_title(f"{sample} : pair time difference")
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    panel.set_xlabel(r"$\Delta t$ (hours)")

## Special binning
https://docs.astropy.org/en/stable/visualization/histogram.html

In [None]:
from astropy.visualization import hist

In [None]:
# 'dt' column of the clearsky table as an array.
dt = df_clearsky['dt'].values

In [None]:
# Draw density histograms of `dt` with two automatic binning rules
# ('knuth' on the left, 'blocks' on the right, the latter on a log y-scale).
fig, ax = plt.subplots(1, 2, figsize=(14, 6), layout="constrained")

# No fig.subplots_adjust() here: it is incompatible with the constrained
# layout engine, so matplotlib would only warn and skip the call.
for panel, rule in zip(ax, ['knuth', 'blocks']):
    hist(dt, bins=rule, ax=panel, histtype='stepfilled', alpha=0.2, density=True)
    panel.set_xlabel('t')
    panel.set_ylabel('P(t)')
    panel.set_title(f'hist(t, bins="{rule}")', fontdict=dict(family='monospace'))
    if rule == 'blocks':
        panel.set_yscale('log')
fig.suptitle("clearsky", fontsize=20)


In [None]:
# 'dt' column of the clouddy table as an array.
dt = df_clouddy['dt'].values

In [None]:
# Draw density histograms of `dt` with two automatic binning rules
# ('knuth' on the left, 'blocks' on the right, the latter on a log y-scale).
fig, ax = plt.subplots(1, 2, figsize=(14, 6), layout="constrained")

# No fig.subplots_adjust() here: it is incompatible with the constrained
# layout engine, so matplotlib would only warn and skip the call.
for panel, rule in zip(ax, ['knuth', 'blocks']):
    hist(dt, bins=rule, ax=panel, histtype='stepfilled', alpha=0.2, density=True)
    panel.set_xlabel('t')
    panel.set_ylabel('P(t)')
    panel.set_title(f'hist(t, bins="{rule}")', fontdict=dict(family='monospace'))
    if rule == 'blocks':
        panel.set_yscale('log')
fig.suptitle("clouddy", fontsize=20)

## Binning with Bayesian blocks

In [None]:
from astropy.stats import bayesian_blocks
from astropy.stats import histogram

In [None]:
def _blocks_then_counts(values):
    """Return (Bayesian-block edges, counts per bin, edges returned by np.histogram)."""
    block_edges = bayesian_blocks(values, fitness='events', p0=0.01)
    counts, used_edges = np.histogram(values, bins=block_edges)
    return block_edges, counts, used_edges


# Bayesian-block binning of each sample, followed by the counts per block.
edges_clearsky, hist1, bin_edge1 = _blocks_then_counts(df_clearsky['dt'].values)
# `dt` is left holding the clouddy values.
dt = df_clouddy['dt'].values
edges_clouddy, hist2, bin_edge2 = _blocks_then_counts(dt)

In [None]:
# Bin edges versus bin index (left axis) and counts per bin (right axis) for both samples.
fig, ax = plt.subplots(1, 1, figsize=(12, 6), layout="constrained")
for sample, colour, block_edges, histo_edges in (
    ("clearsky", "r", edges_clearsky, bin_edge1),
    ("clouddy", "b", edges_clouddy, bin_edge2),
):
    # Edges from bayesian_blocks (labelled) and edges returned by np.histogram (dash-dot).
    ax.plot(block_edges, f'-o{colour}', label=sample)
    ax.plot(histo_edges, f'-.{colour}')
ax.set_xlabel("bin number")
ax.set_ylabel("bin-edge (hours)")
ax.set_title("Histogram binning with. bayesian block")
ax.legend()

# Second y-axis sharing the same x-axis, for the counts.
ax2 = ax.twinx()
for sample, colour, counts in (("clearsky", 'r', hist1), ("clouddy", 'b', hist2)):
    ax2.step(counts, colour, label=sample)
ax2.legend()
ax2.set_ylabel("counts per bin")

In [None]:
# Histograms of both samples on their Bayesian-block edges:
# linear y-scale on top (ax1), log y-scale at the bottom (ax2).
fig, axs = plt.subplots(2, 1, figsize=(12, 8), layout="constrained")
ax1, ax2 = axs.flatten()

dt = df_clearsky['dt'].values
edges_clearsky = bayesian_blocks(dt, fitness='events', p0=0.01)
hist1, bin_edge1 = np.histogram(dt, bins=edges_clearsky)
for panel in (ax1, ax2):
    panel.hist(dt, bins=edges_clearsky, color='r', label="clearsky", histtype="step", lw=3)

dt = df_clouddy['dt'].values
edges_clouddy = bayesian_blocks(dt, fitness='events', p0=0.01)
hist2, bin_edge2 = np.histogram(dt, bins=edges_clouddy)
for panel in (ax1, ax2):
    panel.hist(dt, bins=edges_clouddy, color='b', label="clouddy", histtype="step", lw=3)

ax1.legend()
ax2.legend()
ax2.set_yscale("log")

ax1.set_title("counts per bin")
# Raw string: "\D" is not a valid escape sequence in a normal string literal.
ax2.set_xlabel(r"$\Delta t$ (hours)")
fig.suptitle("Histogram with Bayesian blocks", fontsize=20)

## Binning Using astropy histograms with Blocks

In [None]:
# Counts and bin edges from astropy's histogram() with the 'blocks' rule, one call per sample.
hist_clearsky, bin_edges_clearsky = histogram(df_clearsky['dt'].values, bins='blocks')
# `dt` is left holding the clouddy values.
dt = df_clouddy['dt'].values
hist_clouddy, bin_edges_clouddy = histogram(dt, bins='blocks')

In [None]:
# Bin edges versus bin index (left axis) and counts per bin (right axis) for both samples.
fig, ax = plt.subplots(1, 1, figsize=(12, 6), layout="constrained")
for sample, colour, block_edges in (
    ("clearsky", "r", bin_edges_clearsky),
    ("clouddy", "b", bin_edges_clouddy),
):
    ax.plot(block_edges, f'-o{colour}', label=sample)
ax.set_xlabel("bin number")
ax.set_ylabel("bin-edge (hours)")
ax.set_title("Histogram binning with histrogram blocks")
ax.legend()

# Second y-axis sharing the same x-axis, for the counts.
ax2 = ax.twinx()
for sample, colour, counts in (("clearsky", 'r', hist_clearsky), ("clouddy", 'b', hist_clouddy)):
    ax2.step(counts, colour, label=sample)
ax2.legend()
ax2.set_ylabel("counts per bin")

In [None]:
# Histograms of both samples on the edges returned by astropy's histogram(bins='blocks'):
# linear y-scale on top (ax1), log y-scale at the bottom (ax2).
fig, axs = plt.subplots(2, 1, figsize=(12, 8), layout="constrained")
ax1, ax2 = axs.flatten()

dt = df_clearsky['dt'].values
hist_clearsky, bin_edges_clearsky = histogram(dt, bins='blocks')
for panel in (ax1, ax2):
    panel.hist(dt, bins=bin_edges_clearsky, color='r', label="clearsky", histtype="step", lw=3)

dt = df_clouddy['dt'].values
hist_clouddy, bin_edges_clouddy = histogram(dt, bins='blocks')
for panel in (ax1, ax2):
    panel.hist(dt, bins=bin_edges_clouddy, color='b', label="clouddy", histtype="step", lw=3)

ax1.legend()
ax2.legend()
ax2.set_yscale("log")
ax1.set_title("counts per bin")
# Raw string: "\D" is not a valid escape sequence in a normal string literal.
ax2.set_xlabel(r"$\Delta t$ (hours)")
fig.suptitle("Histogram with astropy histogram blocks", fontsize=20)

## Compute the function generating the bin number and the bin center

### Digitize: compute the bin number

In [None]:
# Time-difference arrays of both samples.
dt1 = df_clearsky['dt'].values
dt2 = df_clouddy['dt'].values

# Bayesian-block edges computed independently for each sample.
edges_clearsky = bayesian_blocks(dt1, fitness='events', p0=0.01)
edges_clouddy = bayesian_blocks(dt2, fitness='events', p0=0.01)

# Bin number of every value. With right=False, digitize returns i such that
# edges[i-1] <= x < edges[i], 0 below the first edge and len(edges) at or
# above the last edge.
# NOTE(review): if the last edge equals the sample maximum, that value gets
# bin number len(edges) - confirm this is acceptable downstream.
bin_numbers_clearsky = np.digitize(dt1, edges_clearsky, right=False)
bin_numbers_clouddy = np.digitize(dt2, edges_clouddy, right=False)

In [None]:
# Summary of the clearsky binning. len(edges) counts the bin edges; the
# number of bins delimited by them is one less.
n_edges_clearsky = len(edges_clearsky)
print(f"clearsky : number of bin edges {n_edges_clearsky} -> number of bins {n_edges_clearsky - 1}")
# Range of the bin numbers returned by np.digitize for this sample.
print(f"\t binnum : min-max = {bin_numbers_clearsky.min()} - {bin_numbers_clearsky.max()} ")

In [None]:
# Summary of the clouddy binning. len(edges) counts the bin edges; the
# number of bins delimited by them is one less.
n_edges_clouddy = len(edges_clouddy)
print(f"clouddy : number of bin edges {n_edges_clouddy} -> number of bins {n_edges_clouddy - 1}")
# Range of the bin numbers returned by np.digitize for this sample.
print(f"\t binnum : min-max = {bin_numbers_clouddy.min()} - {bin_numbers_clouddy.max()} ")

In [None]:
# Bin numbers of two probe values (0 and 60/3600) in the clearsky binning.
probe_values = [0.0, 60.0 / 3600.0]
np.digitize(probe_values, bins=edges_clearsky, right=False)

In [None]:
# Bin number of the value 0 in the clouddy binning.
np.digitize([0.0], bins=edges_clouddy, right=False)

In [None]:
# Scatter of the bin number assigned to each time difference, with the bin
# edges of both samples drawn as thin vertical lines.
fig, ax = plt.subplots(1, 1, figsize=(12, 6), layout="constrained")
ax.scatter(dt1, bin_numbers_clearsky, marker='.', color="r", label="clearsky")
ax.scatter(dt2, bin_numbers_clouddy, marker='.', color="b", label="clouddy")
# Raw strings: "\D" is not a valid escape sequence in a normal string literal.
ax.set_xlabel(r"$\Delta t$ (hours)")
ax.set_ylabel("bin-number")
ax.set_title(r"mapping $\Delta t$ to bin-number")
ax.legend()
for sample_edges, colour in ((edges_clearsky, "r"), (edges_clouddy, "b")):
    for x in sample_edges:
        ax.axvline(x, ls='-', color=colour, linewidth=0.3)

# Bin centers = midpoints of consecutive clearsky edges.
xcenters = (edges_clearsky[:-1] + edges_clearsky[1:]) / 2
# Loop over `xcenters` (the original iterated over the undefined name `xcenter`).
for x in xcenters:
    ax.axvline(x, ls='-', color="r", linewidth=0.3)

In [None]:
# Same mapping plot with the x-axis in seconds (values multiplied by 3600),
# shown on two different x-ranges: left panel (ax1) and right panel (ax2).
fig, axs = plt.subplots(1, 2, figsize=(12, 6), layout="constrained")
ax1, ax2 = axs.flatten()

# Both panels have identical content; only the x-limits differ.
for panel, xlimits in ((ax1, (0., 0.1*3600)), (ax2, (20000., 10*3600))):
    panel.scatter(dt1*3600, bin_numbers_clearsky, marker='.', color="r", label="clearsky")
    panel.scatter(dt2*3600, bin_numbers_clouddy, marker='.', color="b", label="clouddy")
    # Raw strings: "\D" is not a valid escape sequence in a normal string literal.
    panel.set_xlabel(r"$\Delta t$ (seconds)")
    panel.set_ylabel("bin-number")
    panel.set_title(r"mapping $\Delta t$ to bin-number")
    panel.legend()
    for sample_edges, colour in ((edges_clearsky, "r"), (edges_clouddy, "b")):
        for x in sample_edges:
            panel.axvline(x*3600, ls='-', color=colour, linewidth=0.3)
    panel.set_xlim(*xlimits)

# The grid is enabled on the left panel only, as in the original cell.
ax1.grid()



In [None]:
# Number of clearsky bin edges.
len(edges_clearsky)

In [None]:
# Display the clearsky bin edges.
edges_clearsky

In [None]:
# Edges shifted by one position to the left (the first edge wraps around to the end).
np.roll(edges_clearsky,-1)

In [None]:
# Bin centers: midpoint of each pair of consecutive clearsky edges.
xcenters = 0.5 * (edges_clearsky[1:] + edges_clearsky[:-1])

In [None]:
# Display the clearsky bin centers.
xcenters