# OZ02:Explore hologram data Repeatability on Ozone : Update March 2025 about selection

- author Sylvie Dagoret-Campagne
- creation date 2025-09-16 : version v1
- affiliation : IJCLab
- Kernel @usdf **w_2025_36**
- Home emac : base (conda)
- laptop : conda_py310

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
print(python_version())

In [None]:
import os

In [None]:
# Directory where the figures produced by this notebook are saved
pathfigs = "figs_Ozone02_Repeatability"
# exist_ok=True makes the cell re-runnable and avoids the check-then-create race
os.makedirs(pathfigs, exist_ok=True)
# File extension appended to every saved figure name
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from astropy.coordinates.earth import EarthLocation
from datetime import datetime
from pytz import timezone

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### quartiles

In [None]:
def q50(x):
    """Return the median (0.5 quantile) of ``x``.

    ``x`` must expose a pandas-style ``quantile`` method.
    """
    return x.quantile(q=0.5)

def q90(x):
    """Return the 90th percentile (0.9 quantile) of ``x``.

    ``x`` must expose a pandas-style ``quantile`` method.
    """
    return x.quantile(q=0.9)

def q25(x):
    """Return the first quartile (0.25 quantile) of ``x``."""
    return x.quantile(q=0.25)


def q75(x):
    """Return the third quartile (0.75 quantile) of ``x``."""
    return x.quantile(q=0.75)


def iqr(x):
    """Return the interquartile range of ``x``, i.e. ``q75(x) - q25(x)``."""
    upper_quartile = q75(x)
    lower_quartile = q25(x)
    return upper_quartile - lower_quartile


# https://en.wikipedia.org/wiki/Interquartile_range
# For a normal distribution IQR ~ 1.349 * sigma, hence sigma ~ IQR / 1.349
def std_iqr(x):
    """Return the standard-deviation estimate ``iqr(x) / 1.349``."""
    return iqr(x) / 1.349
    
#df.groupby("AGGREGATE").quantile([0, 0.25, 0.5, 0.75, 0.95, 1])
#df.groupby("AGGREGATE").agg(("YOUR_COL_NAME", lambda x: x.quantile(0.5))

In [None]:
1.35/2

In [None]:
def convertNumToDatestr(num):
    """Convert an integer date encoded as YYYYMMDD into a pandas timestamp.

    The integer is split into year, month and day, formatted as the string
    ``YYYY-MM-DD`` (zero padded) and parsed with ``pd.to_datetime``.
    Despite the function name, the value returned is the parsed timestamp,
    not the string.
    """
    year, month_and_day = divmod(num, 10_000)
    month, day = divmod(month_and_day, 100)

    pieces = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(pieces))

### Configuration

In [None]:
observing_location = EarthLocation.of_site('Rubin Observatory')
tz = timezone('America/Santiago')

In [None]:
# When True, only nights with nightObs strictly greater than DATE_WITHCOLLIMATOR are kept
FLAG_WITHCOLLIMATOR = False
# Reference date encoded as an integer YYYYMMDD
DATE_WITHCOLLIMATOR = 20230930
# UTC-aware timestamp (midnight) of the reference date, derived from the integer
# so that the two values cannot get out of sync
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

In [None]:
version_results = "v1"
legendtag = {"v1" : "v3.1.0"}

In [None]:
atmfilenamesdict = {"v1":"../2025-06-26-SpectractorExtraction-FromButler/data/spectro/auxtel_atmosphere_20250912a_repomain_v1.npy",}

In [None]:
atmfilename = atmfilenamesdict[version_results]
tag = legendtag[version_results] 

## Initialisation

### Read the file

In [None]:
# Load the atmospheric-fit results selected by version_results.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects;
# only use it on files coming from a trusted source.
specdata = np.load(atmfilename,allow_pickle=True)

In [None]:
df_spec = pd.DataFrame(specdata)

In [None]:
#list(df_spec.columns)

### Compute NightObs

In [None]:
# nightObs is the exposure id with its last five digits dropped.
# Vectorised integer division replaces the slow row-by-row apply and keeps an integer dtype.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
# Optionally keep only the nights strictly after the collimator reference date
if FLAG_WITHCOLLIMATOR:
    df_spec = df_spec.loc[df_spec["nightObs"] > DATE_WITHCOLLIMATOR]
    

#### Series on spec

In [None]:
ser_spec_size = df_spec.groupby(["nightObs"]).size()

In [None]:
fig,ax = plt.subplots(1,1)
ax.hist(ser_spec_size.values,bins=50,facecolor="b")
ax.set_title("nb obs per night")
ax.set_xlabel("Nobs/night")

##### Make 3 series

In [None]:
ser_CHI2_FIT = df_spec[["CHI2_FIT","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])
ser_O3 = df_spec[["ozone [db]_x","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])
ser_O3_CHI2_FIT = df_spec[["ozone [db]_x","CHI2_FIT","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])

### Plot Ozone and Chi2 from series before any selection

In [None]:
# Bar chart: number of ozone measurements per night, before any selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_O3.unstack()["ozone [db]_x"]["count"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="b",
    grid=True,
    title="Number of measurements per night",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night mean of the ozone column, before any selection.
# The title previously said "PWV" although the plotted column is ozone.
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_O3.unstack()["ozone [db]_x"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title="Mean ozone per night")
plt.tight_layout()

In [None]:
# Bar chart: per-night median of the ozone column, before any selection.
# The title previously said "PWV" although the plotted column is ozone.
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_O3.unstack()["ozone [db]_x"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title="Median ozone per night")
plt.tight_layout()

In [None]:
# Bar chart: per-night standard deviation of the ozone column, before any selection.
# ax=ax is now passed explicitly like in the sibling cells, and the title names
# ozone (it previously said "PWV").
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_O3.unstack()["ozone [db]_x"]["std"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title="STD variation for ozone per night")
plt.tight_layout()

In [None]:
# Bar chart: number of CHI2_FIT values per night, before any selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["count"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="Number of measurements per night",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night mean of CHI2_FIT, before any selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["mean"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="Mean CHI2 per night",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night median of CHI2_FIT, before any selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["median"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="Median CHI2 per night",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night standard deviation of CHI2_FIT, before any selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["std"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="STD variation CHI2 per night",
)
plt.tight_layout()

In [None]:
ser_O3_CHI2_FIT.loc[20231010,:]

In [None]:
df_unstack = ser_O3_CHI2_FIT.loc[20231010,:].unstack()
df_unstack

In [None]:
ser_O3_CHI2_FIT.loc[20231010,"ozone [db]_x"]["count"]

In [None]:
df_unstack.loc["ozone [db]_x","count"]

#### Add per-night aggregate data to the pandas dataframe

In [None]:
def FillAgreggates(row):
    """Return the per-night aggregates matching the night of ``row``.

    The night is read from ``row["nightObs"]`` and looked up in the global
    table ``ser_O3_CHI2_FIT``. The result is a ``pd.Series`` holding
    ``_count`` (taken from the ozone column) followed by the min, max, mean,
    median and std of the ozone column (``_o3*``) and of ``CHI2_FIT``
    (``_chi2*``).
    """
    night_table = ser_O3_CHI2_FIT.loc[row["nightObs"], :].unstack()

    aggregates = {"_count": night_table.loc["ozone [db]_x", "count"]}
    for prefix, column in (("_o3", "ozone [db]_x"), ("_chi2", "CHI2_FIT")):
        for stat_name in ("min", "max", "mean", "median", "std"):
            aggregates[prefix + stat_name] = night_table.loc[column, stat_name]

    return pd.Series(data=aggregates)

In [None]:
row = df_spec.iloc[0]

In [None]:
row

In [None]:
FillAgreggates(row)

#### join dataframe + aggregates

In [None]:
df_spec = df_spec.join(df_spec.apply(FillAgreggates,axis=1,result_type="expand"))

## Apply Quality selection

In [None]:
fig,ax = plt.subplots(1,1)
df_spec["CHI2_FIT"].hist(bins=50,ax=ax,range=(0,200))
ax.set_yscale("log")

### Add the Time in pd.datetime

In [None]:
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec):
    """
    Build the time window covered by each observing night.

    input:
      df_spec the dataframe for spectroscopy summary results; it must
      provide the columns "nightObs" and "Time"
    output:
      the dict of night boundaries: nightObs -> (tmin, tmax), where tmin is
      the earliest "Time" of the night minus 30 minutes and tmax the latest
      "Time" plus 30 minutes
    """
    margin = pd.Timedelta(minutes=30)
    boundaries = {}
    for night_id in df_spec["nightObs"].unique():
        night_times = df_spec[df_spec["nightObs"] == night_id]["Time"]
        boundaries[night_id] = (night_times.min() - margin, night_times.max() + margin)
    return boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec)

## Plot all data

In [None]:
# Ozone versus time for every spectrum, before any quality selection
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%dT%H:%M")
fig,axs = plt.subplots(1,1,figsize=(14,6))
ax  = axs
# The axes was just created and has no legend yet, so leg is expected to be None;
# NOTE(review): the global leg is read again by the night-by-night plotting cell
leg=ax.get_legend()
df_spec.plot(x="Time",y="ozone [db]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,label=tag,legend=leg)
ax.set_ylabel("ozone [db]_x")

ax.set_xlabel("time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Ozone meas by holo vs time")

# Shade each night using the (tmin, tmax) pairs built by GetNightBoundariesDict
for key, tt in dn.items():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.1)

plt.tight_layout()

## Apply Quality selection cuts

In [None]:
def getSelectionCutVeryOld(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9):
    """Return the boolean mask of the oldest quality selection.

    A row is kept when ``CHI2_FIT < chi2max`` and
    ``pwvmin < "PWV [mm]_x" < pwvmax`` (all bounds strict).
    """
    chi2_ok = df_spec["CHI2_FIT"].lt(chi2max)
    pwv_above_min = df_spec["PWV [mm]_x"].gt(pwvmin)
    pwv_below_max = df_spec["PWV [mm]_x"].lt(pwvmax)
    return chi2_ok & pwv_above_min & pwv_below_max

In [None]:
def getSelectionCutOld(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.):
    """Return the boolean mask of a former quality selection.

    All bounds are strict. A row is kept when CHI2_FIT < chi2max, both
    "PWV [mm]_x" and "PWV [mm]_y" lie in (pwvmin, pwvmax), D2CCD lies in
    (186.5, 187.3), EXPTIME > 20 and "ozone [db]_y" lies in (ozmin, ozmax).
    """
    conditions = [
        df_spec["CHI2_FIT"] < chi2max,
        df_spec["PWV [mm]_x"] > pwvmin,
        df_spec["PWV [mm]_x"] < pwvmax,
        df_spec["D2CCD"] > 186.5,
        df_spec["D2CCD"] < 187.3,
        df_spec['EXPTIME'] > 20.,
        df_spec["PWV [mm]_y"] > pwvmin,
        df_spec["PWV [mm]_y"] < pwvmax,
        df_spec["ozone [db]_y"] > ozmin,
        df_spec["ozone [db]_y"] < ozmax,
    ]
    cut = conditions[0]
    for condition in conditions[1:]:
        cut = cut & condition
    return cut

In [None]:
def getSelectionCutOld2(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.,vaodmax=0.1):
    """Return the boolean mask of a former quality selection (second variant).

    All bounds are strict. A row is kept when CHI2_FIT < chi2max, both
    "PWV [mm]_x" and "PWV [mm]_y" lie in (pwvmin, pwvmax), D2CCD lies in
    (186.75, 187.75), VAOD_x < 0.3, EXPTIME > 20 and "ozone [db]_y" lies in
    (ozmin, ozmax).

    NOTE(review): ``vaodmax`` is accepted but not used; the VAOD_x bound is
    the fixed value 0.3.
    """
    conditions = [
        df_spec["CHI2_FIT"] < chi2max,
        df_spec["PWV [mm]_x"] > pwvmin,
        df_spec["PWV [mm]_x"] < pwvmax,
        df_spec["D2CCD"] > 186.75,
        df_spec["D2CCD"] < 187.75,
        df_spec["VAOD_x"] < 0.3,
        df_spec['EXPTIME'] > 20.,
        df_spec["PWV [mm]_y"] > pwvmin,
        df_spec["PWV [mm]_y"] < pwvmax,
        df_spec["ozone [db]_y"] > ozmin,
        df_spec["ozone [db]_y"] < ozmax,
    ]
    cut = conditions[0]
    for condition in conditions[1:]:
        cut = cut & condition
    return cut

In [None]:
#def getSelectionCut(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.):
def getSelectionCut(df_spec, chi2max=200., pwvmin=0, pwvmax = 16,ozmin=0.,ozmax=650.):
    """Return the boolean mask of the current quality selection.

    All bounds are strict. A row is kept when CHI2_FIT < chi2max,
    "ozone [db]_x" lies in (ozmin, ozmax) and D2CCD lies in (186.0, 189.0).
    ``pwvmin`` and ``pwvmax`` are accepted but not used by this selection.
    """
    chi2_ok = df_spec["CHI2_FIT"] < chi2max
    ozone_above_min = df_spec["ozone [db]_x"] > ozmin
    ozone_below_max = df_spec["ozone [db]_x"] < ozmax
    d2ccd_above_min = df_spec["D2CCD"] > 186.0
    d2ccd_below_max = df_spec["D2CCD"] < 189.0
    return chi2_ok & ozone_above_min & ozone_below_max & d2ccd_above_min & d2ccd_below_max

In [None]:
cut = getSelectionCut(df_spec)

In [None]:
df_spec_sel = df_spec[cut].drop(labels=['_count', '_o3min', '_o3max', '_o3mean', '_o3median', '_o3std','_chi2min', '_chi2max', '_chi2mean', '_chi2median', '_chi2std'],axis=1)

In [None]:
df_spec_sel.reset_index(drop=True,inplace=True)

## Compute per-night aggregates

### Compute series per night

In [None]:
ser_O3_CHI2_FIT_sel = df_spec_sel[["ozone [db]_x","CHI2_FIT","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])

In [None]:
def FillAgreggatesSel(row):
    """Return the per-night aggregates, after selection, for the night of ``row``.

    The night is read from ``row["nightObs"]`` and looked up in the global
    table ``ser_O3_CHI2_FIT_sel``. The returned ``pd.Series`` holds
    ``_count`` (taken from the ozone column), then min/max/mean/median/std of
    the ozone column (``_o3*``) and of ``CHI2_FIT`` (``_chi2*``).
    """
    stat_names = ("min", "max", "mean", "median", "std")
    night_table = ser_O3_CHI2_FIT_sel.loc[row["nightObs"], :].unstack()

    aggregates = {"_count": night_table.loc["ozone [db]_x", "count"]}
    aggregates.update(
        {"_o3" + name: night_table.loc["ozone [db]_x", name] for name in stat_names}
    )
    aggregates.update(
        {"_chi2" + name: night_table.loc["CHI2_FIT", name] for name in stat_names}
    )

    return pd.Series(data=aggregates)

In [None]:
df_spec_sel = df_spec_sel.join(df_spec_sel.apply(FillAgreggatesSel,axis=1,result_type="expand"))

In [None]:
TMIN_sel = df_spec_sel["nightObs"].min()
TMAX_sel = df_spec_sel["nightObs"].max()

## Recompute night boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec_sel)

## Plot all data

In [None]:
# Ozone versus time for the spectra that passed the quality selection
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%dT%H:%M")
fig,axs = plt.subplots(1,1,figsize=(14,6))
ax  = axs
# The axes was just created and has no legend yet, so leg is expected to be None;
# NOTE(review): the global leg is read again by the night-by-night plotting cell
leg=ax.get_legend()

df_spec_sel.plot(x="Time",y="ozone [db]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,label=tag,legend=leg,ms=10)
ax.set_ylabel("ozone [db]_x")

ax.set_xlabel("time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Ozone measured by holo selected vs time")

# Shade each night using the (tmin, tmax) pairs built by GetNightBoundariesDict
for key, tt in dn.items():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.1)

plt.tight_layout()
# Save the figure in the output directory before displaying it
figname =f"{pathfigs}/ozone_allpoints_allnights"+figtype
plt.savefig(figname)
plt.show()

## Plot series on selected data

In [None]:
ser_CHI2_FIT_sel = df_spec_sel[["CHI2_FIT","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])
ser_O3_sel = df_spec_sel[["ozone [db]_x","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])

In [None]:
# Bar chart: number of ozone measurements per night, after selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_O3_sel.unstack()["ozone [db]_x"]["count"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="b",
    grid=True,
    title="Number of measurements per night after selection",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night mean of the ozone column, after selection.
# The title previously said "PWV" although the plotted column is ozone.
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_O3_sel.unstack()["ozone [db]_x"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title="Mean ozone per night after selection")
plt.tight_layout()

In [None]:
# Bar chart: per-night median of the ozone column, after selection.
# The title previously said "PWV" although the plotted column is ozone.
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_O3_sel.unstack()["ozone [db]_x"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title="Median ozone per night after selection")
plt.tight_layout()

In [None]:
# Bar chart: per-night standard deviation of the ozone column, after selection.
# ax=ax is now passed explicitly like in the sibling cells, and the title names
# ozone (it previously said "PWV").
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_O3_sel.unstack()["ozone [db]_x"]["std"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title="STD variation for ozone per night after selection")
plt.tight_layout()

In [None]:
# Bar chart: per-night mean of CHI2_FIT, after selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT_sel.unstack()["CHI2_FIT"]["mean"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="Mean CHI2 per night after selection",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night median of CHI2_FIT, after selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT_sel.unstack()["CHI2_FIT"]["median"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="Median CHI2 per night after selection",
)
plt.tight_layout()

In [None]:
# Bar chart: per-night standard deviation of CHI2_FIT, after selection
fig, ax = plt.subplots(1, 1, figsize=(18, 3))
ser_CHI2_FIT_sel.unstack()["CHI2_FIT"]["std"].plot(
    kind="bar",
    ax=ax,
    subplots=False,
    rot=90,
    figsize=(18, 4),
    facecolor="r",
    grid=True,
    title="STD variation CHI2 per night after selection",
)
plt.tight_layout()

## Show time variation each night

In [None]:
all_selected_nights = df_spec_sel["nightObs"].unique()

In [None]:
def funclineres(params, x, y, yerr):
    """Return the error-weighted residuals of a straight-line model.

    The model is ``params[0] + params[1]*x``; the value returned is
    ``(observed - model) / yerr``.
    """
    intercept = params[0]
    slope = params[1]
    return (y - intercept - slope * x) / yerr
def funcline(params,x):
    """Evaluate the straight line ``params[0] + params[1]*x``."""
    intercept = params[0]
    slope = params[1]
    return intercept + slope * x

In [None]:
def MakeLineFit(df_night_pwv_curve):
    """
    Fit a straight line to the ozone values of one night.

    input:
      df_night_pwv_curve dataframe providing the columns "dt",
      "ozone [db]_x" and "ozone [db]_err_x"
    output:
      the tuple (x, y, yerr, n, chi2dof, xfit, yfit, slope, slope_err) where
      x, y, yerr are the fitted arrays, n the number of points, chi2dof the
      chi2 per degree of freedom, (xfit, yfit) the fitted line sampled
      between 0.99*min(x) and 1.05*max(x), and slope / slope_err the fitted
      slope and its error

    The fit minimises the error-weighted residuals of funclineres with
    scipy least_squares, starting from [5., 0]. The parameter errors are the
    square roots of the diagonal of inv(J^T J) multiplied by chi2dof.
    """
    hours = df_night_pwv_curve["dt"].values
    ozone = df_night_pwv_curve["ozone [db]_x"].values
    ozone_err = df_night_pwv_curve["ozone [db]_err_x"].values
    npoints = len(ozone)

    fit_result = least_squares(funclineres, [5., 0], args=(hours, ozone, ozone_err))
    best_params = fit_result.x
    jacobian = fit_result.jac

    # covariance of the parameters, rescaled by the reduced chi2 of the fit
    covariance = np.linalg.inv(jacobian.T.dot(jacobian))
    weighted_residuals = funclineres(best_params, hours, ozone, ozone_err)
    chi2dof = (weighted_residuals**2).sum() / (npoints - len(best_params))
    covariance = covariance * chi2dof
    param_errors = np.sqrt(np.diagonal(covariance))

    # fitted line sampled slightly beyond the measured time range
    xfit = np.linspace(hours.min() * 0.99, hours.max() * 1.05)
    yfit = funcline(best_params, xfit)

    return hours, ozone, ozone_err, npoints, chi2dof, xfit, yfit, best_params[1], param_errors[1]

### Plot night by night

In [None]:
all_dateObs_sel = {}

# time-axis format shared by all the night plots (loop invariant)
date_form = DateFormatter("%y-%m-%dT%H:%M")

# loop on nights
for night in all_selected_nights:
    #select the night
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]
    
    #select the variables; work on a copy so that adding the "dt" column
    #does not write into a slice of df_spec_sel (SettingWithCopyWarning)
    df_night_o3_curve = df_spec_night[["Time","ozone [db]_x","ozone [db]_err_x"]].copy()

    tmin = df_night_o3_curve["Time"].min()

    # elapsed time since the first point of the night, converted in hours
    df_night_o3_curve["dt"] = (df_night_o3_curve["Time"] - tmin).dt.total_seconds()/3600.
    
    # summary statistics of the ozone values of the night, shown on the plot in DU
    stat = df_night_o3_curve[["ozone [db]_x"]].describe()
    count = int(stat.loc["count"].values[0])
    mean = stat.loc["mean"].values[0]
    median = stat.loc["50%"].values[0]
    std = stat.loc["std"].values[0]
    textstr = "\n".join((f"count : {count}",
                     f"mean : {mean:.1f} DU",
                     f"median : {median:.1f} DU",
                     f"std : {std:.1f} DU",
                    ))

    N= len(df_night_o3_curve)

    # the line fit and the plot are only done for nights with more than 10 points
    if N>10:
        # does the fit
        
        x,y,yerr,n,chi2dof,xfit,yfit,slope,slope_err = MakeLineFit(df_night_o3_curve)
        
        textstr2 = "\n".join((f"d(Ozone/dt) : ",
                     f"slope : {slope:.3f} DU/h ",
                     f"slope err : {slope_err:.3f} DU/h",
                     f"chi2/ndeg : {chi2dof:.2f}",       
                    ))

        textstr3 = "\n".join((f"d(Ozone/dt) : ",
                     f"slope : {slope:.3f} DU/h ",
                     f"slope err : {slope_err:.3f} DU/h",  
                    ))
    
        # keep the fit results next to the statistics, all in the ozone column
        stat.loc["slope","ozone [db]_x"] = slope 
        stat.loc["slope_err","ozone [db]_x"] = slope_err 
        stat.loc["chi2","ozone [db]_x"] = chi2dof
        
        # plot
        
        fig = plt.figure(figsize=(16,6))
        gs = GridSpec(1, 2,  width_ratios=[2,1],figure=fig)

        # left panel: ozone vs time ; right panel: ozone vs elapsed hours with the fit
        ax = fig.add_subplot(gs[0])
        ax2 = fig.add_subplot(gs[1],sharey=ax)
        #ax2 = fig.add_subplot(gs[1])
        
        # legend=False: the legend is drawn explicitly by ax.legend() below
        df_night_o3_curve.plot(x="Time",y="ozone [db]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,legend=False,label=tag,ms=10)
        ax.errorbar(x=df_night_o3_curve.Time, y=df_night_o3_curve["ozone [db]_x"],yerr=df_night_o3_curve["ozone [db]_err_x"],fmt=".",color="r",ecolor="k")
        
        ax.legend(loc="upper right")
        ax.set_ylabel("ozone [db]_x")
        ax.set_xlabel("time")
        ax.xaxis.set_major_formatter(date_form)
        ax.set_ylim(0.,600.)
        ax.text(0.03, 0.95, textstr, transform=ax.transAxes, fontsize=14,
        verticalalignment='top', bbox=props)
        ax.text(0.35, 0.95, textstr3, transform=ax.transAxes, fontsize=14,
        verticalalignment='top', bbox=props)
        ax.set_title(f"night {night}")
        
        # right figure
        ax2.errorbar(x=df_night_o3_curve.dt, y=df_night_o3_curve["ozone [db]_x"],yerr=df_night_o3_curve["ozone [db]_err_x"],fmt="+",color="r",ecolor="k",ms=10,label=tag)
        ax2.set_xlabel("hours")
        ax2.set_ylim(0.,600.)
        ax2.grid()
        ax2.legend(loc="upper right")
        ax2.plot(xfit,yfit,"k-")
        ax2.text(0.45, 0.95, textstr2, transform=ax2.transAxes, fontsize=14,
        verticalalignment='top', bbox=props)
        ax2.set_title(f"night {night}")
        
        plt.tight_layout()
        figname =f"{pathfigs}/o3_per_night_{night}"+figtype
        plt.savefig(figname)
        plt.show()

In [None]:
# Example night used to try out the repeatability computation
df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == 20240923]
#select the variables; work on a copy so that adding the "dt" column
#does not write into a slice of df_spec_sel (SettingWithCopyWarning)
df_night_o3_curve = df_spec_night[["nightObs","Time","ozone [db]_x","ozone [db]_err_x","ozone [db]_y","ozone [db]_err_y","TARGET"]].copy()
tmin = df_night_o3_curve["Time"].min()
# elapsed time since the first point of the night, in hours
df_night_o3_curve["dt"] = (df_night_o3_curve["Time"] - tmin).dt.total_seconds()/3600.

In [None]:
df_night_o3_curve.iloc[0]

In [None]:
def _consecutive_differences(values, scale=1.0):
    """Return ``scale * (values[i] - values[i-1])`` with 0 for the first element."""
    arr = np.asarray(values, dtype=float)
    diffs = np.zeros(arr.shape, dtype=float)
    diffs[1:] = (arr[1:] - arr[:-1]) * scale
    return diffs


def ComputeRepeatability(df):
    """
    Compute the repeatability of O3 for the Spectrogram (x) and the Spectrum (y).

    input:
      df dataframe of one night, in time order, providing the columns
      "nightObs", "dt" (hours), "ozone [db]_x", "ozone [db]_y" and "TARGET"
    output:
      a dataframe with the same index as df and the columns
        - nightObs     : copied from df
        - dt           : copied from df (hours)
        - dt_rep       : time elapsed since the previous row, in seconds
        - dO3x_rep     : "ozone [db]_x" minus its value in the previous row
        - dO3y_rep     : "ozone [db]_y" minus its value in the previous row
        - targflag_rep : True when the previous row has the same TARGET
        - Npoints      : number of rows of df
      The first row has no previous row: its differences are 0 and its flag
      is False.

    The columns are built from whole arrays, so they get numeric / boolean
    dtypes instead of the generic object dtype.
    """
    N = len(df)

    targets = df["TARGET"].to_numpy(dtype=object)
    same_target = np.zeros(N, dtype=bool)
    # element-wise comparison of each target with the one of the previous row
    same_target[1:] = targets[1:] == targets[:-1]

    dt_hours = df["dt"].to_numpy(dtype=float)

    dfout = pd.DataFrame(
        {
            "nightObs": df["nightObs"].to_numpy(),
            "dt": dt_hours,
            "dt_rep": _consecutive_differences(dt_hours, scale=3600.),  # hours -> seconds
            "dO3x_rep": _consecutive_differences(df["ozone [db]_x"]),
            "dO3y_rep": _consecutive_differences(df["ozone [db]_y"]),
            "targflag_rep": same_target,
            "Npoints": np.full(N, N, dtype=int),
        },
        index=df.index,
        columns=["nightObs","dt","dt_rep","dO3x_rep","dO3y_rep","targflag_rep","Npoints"],
    )
    return dfout

In [None]:
ComputeRepeatability(df_night_o3_curve).head()

In [None]:
all_df_repeatability = []

# Histogram window in DU: same window as the all-night summary histogram
# (the former +/-3 window was too narrow for ozone differences in DU)
o3_hist_range = (-100, 100)

# loop on nights
for night in all_selected_nights:
    # Select the night
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]
    
    # Select the variables; work on a copy so that adding the "dt" column
    # does not write into a slice of df_spec_sel (SettingWithCopyWarning)
    df_night_o3_curve = df_spec_night[["nightObs","Time","ozone [db]_x","ozone [db]_err_x","ozone [db]_y","ozone [db]_err_y","TARGET"]].copy()

    tmin = df_night_o3_curve["Time"].min()

    # Convert in hours
    df_night_o3_curve["dt"] = (df_night_o3_curve["Time"] - tmin).dt.total_seconds()/3600.
    
    # Compute the repeatability on ozone x (spectrogram) and y (spectrum)
    df_rep = ComputeRepeatability(df_night_o3_curve)
    
    # Keep all repeatability
    all_df_repeatability.append(df_rep)
    
    # consecutive exposures separated by less than 120 s
    cut_on_dt = (df_rep["dt_rep"]>0.) & (df_rep["dt_rep"]< 120.)   
    # explicit boolean mask, whatever the dtype of the flag column
    cut_on_target = df_rep["targflag_rep"].astype(bool)
    
    cut_loose = cut_on_dt
    cut_strong = cut_on_dt & cut_on_target 

    # numeric views of the ozone differences for the two selections
    df_rep_strong = df_rep.loc[cut_strong, ["dO3x_rep","dO3y_rep"]].astype(float)
    df_rep_loose = df_rep.loc[cut_loose, ["dO3x_rep","dO3y_rep"]].astype(float)
        
    # plot  
    fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(12,4))
      
    # filled blue: same target ; red outline: any consecutive pair
    df_rep_strong["dO3x_rep"].hist(ax=ax1,bins=60,range=o3_hist_range,facecolor="b",label="Same target")
    df_rep_strong["dO3y_rep"].hist(ax=ax2,bins=60,range=o3_hist_range,facecolor="b")
    
    df_rep_loose["dO3x_rep"].hist(ax=ax1,bins=60,range=o3_hist_range,color="r",lw=3,histtype="step")
    df_rep_loose["dO3y_rep"].hist(ax=ax2,bins=60,range=o3_hist_range,color="r",lw=3,histtype="step")
    
    ax1.set_xlabel("O3 (DU)")
    ax1.set_title("Spectrogram")
    ax2.set_xlabel("O3 (DU)")
    ax2.set_title("1D-Spectrum")
    
    df_rep_strong.plot.scatter(x="dO3x_rep",y="dO3y_rep",marker='.',c="b",ax=ax3)
    ax3.set_xlim(*o3_hist_range)
    ax3.set_ylim(*o3_hist_range)
    ax3.grid()
    ax3.set_xlabel("O3 (2D) (DU)")
    ax3.set_ylabel("O3 (1D) (DU)")
    
    
    title = f"Ozone repeatability for Night {night}"
    plt.suptitle(title) 
    plt.tight_layout()
    #figname =f"{pathfigs}/pwv_per_night_{night}"+figtype
    #    plt.savefig(figname)
    plt.show()

       

## Merge all repeatability dataFrame

In [None]:
df_allrep = pd.concat(all_df_repeatability)

In [None]:
df_allrep.head()

## Selection

In [None]:
cut_on_dt = (df_allrep["dt_rep"]>0.) & (df_allrep["dt_rep"]< 120.) & (df_allrep["Npoints"] > 10.)     
cut_on_target = df_allrep["targflag_rep"]
    
cut_loose = cut_on_dt
cut_strong = cut_on_dt & cut_on_target 

In [None]:
# Independent copies of the loose and strong selections: columns are added to
# dfs later on, which must not write into a slice of df_allrep
dfl = df_allrep[cut_loose][["dt","dt_rep","dO3x_rep","dO3y_rep"]].copy()
dfs = df_allrep[cut_strong][["dt","dt_rep","dO3x_rep","dO3y_rep"]].copy()

### Compute Statistics

- apply statistics on strong criteria

In [None]:
#dfl_stat = dfl.aggregate(["count","mean","std"])
dfl_stat = dfl.aggregate(["count","mean","std",lambda x : std_iqr(x)])

In [None]:
# statistics on the strong selection dfs (the loose selection is summarised in dfl_stat)
dfs_stat = dfs.aggregate(["count","mean","std",lambda x : std_iqr(x)])

In [None]:
dfs_stat

In [None]:
# strong cut
#dfs_stat = dfs.aggregate(["count","mean","std"])
# first pass on the strong selection dfs: mean, std and IQR-based sigma
dfs_stat = dfs.aggregate(["count","mean","std",lambda x : std_iqr(x)])
meanx = dfs_stat.loc["mean","dO3x_rep"]
stdx = dfs_stat.loc["std","dO3x_rep"]
stdx_iqr = dfs_stat.loc["<lambda>","dO3x_rep"]

meany = dfs_stat.loc["mean","dO3y_rep"]
stdy = dfs_stat.loc["std","dO3y_rep"]
stdy_iqr = dfs_stat.loc["<lambda>","dO3y_rep"]

# absolute deviations from the mean, used for the mean-absolute-deviation estimate
dfs["ddO3x_rep"] = np.abs(dfs["dO3x_rep"] - meanx)
dfs["ddO3y_rep"] = np.abs(dfs["dO3y_rep"] - meany)

# second pass, now including the absolute-deviation columns
dfs_stat = dfs.aggregate(["count","mean","std"])

meanx = dfs_stat.loc["mean","dO3x_rep"]
stdx = dfs_stat.loc["std","dO3x_rep"]
madx =  dfs_stat.loc["mean","ddO3x_rep"]
# for a normal distribution sigma = sqrt(pi/2) * mean absolute deviation
sigma_madx = np.sqrt(np.pi/2.)*madx

meany = dfs_stat.loc["mean","dO3y_rep"]
stdy = dfs_stat.loc["std","dO3y_rep"]
mady =  dfs_stat.loc["mean","ddO3y_rep"]
sigma_mady = np.sqrt(np.pi/2.)*mady

# raw strings keep the LaTeX backslashes without invalid escape sequences
textstr_o3x = r"$\delta_{o3}$ = "+ f"{meanx:.2f}" + r" $\pm$ " + f"{stdx:.2f} DU" + "\n" + r"$\sigma_{MAD} = $" + f" {sigma_madx:.2f} DU"
textstr_o3y = r"$\delta_{o3}$ = "+ f"{meany:.2f}" + r" $\pm$ " + f"{stdy:.2f} DU" + "\n" + r"$\sigma_{MAD} = $" + f" {sigma_mady:.2f} DU"

In [None]:
# strong cut
#dfs_stat = dfs.aggregate(["count","mean","std"])
# first pass on the strong selection dfs: mean, std and IQR-based sigma
dfs_stat = dfs.aggregate(["count","mean","std",lambda x : std_iqr(x)])
meanx = dfs_stat.loc["mean","dO3x_rep"]
stdx = dfs_stat.loc["std","dO3x_rep"]
stdx_iqr = dfs_stat.loc["<lambda>","dO3x_rep"]

meany = dfs_stat.loc["mean","dO3y_rep"]
stdy = dfs_stat.loc["std","dO3y_rep"]
stdy_iqr = dfs_stat.loc["<lambda>","dO3y_rep"]

# absolute deviations from the mean, used for the mean-absolute-deviation estimate
dfs["ddO3x_rep"] = np.abs(dfs["dO3x_rep"] - meanx)
dfs["ddO3y_rep"] = np.abs(dfs["dO3y_rep"] - meany)

#dfs_stat = dfs.aggregate(["count","mean","std"])
# second pass, now including the absolute-deviation columns
dfs_stat = dfs.aggregate(["count","mean","std",lambda x : std_iqr(x)])

meanx = dfs_stat.loc["mean","dO3x_rep"]
stdx = dfs_stat.loc["std","dO3x_rep"]
stdx_iqr = dfs_stat.loc["<lambda>","dO3x_rep"]
madx =  dfs_stat.loc["mean","ddO3x_rep"]
# for a normal distribution sigma = sqrt(pi/2) * mean absolute deviation
sigma_madx = np.sqrt(np.pi/2.)*madx

meany = dfs_stat.loc["mean","dO3y_rep"]
stdy = dfs_stat.loc["std","dO3y_rep"]
stdy_iqr = dfs_stat.loc["<lambda>","dO3y_rep"]
mady =  dfs_stat.loc["mean","ddO3y_rep"]
sigma_mady = np.sqrt(np.pi/2.)*mady

# raw strings keep the LaTeX backslashes without invalid escape sequences
textstr_o3x = r"$\delta_{o3}$ = "+ f"{meanx:.2f}" + r" $\pm$ " + f"{stdx:.2f} DU" + "\n" + r"$\sigma_{MAD} = $" + f" {sigma_madx:.2f} DU"
textstr_o3y = r"$\delta_{o3}$ = "+ f"{meany:.2f}" + r" $\pm$ " + f"{stdy:.2f} DU" + "\n" + r"$\sigma_{MAD} = $" + f" {sigma_mady:.2f} DU"

In [None]:
# Text boxes summarising the repeatability: mean +/- std, then the MAD-based
# sigma, the RMS and the IQR-based sigma (one per line).
# Raw strings keep the LaTeX backslashes without invalid escape sequences.
textstr_o3x = "\n".join((
    r"$\delta_{o3}$ = " + f"{meanx:.2f}" + r" $\pm$ " + f"{stdx:.2f} DU",
    r"$\sigma_{MAD} = $" + f" {sigma_madx:.2f} DU",
    "$RMS = $" + f" {stdx:.2f} DU",
    r"$\sigma_{IQR} = $" + f" {stdx_iqr:.2f} DU",
))
textstr_o3y = "\n".join((
    r"$\delta_{o3}$ = " + f"{meany:.2f}" + r" $\pm$ " + f"{stdy:.2f} DU",
    r"$\sigma_{MAD} = $" + f" {sigma_mady:.2f} DU",
    "$RMS = $" + f" {stdy:.2f} DU",
    r"$\sigma_{IQR} = $" + f" {stdy_iqr:.2f} DU",
))

In [None]:
dfs_stat

In [None]:
dfl_stat

## Plot repeatability

In [None]:
# Summary plot: ozone differences between consecutive exposures, all nights together
fig,ax1 = plt.subplots(1,1,figsize=(5,4),layout='constrained')

# filled blue: consecutive exposures on the same target ; red outline: any consecutive pair
df_allrep[cut_strong]["dO3x_rep"].hist(ax=ax1,bins=60,range=(-100,100),facecolor="b",label="Same target") 
df_allrep[cut_loose]["dO3x_rep"].hist(ax=ax1,bins=60,range=(-100,100),color="r",lw=3,histtype="step")

# raw string keeps the LaTeX backslash without an invalid escape sequence
ax1.set_xlabel(r"$\Delta$ Ozone (DU)")

title = f"Ozone repeatability ({TMIN_sel} - {TMAX_sel})"

ax1.set_title(title,fontsize=12)
ax1.text(0.05, 0.95, textstr_o3x, transform=ax1.transAxes, fontsize=14,verticalalignment='top', bbox=props)
#ax1.legend()

figname =f"{pathfigs}/ozone_repeatability_allnights_single"+figtype
plt.savefig(figname)
plt.show()