# PWV01 : Explore hologram data  quality on PWV

- author Sylvie Dagoret-Campagne
- creation date 2025-09-16 : version v1
- last update : 2025-09-18 : show cuts and put them in parameter files
- last update : 2025-09-19 : put input selection in configuration file
- last update : 2025-09-22 : New "run_v6":"u/dagoret/auxtel_run_20250921_w_2025_38_spectractorv32_main_gains_holoallfilt_a" with updated PWV00_parameters.py
- last update : 2025-09-23 : Show cut impact , filter by filter and add list of filters in parameter file
- last update : 2025-09-26 : add PWV repeatability in PWV curves
- last update : 2025-09-30 : run_v5 with joined /repo/main + /repo/embargo
- last update : 2025-10-02 : add midnights
- last update : 2026-01-12 : work on understanding the selection criteria
- affiliation : IJCLab
- Kernel @usdf **w_2025_36**
- Home emac : base (conda)
- laptop : conda_py313

**Goal** : Show Night variations of PWV wrt date and Time. Fit a straight line.

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
print(python_version())

In [None]:
import os

In [None]:
# Output location and naming convention of the figures saved by this notebook
pathfigs = "figs_PWV01"   # directory receiving the figures
prefix = "pwv01"          # common prefix of the figure file names
figtype = ".png"          # file extension appended to every figure name

# exist_ok=True replaces the check-then-create sequence: it is a no-op when the
# directory already exists and cannot fail if it is created in between
os.makedirs(pathfigs, exist_ok=True)

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (16,8)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'
plt.rcParams["legend.fontsize"] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from PWV00_parameters import *

In [None]:
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a date packed as the integer YYYYMMDD into a pandas timestamp.

    Parameters
    ----------
    num : int
        Date encoded as ``year*10_000 + month*100 + day`` (e.g. 20230930).

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the zero-padded
    ``YYYY-MM-DD`` string; no timezone is attached.
    """
    # peel off the year, then split the remaining MMDD part
    year, month_day = divmod(num, 10_000)
    month, day = divmod(month_day, 100)

    fields = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(fields))

In [None]:
PWVMIN = 0.
PWVMAX = 20.

In [None]:
# Collimator configuration
# FLAG_WITHCOLLIMATOR : when True, the "Collimator" section keeps only the
#                       nights later than DATE_WITHCOLLIMATOR
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930   # nightObs encoded as YYYYMMDD

# Derive the timestamp from the constant above instead of hard-coding the same
# date a second time, and make it UTC-aware so that it can be compared with the
# "Time" column, which is built with utc=True.
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `PWV00_parameters.py`

In [None]:
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
specdata = np.load(atmfilename,allow_pickle=True)

In [None]:
df_spec = pd.DataFrame(specdata)

In [None]:
#if FLAG_WITHCOLLIMATOR:
#    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR]    

In [None]:
# add time for plotting
#df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"],utc=True)

In [None]:
# nightObs (YYYYMMDD) is the exposure id without its last five digits.
# Vectorised integer division, like the seq_num column: no row-wise apply needed.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
df_spec["seq_num"]  = df_spec["id"] % 100_000

In [None]:
df_spec[["id","FILTER"]]

In [None]:
df_spec["FILTER"].unique()

In [None]:
print(list(df_spec.columns))

### Check Filters

In [None]:
# conversion en datetime

plt.figure(figsize=(20,8))
sns.scatterplot(
    data=df_spec, 
    x="Time",       # abscisse en datetime
    y="seq_num",    # ou ra, dec, etc.
    hue="FILTER", 
    palette="tab10",
    s=100,
    edgecolor="black",
    linewidth=0.2
)

plt.title(f"Auxtel Holo observations wrt date and filter type, {tag}")
plt.xlabel("Date of observation")
plt.ylabel("Seq Num")
plt.xticks(rotation=45)  # lisibilité des dates
#plt.legend(loc="upper left",ncol=8)
plt.legend(bbox_to_anchor=(1.01, 1.05),ncols=1)
plt.tight_layout()
plt.show()


In [None]:
plt.figure(figsize=(20,8))
df_spec["FILTER_seq"] = df_spec["FILTER"].astype(str) + "_" + df_spec["seq_num"].astype(str)
sns.stripplot(
    data=df_spec,
    x="Time",
    y="FILTER",
    hue="FILTER",
    palette="Set1",
    size=10,         # taille des points
    jitter=True,    # évite que les points se chevauchent
    alpha=1.0,
    edgecolor="black",
    linewidth=0.1
)
plt.title(f"Auxtel Holo observations wrt date and filter type,  {tag}")
plt.xlabel("Time")
plt.ylabel("Filter")
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid()
plt.show()


## Suppress Blue filters

In [None]:
if FLAG_PWVFILTERS: 
    df_spec = df_spec[df_spec["FILTER"].isin(PWV_FILTER_LIST) ]

In [None]:

# Count the number of entries per nightObs and FILTER
# (one row per night, one column per filter, 0 where a filter was not used)
counts = df_spec.groupby(["nightObs", "FILTER"]).size().unstack(fill_value=0)

# Bar plot with the filters side by side for each night (stacked=False, so the bars are NOT stacked)
counts.plot(kind="bar", stacked=False, figsize=(18,6))

plt.ylabel("Nombre d'entrées")
plt.xlabel("nightObs")
plt.title(f"Nombre d'entrées par FILTER et par nightObs, {tag}")
plt.legend(title="FILTER")
plt.tight_layout()
plt.show()


### Visualize Selection cuts

In [None]:
fig,axs = plt.subplots(1,3,figsize=(18,3))
ax1,ax2,ax3  = axs.flatten()
df_spec.hist("D_CCD [mm]_x",ax=ax1,bins=50,range=(DCCDMINFIG,DCCDMAXFIG),facecolor="b")
ax1.axvline(DCCDMINCUT,ls="-.",c="k")
ax1.axvline(DCCDMAXCUT,ls="-.",c="k")

df_spec.hist("CHI2_FIT",ax=ax2,bins=50,range=(0,500),facecolor="b")
#ax2.set_yscale("log")
ax2.axvline(CHI2CUT,ls="-.",c="k")


df_spec.hist("EXPTIME",ax=ax3,bins=20,facecolor="b")
ax3.axvline(EXPTIMECUT,ls="-.",c="k") 
plt.suptitle(tag)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One row of three histograms (D_CCD, CHI2_FIT, EXPTIME) per filter; the cut
# values are drawn as dash-dotted vertical lines.
filters = df_spec["FILTER"].unique()
n_filters = len(filters)

# squeeze=False always returns a 2-D array of axes, even with a single filter
fig, axs = plt.subplots(n_filters, 3, figsize=(18, 3*n_filters), squeeze=False)

for i, f in enumerate(filters):
    ax1, ax2, ax3 = axs[i]
    subdf = df_spec[df_spec["FILTER"] == f]  # rows of this filter only

    # distance disperser - CCD
    subdf.hist("D_CCD [mm]_x", ax=ax1, bins=50,
               range=(DCCDMINFIG, DCCDMAXFIG), facecolor="b")
    for dccd_cut in (DCCDMINCUT, DCCDMAXCUT):
        ax1.axvline(dccd_cut, ls="-.", c="k")
    ax1.set_title(f"{f} – D_CCD [mm]_x")

    # chi2 of the fit
    subdf.hist("CHI2_FIT", ax=ax2, bins=50, range=(0,300), facecolor="b")
    ax2.axvline(CHI2CUT, ls="-.", c="k")
    ax2.set_title(f"{f} – CHI2_FIT")

    # exposure time
    subdf.hist("EXPTIME", ax=ax3, bins=20, facecolor="b")
    ax3.axvline(EXPTIMECUT, ls="-.", c="k")
    ax3.set_title(f"{f} – EXPTIME")

plt.suptitle(f" Quality selection cut , {tag}")
plt.tight_layout()
plt.show()



In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

# D_CCD and CHI2_FIT versus time for all filters together, colour-coded by
# filter, with the selection cuts drawn as dash-dotted horizontal lines.
date_form = DateFormatter("%y-%m-%d")
fig, axs = plt.subplots(2, 1, figsize=(18, 12))
ax1, ax2 = axs

# Encode the filter names once as integer codes for the colour mapping.
filter_cat = df_spec["FILTER"].astype("category")
filter_codes = filter_cat.cat.codes
# legend_elements() yields one handle per distinct code, in increasing code
# order.  The labels must follow that same order: the order of appearance
# returned by unique() is in general different and would attach the wrong
# filter name to a colour.  Code -1 marks a missing filter name.
filter_labels = [
    filter_cat.cat.categories[code] if code >= 0 else "undefined"
    for code in np.unique(filter_codes)
]

# --- DCCD vs time ---
sc = ax1.scatter(
    df_spec["Time"],
    df_spec["D_CCD [mm]_x"],
    c=filter_codes,   # numeric codes
    cmap="Set1",      # discrete palette
    marker="+",lw=5,alpha=0.5
)
# limits and cut lines are set once (drawing them twice darkens the lines)
ax1.set_ylim(DCCDMINFIG,DCCDMAXFIG)
ax1.axhline(DCCDMINCUT,ls="-.",c="k")
ax1.axhline(DCCDMAXCUT,ls="-.",c="k")

handles, _ = sc.legend_elements(prop="colors", alpha=0.6)
ax1.legend(handles, filter_labels, title="FILTER",ncols=8)

plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
ax1.set_ylabel("D_CCD [mm]_x")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("DCCD vs time")

# --- CHI2 vs time ---
sc = ax2.scatter(
    df_spec["Time"],
    df_spec["CHI2_FIT"],
    c=filter_codes,   # numeric codes
    cmap="Set1",      # discrete palette
    marker="+",lw=5,alpha=0.5
)
ax2.set_yscale("log")
ax2.axhline(CHI2CUT,ls="-.",c="k")

handles, _ = sc.legend_elements(prop="colors", alpha=0.6)
ax2.legend(handles, filter_labels, title="FILTER",ncols=8)

plt.setp(ax2.get_xticklabels(), rotation=45, ha="right")
ax2.set_ylabel("CHI2_FIT")
ax2.set_xlabel("time")
ax2.xaxis.set_major_formatter(date_form)
ax2.set_title("CHI2_FIT")

plt.suptitle(tag)
plt.tight_layout()
plt.show()



In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

date_form = DateFormatter("%y-%m-%d")

filters = df_spec["FILTER"].unique()

fig, axs = plt.subplots(len(filters), 2, figsize=(18, 6*len(filters)))
if len(filters) == 1:
    axs = [axs]  # uniformiser si un seul filtre

for i, f in enumerate(filters):
    subdf = df_spec[df_spec["FILTER"] == f]
    ax1, ax2 = axs[i]

    # --- DCCD vs time ---
    ax1.scatter(
        subdf["Time"],
        subdf["D_CCD [mm]_x"],
        marker="+", lw=5, alpha=0.5, color="b"
    )
    ax1.set_ylim(DCCDMINFIG, DCCDMAXFIG)
    ax1.axhline(DCCDMINCUT, ls="-.", c="k")
    ax1.axhline(DCCDMAXCUT, ls="-.", c="k")
    ax1.set_ylabel("D_CCD [mm]_x")
    ax1.set_xlabel("time")
    ax1.xaxis.set_major_formatter(date_form)
    ax1.set_title(f"{f} – DCCD vs time")
    plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")

    # --- CHI2 vs time ---
    ax2.scatter(
        subdf["Time"],
        subdf["CHI2_FIT"],
        marker="+", lw=5, alpha=0.5, color="b"
    )
    ax2.set_yscale("log")
    ax2.axhline(CHI2CUT, ls="-.", c="k")
    ax2.set_ylabel("CHI2_FIT")
    ax2.set_xlabel("time")
    ax2.xaxis.set_major_formatter(date_form)
    ax2.set_title(f"{f} – CHI2_FIT")
    plt.setp(ax2.get_xticklabels(), rotation=45, ha="right")

plt.suptitle(tag)
plt.tight_layout()
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(10,6))

for filt, group in df_spec.groupby("FILTER"):
    ax.scatter(
        group["AIRMASS"],
        group["CHI2_FIT"],
        label=filt,
        alpha=0.6
    )

ax.set_xlabel("Airmass")
ax.set_ylabel("CHI2_FIT")
ax.set_yscale("log")   # utile si chi2 très dispersé
ax.legend(title="FILTER")
ax.grid(True)
plt.tight_layout()
plt.show()

### Atmospheric parameters distribution before selection

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(16,10))
ax1,ax2,ax3,ax4 = axs.flatten()
    
df_spec.hist("PWV [mm]_x",ax=ax1,bins=50,facecolor="b")
df_spec.hist("PWV [mm]_y",ax=ax2,bins=50,facecolor="b")
df_spec.hist("ozone [db]_x",ax=ax3,bins=50,facecolor="r")
df_spec.hist("ozone [db]_y",ax=ax4,bins=50,facecolor="r")
plt.suptitle(f"Atmospheric parameters before selection, {tag}")
plt.tight_layout()
plt.show()

### Target used

In [None]:
df_spec["TARGET"].unique()

In [None]:
plt.figure(figsize=(20,12))
df_spec["TARGET_seq"] = df_spec["TARGET"].astype(str) + "_" + df_spec["seq_num"].astype(str)
sns.stripplot(
    data=df_spec,
    x="Time",
    y="TARGET",
    hue="TARGET",
    palette="Set1",
    size=10,         # taille des points
    jitter=True,    # évite que les points se chevauchent
    alpha=1.0,
    edgecolor="black",
    linewidth=0.1
)
plt.title(f"Auxtel Holo observations wrt date and target {tag}")
plt.xlabel("Time")
plt.ylabel("Target")
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid()
plt.show()

### Is the star Faint or bright

In [None]:
def IsFaint(row):
    """Tell whether the target of an observation is one of the selected faint stars.

    Parameters
    ----------
    row : mapping-like (e.g. a dataframe row) providing the key "TARGET".

    Returns
    -------
    bool
        True when ``row["TARGET"]`` is among the first four names of the
        faint-target list, False otherwise.
    """
    faint_targets = ['Feige110','HD074000','HD115169','HD031128','HD200654','HD167060','HD009051','HD142331','HD160617','HD111980']
    # only the first four faint targets are currently selected
    selected_faint = faint_targets[:4]
    return row["TARGET"] in selected_faint

In [None]:
df_spec["isFaint"] = df_spec.apply(IsFaint,axis=1)

### Collimator

In [None]:
if FLAG_WITHCOLLIMATOR:
    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR ]

## Apply or not correction on errors related to PWV repeatability

In [None]:
# Take into account Photometric Repeatability
if FLAG_CORRECTFOR_PWV_REPEAT:
    if FLAG_CORRECTFOR_PWV_REPEAT_RATIO:
        df_spec["PWV [mm]_err_x"] =  df_spec["PWV [mm]_err_x"] * FACTORERR_PWV_REPEAT
    else:
        df_spec["PWV [mm]_err_x"] =  np.sqrt(df_spec["PWV [mm]_err_x"]**2  +   SIGMA_PWV_REPEAT**2)
    

### Series on spec

In [None]:
ser_spec_size = df_spec.groupby(["nightObs"]).size()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,5))
ax.hist(ser_spec_size.values,bins=50,facecolor="b")
ax.set_title(f"nb obs per night {tag}")
ax.set_xlabel("Nobs/night")
plt.show()

##### Make 3 series

In [None]:
# Per-night statistics: chi2 alone, PWV alone, and both together.
# The same list of aggregates is applied to the three tables.
_night_stats = ['count','min', 'max','mean','std','median']
_by_night = ["nightObs"]

ser_CHI2_FIT = df_spec[["CHI2_FIT","nightObs"]].groupby(_by_night).agg(_night_stats)
ser_PWV = df_spec[["PWV [mm]_x","nightObs"]].groupby(_by_night).agg(_night_stats)
ser_PWV_CHI2_FIT = df_spec[["PWV [mm]_x","CHI2_FIT","nightObs"]].groupby(_by_night).agg(_night_stats)

### Plot PWV and Chi2 from series before any selection

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV.unstack()["PWV [mm]_x"]["count"].plot(kind='bar', ax=ax,subplots=False, rot=90,figsize=(18,4),facecolor="b",grid=True,title=f"Number of measurements per night, {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV.unstack()["PWV [mm]_x"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"Mean PWV per night , {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV.unstack()["PWV [mm]_x"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"Median PWV per night ,{tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
# ax=ax draws into the figure created above, like the neighbouring cells;
# without it pandas opens a second figure and this one stays empty
ser_PWV.unstack()["PWV [mm]_x"]["std"].plot(kind='bar', ax=ax, subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"STD variation for PWV per night , {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["count"].plot(kind='bar', ax=ax,subplots=False, rot=90,figsize=(18,4),facecolor="r",grid=True,title=f"Number of measurements per night, {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"Mean CHI2 per night, {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"Median CHI2 per night , {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["std"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"STD variation CHI2 per night , {tag}")
plt.tight_layout()

#### Add per-night aggregate data to the pandas dataframe

In [None]:
def FillAgreggates(row):
    """Return the per-night aggregates matching the night of one observation.

    The statistics are read from the global table ``ser_PWV_CHI2_FIT``
    (grouped by nightObs) at the night given by ``row["nightObs"]``.

    Parameters
    ----------
    row : dataframe row providing the key "nightObs".

    Returns
    -------
    pd.Series
        Entries ``_count`` (taken from the PWV column), then
        ``_pwv{min,max,mean,median,std}`` and ``_chi2{min,max,mean,median,std}``.
    """
    # table indexed by column name, with one column per statistic
    night_table = ser_PWV_CHI2_FIT.loc[row["nightObs"], :].unstack()

    aggregates = {"_count": night_table.loc["PWV [mm]_x", "count"]}
    for short_name, column in (("pwv", "PWV [mm]_x"), ("chi2", "CHI2_FIT")):
        for stat_name in ("min", "max", "mean", "median", "std"):
            aggregates[f"_{short_name}{stat_name}"] = night_table.loc[column, stat_name]

    return pd.Series(data=aggregates)

In [None]:
row = df_spec.iloc[0]

In [None]:
row

In [None]:
FillAgreggates(row)

#### join dataframe + aggregates

In [None]:
df_spec = df_spec.join(df_spec.apply(FillAgreggates,axis=1,result_type="expand"))

## Time range of the plots (before applying the quality selection)

In [None]:
# Time range of the plots: the data range extended by one week on each side
DT = pd.Timedelta(days=7)
_all_times = df_spec["Time"]
TMIN  = _all_times.min() - DT
TMAX  = _all_times.max() + DT

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec):
    """Compute the time boundaries of each observation night.

    Parameters
    ----------
    df_spec : dataframe of the spectroscopy summary results, with the
        columns "nightObs" and "Time".

    Returns
    -------
    dict
        Maps each nightObs value (in order of first appearance) to the tuple
        ``(first time - 30 min, last time + 30 min)`` of that night.
    """
    margin = pd.Timedelta(minutes=30)
    boundaries = {}
    for night in df_spec["nightObs"].unique():
        night_times = df_spec.loc[df_spec["nightObs"] == night, "Time"]
        boundaries[night] = (night_times.min() - margin, night_times.max() + margin)
    return boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec)

## Compute night midnights

In [None]:
def GetNightMidnightsDict(df_spec):
    """Compute the astronomical midnight of each observation night.

    Parameters
    ----------
    df_spec : dataframe of the spectroscopy summary results, with the
        column "nightObs" holding dates encoded as YYYYMMDD.

    Returns
    -------
    dict
        Maps each nightObs value to the value returned by
        ``get_astronomical_midnight(site_lsst, date)`` for that date.

    Notes
    -----
    ``datetime``, ``get_astronomical_midnight`` and ``site_lsst`` are not
    defined in this notebook; presumably they come from the star import of
    ``PWV00_parameters`` -- to be confirmed.
    """
    midnights = {}
    for nightobs in df_spec["nightObs"].unique():
        night_date = datetime.strptime(str(nightobs), "%Y%m%d").date()
        midnights[nightobs] = get_astronomical_midnight(site_lsst, night_date)
    return midnights

In [None]:
dnidnights = GetNightMidnightsDict(df_spec)

## Plot all data

In [None]:
# Sorted list of the filters present in the data
list_all_filts = df_spec["FILTER"].unique()
list_all_filts= sorted(list_all_filts )
# One colour per filter; zip() stops at the shorter sequence, so only the first two filters get a colour here.
# NOTE(review): this mapping is overwritten by the explicit palette assigned to `colors` in the following cell.
colors = {filt: col for filt, col in zip(list_all_filts, ["r","b"])}

In [None]:
from itertools import cycle

# Fixed colour of each known filter, looked up as colors[filt] by the plotting cells
colors = {
    'empty': "blue",
    'FELH0600': "purple",
    'OG550_65mm_1':"red",
}

# Any other filter present in the data gets a colour from the tab10 palette,
# so that colors[filt] cannot raise KeyError when the list of filters changes.
_extra_colors = cycle(plt.cm.tab10.colors)
for filt in sorted(df_spec["FILTER"].unique()):
    if filt not in colors:
        colors[filt] = next(_extra_colors)

In [None]:
date_form = DateFormatter("%y-%m-%d")

fig,axs = plt.subplots(1,1,figsize=(18,8))
ax  = axs
leg=ax.get_legend()

#df_spec.plot(x="Time",y="PWV [mm]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,label=tag,legend=leg)

for filt, group in df_spec.groupby("FILTER"):
    ax.errorbar(
                    group["Time"],
                    group["PWV [mm]_x"],
                    yerr= group["PWV [mm]_err_x"],
                    fmt="o",
                    label=filt,
                    color=colors[filt],
                    ecolor="k",
                    capsize=1,
                    markersize=5
                    )


ax.set_ylabel("PWV [mm]_x")

ax.set_xlabel("Time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title(f"Precipitable water vapor measured by holo vs time (before selection cut) , {tag}")
ax.tick_params(axis="x", rotation=45)
ax.legend(loc="upper right")
ax.set_ylim(PWVMIN,PWVMAX)

for key, tt in dn.items():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.05)

for key, midn in dnidnights.items():
    ax.axvline( midn ,color="purple",ls=":",alpha=0.5)


ax.axvspan(TMIN,datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)

plt.tight_layout()

figname =f"{pathfigs}/{prefix}_pwv_allpoints_allnights_noqualcuts"+figtype
plt.savefig(figname)
plt.show()


## Apply Quality selection cuts

In [None]:
cut = getSelectionCut(df_spec)

In [None]:
df_spec_sel = df_spec[cut].drop(labels=['_count', '_pwvmin', '_pwvmax', '_pwvmean', '_pwvmedian', '_pwvstd','_chi2min', '_chi2max', '_chi2mean', '_chi2median', '_chi2std'],axis=1)

In [None]:
df_spec_sel.reset_index(drop=True,inplace=True)

### Atmospheric parameters **after** selection

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(16,10))
ax1,ax2,ax3,ax4 = axs.flatten()
    
df_spec_sel.hist("PWV [mm]_x",ax=ax1,bins=50,facecolor="b")
df_spec_sel.hist("PWV [mm]_y",ax=ax2,bins=50,facecolor="b")
df_spec_sel.hist("ozone [db]_x",ax=ax3,bins=50,facecolor="r")
df_spec_sel.hist("ozone [db]_y",ax=ax4,bins=50,facecolor="r")
plt.suptitle(f"Atmospheric parameters after selection, {tag}")
plt.tight_layout()
plt.show()

## Compute per-night aggregates

### Compute series per night

In [None]:
ser_PWV_CHI2_FIT_sel = df_spec_sel[["PWV [mm]_x","CHI2_FIT","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])

In [None]:
def FillAgreggatesSel(row):
    """Return the per-night aggregates (after selection) for one observation.

    The statistics are read from the global table ``ser_PWV_CHI2_FIT_sel``
    (grouped by nightObs) at the night given by ``row["nightObs"]``.

    Parameters
    ----------
    row : dataframe row providing the key "nightObs".

    Returns
    -------
    pd.Series
        Entries ``_count`` (taken from the PWV column), then
        ``_pwv{min,max,mean,median,std}`` and ``_chi2{min,max,mean,median,std}``.
    """
    # table indexed by column name, with one column per statistic
    night_table = ser_PWV_CHI2_FIT_sel.loc[row["nightObs"], :].unstack()

    aggregates = {"_count": night_table.loc["PWV [mm]_x", "count"]}
    for short_name, column in (("pwv", "PWV [mm]_x"), ("chi2", "CHI2_FIT")):
        for stat_name in ("min", "max", "mean", "median", "std"):
            aggregates[f"_{short_name}{stat_name}"] = night_table.loc[column, stat_name]

    return pd.Series(data=aggregates)

    

In [None]:
df_spec_sel = df_spec_sel.join(df_spec_sel.apply(FillAgreggatesSel,axis=1,result_type="expand"))

## Recompute night boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec_sel)

## Plot all data

In [None]:
from itertools import cycle

# Sorted list of the filters that survive the selection
list_all_filts = sorted(df_spec_sel["FILTER"].unique())

# Fixed colour of each known filter (the former two-colour zip() mapping was
# overwritten immediately and has been removed)
colors = {
    'empty': "blue",
    'FELH0600': "purple",
    'OG550_65mm_1':"red",
}

# Any other filter gets a colour from the tab10 palette, so that colors[filt]
# cannot raise KeyError when the list of filters changes.
_extra_colors = cycle(plt.cm.tab10.colors)
for filt in list_all_filts:
    if filt not in colors:
        colors[filt] = next(_extra_colors)

In [None]:
from matplotlib.dates import DateFormatter

# PWV versus time for all nights after the quality cuts, one colour per filter.
date_form = DateFormatter("%y-%m-%d")

fig, ax = plt.subplots(1, 1, figsize=(18, 8))

for filt, group in df_spec_sel.groupby("FILTER"):
    ax.errorbar(
        group["Time"],
        group["PWV [mm]_x"],
        yerr=group["PWV [mm]_err_x"],
        fmt="o",
        label=filt,
        color=colors[filt],
        ecolor="k",
        capsize=1,
        markersize=5,
    )

ax.set_ylabel("PWV [mm]_x")
ax.set_xlabel("time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title(f"Precipitable water vapor measured by holo vs time (after selection cut) , {tag}")
ax.tick_params(axis="x", rotation=45)
ax.legend(loc="upper right")
ax.grid()

# shade the time span covered by each night
for tt in dn.values():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.05)

# mark the midnight of each night
for midn in dnidnights.values():
    ax.axvline( midn ,color="purple",ls=":",alpha=0.5)

# The y range comes from the configuration constants only.  The former
# plt.ylim(0.,20.) issued after savefig() has been removed: it could not affect
# the saved file and duplicated these limits with hard-coded values.
ax.set_ylim(PWVMIN,PWVMAX)

# period before the reference date datetime_WITHCOLLIMATOR
ax.axvspan(TMIN,datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)

plt.tight_layout()
figname =f"{pathfigs}/{prefix}_pwv_allpoints_allnights_withqualcuts"+figtype
plt.savefig(figname)
plt.show()

## Plot series on selected data

In [None]:
ser_CHI2_FIT_sel = df_spec_sel[["CHI2_FIT","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])
ser_PWV_sel = df_spec_sel[["PWV [mm]_x","nightObs"]].groupby(["nightObs"]).agg(['count','min', 'max','mean','std','median'])

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV_sel.unstack()["PWV [mm]_x"]["count"].plot(kind='bar', ax=ax,subplots=False, rot=90,figsize=(18,4),facecolor="b",grid=True,title=f"Number of measurements per night after selection {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV_sel.unstack()["PWV [mm]_x"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"Mean PWV per night after selection {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV_sel.unstack()["PWV [mm]_x"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"Median PWV per night after selection {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
# ax=ax draws into the figure created above, like the neighbouring cells;
# without it pandas opens a second figure and this one stays empty
ser_PWV_sel.unstack()["PWV [mm]_x"]["std"].plot(kind='bar', ax=ax, subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"STD variation for PWV per night after selection {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT_sel.unstack()["CHI2_FIT"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"Mean CHI2 per night after selection {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT_sel.unstack()["CHI2_FIT"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"Median CHI2 per night after selection {tag}")
plt.tight_layout()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT_sel.unstack()["CHI2_FIT"]["std"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"STD variation CHI2 per night after selection {tag}")
plt.tight_layout()

## Show time variation each night

In [None]:
all_selected_nights = df_spec_sel["nightObs"].unique()

In [None]:
def funclineres(params, x, y, yerr):
    """Weighted residual of a straight line: (observed - model) / error.

    The model is ``params[0] + params[1]*x``.
    """
    intercept, slope = params[0], params[1]
    return (y - intercept - slope*x)/yerr
def funcline(params,x):
    """Straight line ``params[0] + params[1]*x`` (intercept, then slope)."""
    intercept, slope = params[0], params[1]
    return intercept + slope*x

In [None]:
def MakeLineFit(df_night_pwv_curve):
    """Fit a straight line to the PWV measurements of one night.

    The weighted residuals returned by ``funclineres`` are minimised with
    ``scipy.optimize.least_squares``, starting from intercept 5 and slope 0.
    The parameter covariance is ``inv(J^T J)`` scaled by the reduced chi2.

    Parameters
    ----------
    df_night_pwv_curve : dataframe with the columns "dt" (abscissa),
        "PWV [mm]_x" (ordinate) and "PWV [mm]_err_x" (error on the ordinate).

    Returns
    -------
    tuple
        ``(x, y, yerr, n, chi2dof, xfit, yfit, slope, slope_err)`` : the three
        input arrays, the number of points, the reduced chi2, the sampled
        fitted line, and the slope with its error.
    """
    x = df_night_pwv_curve["dt"].values
    y = df_night_pwv_curve["PWV [mm]_x"].values
    yerr = df_night_pwv_curve["PWV [mm]_err_x"].values
    n = len(y)

    result = least_squares(funclineres, [5.,0], args=(x,y,yerr))
    best_params = result.x
    jacobian = result.jac

    # covariance from the Jacobian, rescaled by the reduced chi2
    covariance = np.linalg.inv(jacobian.T.dot(jacobian))
    weighted_residuals = funclineres(best_params,x,y,yerr)
    chi2dof = (weighted_residuals**2).sum()/(n-len(best_params))
    covariance *= chi2dof
    param_errors = np.sqrt(np.diagonal(covariance))

    # fitted line sampled slightly beyond the range of the data
    xfit = np.linspace(x.min()*0.99,x.max()*1.05)
    yfit = funcline(best_params,xfit)

    return x,y,yerr,n,chi2dof,xfit,yfit,best_params[1],param_errors[1]

In [None]:
all_selected_nights

### Plot night by night

In [None]:
# For each selected night: compute the PWV statistics and, when more than 10
# "empty"-filter points are available, fit the PWV drift, draw the PWV and
# airmass curves, save the figure and store the statistics of the night.
all_dateObs_sel = {}

# loop on nights
for night in all_selected_nights:
    # select the night
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]

    # select the variables; explicit copy because a "dt" column is added below
    # (assigning a column on a slice of df_spec_sel is ambiguous for pandas)
    df_night_pwv_curve = df_spec_night[["Time","PWV [mm]_x","PWV [mm]_err_x","FILTER","AIRMASS"]].copy()

    tmin = df_night_pwv_curve["Time"].min()

    # time since the first observation of the night, in hours
    df_night_pwv_curve["dt"] = (df_night_pwv_curve["Time"] - tmin).dt.total_seconds()/3600.

    # extract statistics on pwv
    stat = df_night_pwv_curve[["PWV [mm]_x"]].describe()
    date_form = DateFormatter("%y-%m-%dT%H:%M")
    count = int(stat.loc["count"].values[0])
    mean = stat.loc["mean"].values[0]
    median = stat.loc["50%"].values[0]
    std = stat.loc["std"].values[0]
    textstr = "\n".join((f"count : {count}",
                     f"mean : {mean:.1f} mm",
                     f"median : {median:.1f} mm",
                     f"std : {std:.1f} mm",
                    ))

    # only the observations without filter are used for the fit
    df_night_pwv_curve_empty =  df_night_pwv_curve[df_night_pwv_curve.FILTER=="empty"]

    nightstr = datetime.strptime(str(night), "%Y%m%d")
    dt_midnight = get_astronomical_midnight(site_lsst, nightstr.date())
    x_midnight = mdates.date2num(dt_midnight)

    print(f"{night} :: {dt_midnight}",type(dt_midnight),x_midnight)
    N= len(df_night_pwv_curve_empty)

    if N>10:
        # best effort: a failure of the fit or of the plot of one night is
        # reported and must not stop the loop over the other nights
        try:
            # does the fit
            x,y,yerr,n,chi2dof,xfit,yfit,slope,slope_err = MakeLineFit(df_night_pwv_curve_empty)

            textstr3 = "\n".join((f"d(PWV/dt) : ",
                     f"slope : {slope:.3f} mm/h",
                     f"slope err : {slope_err:.3f} mm/h",
                    ))

            stat.loc["slope","PWV [mm]_x"] = slope
            stat.loc["slope_err","PWV [mm]_x"] = slope_err
            stat.loc["chi2","PWV [mm]_x"] = chi2dof

            # plot
            fig = plt.figure(figsize=(18,8))
            gs = GridSpec(1, 1, figure=fig)

            ax = fig.add_subplot(gs[0])
            ax_bis = ax.twinx()  # secondary y axis (airmass) sharing the same x axis

            # fixed colour per filter; a filter missing from this palette is
            # drawn in gray instead of raising KeyError, which the except below
            # would turn into a zeroed slope
            colors = {'empty': "blue", 'FELH0600': "purple",'OG550_65mm_1':"red"}

            for filt, group in df_night_pwv_curve.groupby("FILTER"):
                filt_color = colors.get(filt, "gray")
                ax.errorbar(
                    group["Time"],
                    group["PWV [mm]_x"],
                    yerr=group["PWV [mm]_err_x"],
                    fmt="o",
                    label=filt,
                    color=filt_color,
                    ecolor="k",
                    capsize=5,
                    markersize=10
                    )
                ax_bis.scatter(group["Time"],group["AIRMASS"], color=filt_color,marker="+",label="airmass")

            # reversed limits: the airmass axis is inverted (2.5 at the bottom)
            ax_bis.set_ylim(2.5,0.5)
            ax_bis.set_ylabel("airmass")
            ax.axvline(x=x_midnight, color='r', linestyle='--', linewidth=2)
            ax.legend(loc="upper right")
            ax.grid()
            ax.set_ylabel("PWV [mm]_x")
            ax.set_xlabel("time")
            ax.xaxis.set_major_formatter(date_form)
            ax.xaxis_date()
            ax.set_ylim(PWVMIN,PWVMAX)
            ax.text(0.03, 0.95, textstr, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
            ax.text(0.35, 0.95, textstr3, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
            ax.set_title(f"night {night}")
            ax.tick_params(axis="x", labelrotation=45)

            # suptitle before tight_layout, as in the other cells of the notebook
            plt.suptitle(tag)
            plt.tight_layout()
            figname =f"{pathfigs}/{prefix}_per_night_{night}"+figtype
            plt.savefig(figname)
            plt.show()

        except Exception as inst:
            print(type(inst))    # the exception type
            print(inst.args)     # arguments stored in .args
            print(inst)

            # flag the night as "no valid fit"
            stat.loc["slope","PWV [mm]_x"] = 0.
            stat.loc["slope_err","PWV [mm]_x"] = 0.
            stat.loc["chi2","PWV [mm]_x"] = -1.

        # add statistics
        all_dateObs_sel[night] = stat
        
        

## Make a summary of Night quality

In [None]:

def GetStatistics(all_dateObs_sel):
    """Gather the per-night statistics into a single summary table.

    Parameters
    ----------
    all_dateObs_sel : dict
        Maps a night identifier to a statistics table that has rows labelled
        ``count``, ``mean``, ``50%``, ``std``, ``slope`` and ``slope_err``.
        Only the first column of each table is read.

    Returns
    -------
    pandas.DataFrame
        One row per night (indexed by the night identifier) with columns
        ``count``, ``mean``, ``median``, ``std``, ``slope``, ``slope_err``.
        The ``median`` column is filled from the ``50%`` row.
    """
    out_columns = ["count","mean","median","std","slope","slope_err"]
    # Row labels read after "count", in the same order as the output columns.
    other_rows = ("mean", "50%", "std", "slope", "slope_err")

    summary = pd.DataFrame(columns = out_columns)

    for night, night_stat in all_dateObs_sel.items():
        # The count is converted to a plain int before anything else is read.
        row = [int(night_stat.loc["count"].values[0])]
        row.extend(night_stat.loc[label].values[0] for label in other_rows)
        summary.loc[night] = row
    return summary

In [None]:
# Collapse the per-night statistics stored in all_dateObs_sel into one
# summary table with one row per night.
df = GetStatistics(all_dateObs_sel)

In [None]:
def ComputeDataTimeStr(num):
    """Convert a date packed as YYYYMMDD into an ISO ``YYYY-MM-DD`` string.

    Parameters
    ----------
    num : int
        Date encoded as ``year*10000 + month*100 + day`` (e.g. ``20250916``).
        Any value accepted by ``int()`` (numpy integers, integral floats,
        digit strings) is also supported.

    Returns
    -------
    str
        The date as ``"YYYY-MM-DD"``, zero padded. No calendar validation is
        performed.
    """
    # Coerce once so numpy scalars, floats and digit strings behave like ints.
    year, month_day = divmod(int(num), 10000)
    month, day = divmod(month_day, 100)
    # Format specs handle the zero padding; no local named ``str`` is needed,
    # so the builtin is no longer shadowed.
    return f"{year:04d}-{month:02d}-{day:02d}"

### Convert dateobs (YYYYMMDD integer) into datetime

In [None]:
# Add a datetime column built from the night identifiers held in the index.
df["Time"] = pd.to_datetime(list(map(ComputeDataTimeStr, df.index)))

### Plot summary

In [None]:
# Summary figure with two stacked panels sharing the time axis:
#   top    : median PWV per night, with the "std" column as error bar
#   bottom : fitted PWV slope per night, with "slope_err" as error bar
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%d")
fig, axs = plt.subplots(2, 1, figsize=(18, 10), sharex=True)
ax1, ax2 = axs

# --- top panel: median per night ---
# df.plot labels the series with the column name ("median"), which is what
# the legend below displays.
df.plot(x="Time", y="median", ax=ax1, marker='+', c="r", lw=0.0, grid=True, ms=10)
ax1.errorbar(x=df["Time"], y=df["median"], yerr=df["std"], fmt=".", color="r", ecolor="k")

ax1.set_ylabel("PWV (mm)")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title(f"Median Precipitable water vapor per night {tag}")
ax1.set_ylim(PWVMIN, PWVMAX)
ax1.legend(loc="upper right")

# --- bottom panel: fitted slope per night ---
ax2.set_title("Fitted drift per night")
# An explicit label is required here: without any labelled artist the
# legend call below would produce an empty legend.
ax2.errorbar(x=df["Time"], y=df["slope"], yerr=df["slope_err"], fmt=".", color="r", ecolor="k", ms=10, label="slope")
ax2.grid()
ax2.set_ylabel("dPWV/dt (mm/h)")
ax2.set_xlabel("time")
ax2.set_ylim(-1., 1.)
ax2.xaxis.set_major_formatter(date_form)
ax2.legend(loc="upper right")

ax2.tick_params(axis='x', labelrotation=45)

plt.tight_layout()
figname =f"{pathfigs}/{prefix}_pwv_medianspredslope_allnights"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Single-panel figure: median PWV per night with the "std" column as error bar.
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%d")
fig, ax = plt.subplots(1, 1, figsize=(18, 8))

# df.plot labels the series with the column name ("median"), which is what
# the legend below displays.
df.plot(x="Time", y="median", ax=ax, marker='+', c="r", ms=20, lw=0.0, grid=True)
ax.errorbar(x=df["Time"], y=df["median"], yerr=df["std"], fmt="o", ms=5, color="r", ecolor="k")
ax.set_ylabel("PWV (mm)")
ax.set_xlabel("time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title(f"Median and spread of Precipitable water vapor per night {tag}")
ax.set_ylim(PWVMIN, PWVMAX)
ax.legend(loc="upper right")

# Shade the interval from TMIN to datetime_WITHCOLLIMATOR.
# NOTE(review): presumably the period tied to the collimator configuration;
# confirm against where these two values are defined.
ax.axvspan(TMIN, datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)

figname =f"{pathfigs}/{prefix}_pwv_medians_allnights"+figtype
plt.savefig(figname)
plt.show()