# GAIN01 : Explore hologram data  quality on Gain

- author Sylvie Dagoret-Campagne
- creation date 2025-09-29 : anomaly with gains
- affiliation : IJCLab
- Kernel @usdf **w_2025_36**
- Home emac : base (conda)
- laptop : conda_py313

**Goal** : Show Night variations of PWV wrt date and Time. Fit a straight line.

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
print(python_version())

In [None]:
import os

In [None]:
# Directory, file-name prefix and file type of the figures saved by this notebook
pathfigs = "figs_GAIN01"
prefix = "gain01"
# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test
os.makedirs(pathfigs, exist_ok=True)
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (16,8)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'
plt.rcParams["legend.fontsize"] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from GAIN00_parameters import *

In [None]:
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a date packed as the integer YYYYMMDD into a pandas timestamp.

    Parameters
    ----------
    num : int
        Date encoded as ``year * 10_000 + month * 100 + day``.

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the ``YYYY-MM-DD`` string
    built from ``num``.
    """
    # peel off the year, then split what is left into month and day
    year = num // 10_000
    remainder = num - year * 10_000
    month = remainder // 100
    day = remainder - month * 100

    # zero-pad every field to the fixed widths of an ISO date
    fields = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(fields))

In [None]:
# Collimator selection: when FLAG_WITHCOLLIMATOR is True, only the nights
# strictly after DATE_WITHCOLLIMATOR (integer YYYYMMDD) are kept further down.
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR)
# NOTE(review): this second assignment overwrites the value computed just above
# with a hard-coded, UTC-aware timestamp of the same date — confirm which one is intended.
datetime_WITHCOLLIMATOR = pd.to_datetime("2023-09-30 00:00:00.0+0000")
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `GAIN00_parameters.py`

In [None]:
# Load the input table from the file named by atmfilename.
# NOTE: allow_pickle=True lets numpy unpickle object arrays, which can run
# arbitrary code — only load files coming from a trusted source.
specdata = np.load(atmfilename,allow_pickle=True)

In [None]:
df_spec = pd.DataFrame(specdata)

In [None]:
#if FLAG_WITHCOLLIMATOR:
#    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR]    

In [None]:
# add time for plotting
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])

In [None]:
# Night of observation: the exposure id with its five trailing (sequence-number)
# digits removed. Vectorised integer division replaces the row-by-row apply and
# mirrors the way seq_num is derived from the same "id" column.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
df_spec["seq_num"]  = df_spec["id"] % 100_000

In [None]:
df_spec[["id","FILTER"]]

In [None]:
df_spec["FILTER"].unique()

In [None]:
print(list(df_spec.columns))

### Check Filters

In [None]:
# Sequence number of every observation versus its time, one colour per filter

plt.figure(figsize=(20,8))
sns.scatterplot(
    data=df_spec, 
    x="Time",       # x axis: datetime values
    y="seq_num",    # could also be ra, dec, etc.
    hue="FILTER", 
    palette="tab10",
    s=100,
    edgecolor="black",
    linewidth=0.2
)

plt.title(f"Auxtel Holo observations wrt date and filter type, {tag}")
plt.xlabel("Date of observation")
plt.ylabel("Seq Num")
plt.xticks(rotation=45)  # keeps the date labels readable
#plt.legend(loc="upper left",ncol=8)
plt.legend(bbox_to_anchor=(1.01, 1.05),ncols=1)
plt.tight_layout()
plt.show()


In [None]:
# Strip plot of the observation times, one row (and one colour) per filter
plt.figure(figsize=(20,8))
# label combining the filter name and the sequence number of the observation
df_spec["FILTER_seq"] = df_spec["FILTER"].astype(str) + "_" + df_spec["seq_num"].astype(str)
sns.stripplot(
    data=df_spec,
    x="Time",
    y="FILTER",
    hue="FILTER",
    palette="Set1",
    size=10,         # marker size
    jitter=True,    # keeps the points from overlapping
    alpha=1.0,
    edgecolor="black",
    linewidth=0.1
)
plt.title(f"Auxtel Holo observations wrt date and filter type,  {tag}")
plt.xlabel("Time")
plt.ylabel("Filter")
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid()
plt.show()


### Visualize Selection cuts

In [None]:
# Histograms of D_CCD, CHI2_FIT and EXPTIME for all observations;
# the dash-dotted vertical lines mark the corresponding cut values.
fig,axs = plt.subplots(1,3,figsize=(18,3))
ax1,ax2,ax3  = axs.flatten()
df_spec.hist("D_CCD [mm]_x",ax=ax1,bins=50,range=(DCCDMINFIG,DCCDMAXFIG),facecolor="b")
ax1.axvline(DCCDMINCUT,ls="-.",c="k")
ax1.axvline(DCCDMAXCUT,ls="-.",c="k")

# CHI2_FIT is histogrammed over the fixed range 0-500
df_spec.hist("CHI2_FIT",ax=ax2,bins=50,range=(0,500),facecolor="b")
#ax2.set_yscale("log")
ax2.axvline(CHI2CUT,ls="-.",c="k")


df_spec.hist("EXPTIME",ax=ax3,bins=20,facecolor="b")
ax3.axvline(EXPTIMECUT,ls="-.",c="k") 
plt.suptitle(tag)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Histograms of D_CCD, CHI2_FIT and EXPTIME drawn separately for each filter
# (one row of three panels per filter), with the cut values as vertical lines.
filters = df_spec["FILTER"].unique()

# squeeze=False always returns a 2-D grid of axes, so a single filter needs
# no special handling when a row is unpacked
fig, axs = plt.subplots(len(filters), 3, figsize=(18, 3*len(filters)), squeeze=False)

cut_style = dict(ls="-.", c="k")  # style of the lines marking the cuts

for i, f in enumerate(filters):
    subdf = df_spec[df_spec["FILTER"] == f]  # observations taken with this filter only
    ax1, ax2, ax3 = axs[i]

    subdf.hist("D_CCD [mm]_x", ax=ax1, bins=50,
               range=(DCCDMINFIG, DCCDMAXFIG), facecolor="b")
    for dccd_cut in (DCCDMINCUT, DCCDMAXCUT):
        ax1.axvline(dccd_cut, **cut_style)
    ax1.set_title(f"{f} – D_CCD [mm]_x")

    subdf.hist("CHI2_FIT", ax=ax2, bins=50, range=(0,300), facecolor="b")
    #ax2.set_yscale("log")
    ax2.axvline(CHI2CUT, **cut_style)
    ax2.set_title(f"{f} – CHI2_FIT")

    subdf.hist("EXPTIME", ax=ax3, bins=20, facecolor="b")
    ax3.axvline(EXPTIMECUT, **cut_style)
    ax3.set_title(f"{f} – EXPTIME")

plt.suptitle(f" Quality selection cut , {tag}")
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

# D_CCD and CHI2_FIT versus time for all observations, coloured by filter,
# with the cut values drawn as dash-dotted horizontal lines.

#date_form = DateFormatter("%y-%m-%dT%H:%M")
date_form = DateFormatter("%y-%m-%d")
fig, axs = plt.subplots(2, 1, figsize=(18, 12))
ax1, ax2 = axs

# Encode FILTER as integer category codes for the colour map. Code k refers to
# cat.categories[k], so the legend labels are taken from the categories:
# unique() lists the filters in order of first appearance, which need not
# match the order of the codes and would attach labels to the wrong colours.
filter_cat = df_spec["FILTER"].astype("category")
filter_codes = filter_cat.cat.codes
filter_names = [str(name) for name in filter_cat.cat.categories]
# legend_elements(num=None) yields one handle per distinct code, in increasing
# order; code -1 is what pandas assigns to a missing FILTER value
legend_labels = [filter_names[k] if k >= 0 else "undefined"
                 for k in sorted(set(filter_codes))]

# --- DCCD vs time ---
sc = ax1.scatter(
    df_spec["Time"],
    df_spec["D_CCD [mm]_x"],
    c=filter_codes,   # numeric codes
    cmap="Set1",      # discrete palette
    marker="+",lw=5,alpha=0.5
)

# Legend listing the filters
handles, labels = sc.legend_elements(prop="colors", alpha=0.6, num=None)
ax1.legend(handles, legend_labels, title="FILTER",ncols=8)
#ax1.legend(bbox_to_anchor=(1.01, 1.05),ncols=1,title="FILTER")

plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
ax1.set_ylabel("D_CCD [mm]_x")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("DCCD vs time")
# y range and cut lines are set once (the original cell repeated these calls)
ax1.set_ylim(DCCDMINFIG,DCCDMAXFIG)
ax1.axhline(DCCDMINCUT,ls="-.",c="k")
ax1.axhline(DCCDMAXCUT,ls="-.",c="k")

# --- CHI2 vs time ---
sc = ax2.scatter(
    df_spec["Time"],
    df_spec["CHI2_FIT"],
    c=filter_codes,   # numeric codes
    cmap="Set1",      # discrete palette
    marker="+",lw=5,alpha=0.5
)
ax2.set_yscale("log")
ax2.axhline(CHI2CUT,ls="-.",c="k")

handles, labels = sc.legend_elements(prop="colors", alpha=0.6, num=None)
ax2.legend(handles, legend_labels, title="FILTER",ncols=8)
#ax2.legend(bbox_to_anchor=(1.01, 1.05),ncols=1,title="FILTER")

plt.setp(ax2.get_xticklabels(), rotation=45, ha="right")
ax2.set_ylabel("CHI2_FIT")
ax2.set_xlabel("time")
ax2.xaxis.set_major_formatter(date_form)
ax2.set_title("CHI2_FIT")

plt.suptitle(tag)
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

# D_CCD and CHI2_FIT versus time, one row of two panels per filter,
# with the cut values drawn as dash-dotted horizontal lines.
date_form = DateFormatter("%y-%m-%d")

filters = df_spec["FILTER"].unique()

# squeeze=False always returns a 2-D grid of axes, so a single filter
# needs no special case
fig, axs = plt.subplots(len(filters), 2, figsize=(18, 6*len(filters)), squeeze=False)

marker_style = dict(marker="+", lw=5, alpha=0.5, color="b")
cut_style = dict(ls="-.", c="k")

for i, f in enumerate(filters):
    subdf = df_spec[df_spec["FILTER"] == f]
    ax1, ax2 = axs[i]

    # --- DCCD vs time ---
    ax1.scatter(subdf["Time"], subdf["D_CCD [mm]_x"], **marker_style)
    ax1.set_ylim(DCCDMINFIG, DCCDMAXFIG)
    for dccd_cut in (DCCDMINCUT, DCCDMAXCUT):
        ax1.axhline(dccd_cut, **cut_style)
    ax1.set_title(f"{f} – DCCD vs time")

    # --- CHI2 vs time ---
    ax2.scatter(subdf["Time"], subdf["CHI2_FIT"], **marker_style)
    ax2.set_yscale("log")
    ax2.axhline(CHI2CUT, **cut_style)
    ax2.set_title(f"{f} – CHI2_FIT")

    # axis labels and date formatting shared by both panels
    for ax, ylabel in ((ax1, "D_CCD [mm]_x"), (ax2, "CHI2_FIT")):
        ax.set_ylabel(ylabel)
        ax.set_xlabel("time")
        ax.xaxis.set_major_formatter(date_form)
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

plt.suptitle(tag)
plt.tight_layout()
plt.show()


### Target used

In [None]:
df_spec["TARGET"].unique()

In [None]:
# Strip plot of the observation times, one row (and one colour) per target
plt.figure(figsize=(20,12))
# label combining the target name and the sequence number of the observation
df_spec["TARGET_seq"] = df_spec["TARGET"].astype(str) + "_" + df_spec["seq_num"].astype(str)
sns.stripplot(
    data=df_spec,
    x="Time",
    y="TARGET",
    hue="TARGET",
    palette="Set1",
    size=10,         # marker size
    jitter=True,    # keeps the points from overlapping
    alpha=1.0,
    edgecolor="black",
    linewidth=0.1
)
plt.title(f"Auxtel Holo observations wrt date and target {tag}")
plt.xlabel("Time")
plt.ylabel("Target")
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid()
plt.show()

### Is the star Faint or bright

In [None]:
def IsFaint(row):
    """Tell whether the target of an observation is one of the selected faint stars.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must give the target name under the key ``"TARGET"``.

    Returns
    -------
    bool
        True when ``row["TARGET"]`` is one of the first four entries of the
        faint-target list, False otherwise.
    """
    List_Of_Faint_targets = ['Feige110','HD074000','HD115169','HD031128','HD200654','HD167060','HD009051','HD142331','HD160617','HD111980']
    # only the first four targets of the list are currently selected
    List_Of_faint_selected = List_Of_Faint_targets[:4]
    # the membership test already yields the boolean to return
    return row["TARGET"] in List_Of_faint_selected

In [None]:
df_spec["isFaint"] = df_spec.apply(IsFaint,axis=1)

### Collimator

In [None]:
# Optional cut: keep only the nights strictly after DATE_WITHCOLLIMATOR
if FLAG_WITHCOLLIMATOR:
    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR ]

## Apply or not correction on errors related to PWV repeatability

In [None]:
# Optionally enlarge the PWV errors to take the photometric repeatability into account
if FLAG_CORRECTFOR_PWV_REPEAT:
    pwv_err = df_spec["PWV [mm]_err_x"]
    if FLAG_CORRECTFOR_PWV_REPEAT_RATIO:
        # multiplicative correction
        pwv_err = pwv_err * FACTORERR_PWV_REPEAT
    else:
        # extra term added in quadrature
        pwv_err = np.sqrt(pwv_err**2  +   SIGMA_PWV_REPEAT**2)
    df_spec["PWV [mm]_err_x"] = pwv_err
    

### Series on spec

In [None]:
ser_spec_size = df_spec.groupby(["nightObs"]).size()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,5))
ax.hist(ser_spec_size.values,bins=50,facecolor="b")
ax.set_title(f"nb obs per night {tag}")
ax.set_xlabel("Nobs/night")
plt.show()

##### Make 3 series

In [None]:
# Per-night summary tables: the same set of statistics is computed for
# CHI2_FIT alone, for PWV alone, and for both columns together.
night_stats = ['count','min', 'max','mean','std','median']
per_night = lambda columns: df_spec[columns + ["nightObs"]].groupby(["nightObs"]).agg(night_stats)

ser_CHI2_FIT = per_night(["CHI2_FIT"])
ser_PWV = per_night(["PWV [mm]_x"])
ser_PWV_CHI2_FIT = per_night(["PWV [mm]_x","CHI2_FIT"])

### Plot PWV and Chi2 from series before any selection

In [None]:
# Bar chart: number of PWV measurements for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV.unstack()["PWV [mm]_x"]["count"].plot(kind='bar', ax=ax,subplots=False, rot=90,figsize=(18,4),facecolor="b",grid=True,title=f"Number of measurements per night, {tag}")
plt.tight_layout()

In [None]:
# Bar chart: mean PWV for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV.unstack()["PWV [mm]_x"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"Mean PWV per night , {tag}")
plt.tight_layout()

In [None]:
# Bar chart: median PWV for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_PWV.unstack()["PWV [mm]_x"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"Median PWV per night ,{tag}")
plt.tight_layout()

In [None]:
# Bar chart: standard deviation of PWV for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
# ax=ax draws on the axes created above, as the other per-night bar charts do;
# without it pandas opens a second figure and leaves this one empty
ser_PWV.unstack()["PWV [mm]_x"]["std"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='b',grid=True,title=f"STD variation for PWV per night , {tag}")
plt.tight_layout()

In [None]:
# Bar chart: number of CHI2_FIT values for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["count"].plot(kind='bar', ax=ax,subplots=False, rot=90,figsize=(18,4),facecolor="r",grid=True,title=f"Number of measurements per night, {tag}")
plt.tight_layout()

In [None]:
# Bar chart: mean CHI2_FIT for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["mean"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"Mean CHI2 per night, {tag}")
plt.tight_layout()

In [None]:
# Bar chart: median CHI2_FIT for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["median"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"Median CHI2 per night , {tag}")
plt.tight_layout()

In [None]:
# Bar chart: standard deviation of CHI2_FIT for each night
fig,ax = plt.subplots(1,1,figsize=(18,3))
ser_CHI2_FIT.unstack()["CHI2_FIT"]["std"].plot(kind='bar',ax=ax ,subplots=False, rot=90,figsize=(18,4),facecolor='r',grid=True,title=f"STD variation CHI2 per night , {tag}")
plt.tight_layout()

#### Add the per-night aggregate data to the pandas dataframe

In [None]:
def FillAgreggates(row):
    """Return the per-night PWV and CHI2 statistics of the night of ``row``.

    The values are read from the module-level table ``ser_PWV_CHI2_FIT``
    (one line per ``nightObs``).

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must give the night of observation under the key ``"nightObs"``.

    Returns
    -------
    pandas.Series
        Entries ``_count`` (number of PWV values of the night), then
        ``_pwv<stat>`` and ``_chi2<stat>`` for the statistics
        min, max, mean, median and std, in that order.
    """
    # one line per quantity, one column per statistic, for this night only
    night_table = ser_PWV_CHI2_FIT.loc[row["nightObs"], :].unstack()
    pwv_stats = night_table.loc["PWV [mm]_x"]
    chi2_stats = night_table.loc["CHI2_FIT"]

    stat_names = ("min", "max", "mean", "median", "std")
    values = {"_count": pwv_stats["count"]}
    values.update({f"_pwv{name}": pwv_stats[name] for name in stat_names})
    values.update({f"_chi2{name}": chi2_stats[name] for name in stat_names})

    return pd.Series(data=values)

In [None]:
row = df_spec.iloc[0]

In [None]:
row

In [None]:
FillAgreggates(row)

#### join dataframe + aggregates

In [None]:
# Compute the per-night aggregates for every row and append them to df_spec
# as new "_"-prefixed columns (join aligns them on the row index).
# NOTE(review): the row-wise apply repeats the same per-night lookup for each observation.
df_spec = df_spec.join(df_spec.apply(FillAgreggates,axis=1,result_type="expand"))

## Apply Quality selection

In [None]:
# Time range of the data, widened by one week (7*24*60 minutes) on each side
DT = pd.Timedelta(days=7)
TMIN, TMAX = df_spec["Time"].min() - DT, df_spec["Time"].max() + DT

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec):
    """
    Compute the time boundaries of every observing night.

    input:
      df_spec the dataframe of spectroscopy summary results; its columns
      "nightObs" and "Time" are used
    output:
      the dict of night boundaries: nightObs -> (tmin, tmax), i.e. the first
      and last "Time" of the night widened by 30 minutes on each side
    """
    margin = pd.Timedelta(minutes=30)
    boundaries = {}
    # nights are visited in their order of first appearance in the dataframe
    for night in df_spec["nightObs"].unique():
        night_times = df_spec.loc[df_spec["nightObs"] == night, "Time"]
        boundaries[night] = (night_times.min() - margin, night_times.max() + margin)
    return boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec)

## Plot all data

In [None]:
# Sorted list of the filters present in the data
list_all_filts = df_spec["FILTER"].unique()
list_all_filts= sorted(list_all_filts )
# zip stops at the shorter sequence, so only the first two filters get a colour here.
# NOTE(review): `colors` rebinds the alias of the `import matplotlib.colors as colors`
# statement, and it is assigned again in the following cell.
colors = {filt: col for filt, col in zip(list_all_filts, ["r","b"])}

In [None]:
# Colour used for each filter in the plots
colors = {
    'empty': "blue", 
    'FELH0600': "purple",
    'OG550_65mm_1':"red",
    "BG40_65mm_1":"green"
}
# Any other filter found in the data gets a colour of matplotlib's default
# cycle ("C0", "C1", ...), so that colors[filt] cannot raise a KeyError
# when the observations are plotted filter by filter
for _idx, _filt in enumerate(df_spec["FILTER"].unique()):
    colors.setdefault(_filt, f"C{_idx % 10}")

In [None]:

# A1_x with its error bars versus time for all observations (before any
# selection cut), one colour per filter; the figure is saved under pathfigs.
date_form = DateFormatter("%y-%m-%d")

fig,axs = plt.subplots(1,1,figsize=(18,8))
ax  = axs
# only referenced by the commented-out df_spec.plot call below
leg=ax.get_legend()

#df_spec.plot(x="Time",y="PWV [mm]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,label=tag,legend=leg)

# one errorbar series per filter, coloured through the colors dict
for filt, group in df_spec.groupby("FILTER"):
    ax.errorbar(
                    group["Time"],
                    group["A1_x"],
                    yerr= group["A1_err_x"],
                    fmt="o",
                    label=filt,
                    color=colors[filt],
                    ecolor="k",
                    capsize=1,
                    markersize=5
                    )


ax.set_ylabel("A1_x")

ax.set_xlabel("Time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title(f"A1 vs time (before selection cut) , {tag}")
ax.tick_params(axis="x", rotation=45)
ax.legend(loc="upper right")


# shade the (tmin, tmax) time span of every night stored in dn
for key, tt in dn.items():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.05)

plt.tight_layout()

figname =f"{pathfigs}/{prefix}_A1x_allpoints_allnights_noqualcuts"+figtype
plt.savefig(figname)
plt.show()