# Compare hologram data quality on PWV between two versions

- author Sylvie Dagoret-Campagne
- creation date 2024-09-25 
- last update : 2024-09-26 

- affiliation : IJCLab
- Kernel @usdf **w_2024_37**
- Office emac : mamba_py311
- Home emac : base (conda)
- laptop : conda_py310


In [None]:
# Print the version of the Python interpreter running this notebook.
from platform import python_version
print(python_version())

In [None]:
import warnings
# Reset the warning filters, then install a rule that ignores every warning.
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# Print the Python interpreter version (same statements as the first code cell).
from platform import python_version
print(python_version())

In [None]:
import os

In [None]:
# Directory where the figures are stored
pathfigs = "figsPWVComparison"
# exist_ok=True creates the directory when missing and does nothing when it
# already exists, without a separate (race-prone) existence check.
os.makedirs(pathfigs, exist_ok=True)
# File extension (image format) appended to every saved figure name
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
# Display the installed numpy version.
np.__version__

In [None]:
# Display the installed pandas version.
pd.__version__

## Configuration

In [None]:
# Versions to compare:
#   first  : Jeremy's version with the old getObsAtmo
#   second : the notebook author's version with the new getObsAtmo
v_r1, v_r2 = "v2", "v3"

# Legend label used in the plots for each version key
legendtag = dict(
    v1="very old version",
    v2="old version",
    v3="new version",
)

In [None]:
# Spectroscopy summary file (.npy) associated with each version key.
# NOTE(review): only the "v1" entry carries a directory prefix (data/spectro/);
# the "v2" and "v3" entries are bare file names, presumably resolved relative
# to the working directory - confirm.
atmfilenamesdict = {"v1" : "data/spectro/auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_testWithMaskedEdges_newBoundaries_newPolysRescaled_newFitBounds_adjustA1_lockedOrder2_removeThroughputTails_2.npy",
                    "v2" : "auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_PeekFinder.npy",
                    "v3" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z.npy"}

In [None]:
# Resolve the input file and the legend label of each of the two compared versions.
atmfilename1, atmfilename2 = atmfilenamesdict[v_r1], atmfilenamesdict[v_r2]
tag1, tag2 = legendtag[v_r1], legendtag[v_r2]

## Initialisation

### Read files

In [None]:
# Load the two summary files, in order.
# NOTE: allow_pickle=True lets numpy unpickle Python objects stored in the
# files, which can execute arbitrary code; only load files from a trusted source.
specdata1, specdata2 = (
    np.load(filename, allow_pickle=True)
    for filename in (atmfilename1, atmfilename2)
)

In [None]:
# Build one dataframe per loaded data set.
df_spec1, df_spec2 = map(pd.DataFrame, (specdata1, specdata2))

### Compute NightObs

In [None]:
# nightObs is the "id" column floor-divided by 100000
# (presumably id = nightObs * 100000 + sequence number - TODO confirm).
# The vectorised column operation replaces a Python-level call per row.
df_spec1["nightObs"] = df_spec1["id"] // 100_000
df_spec2["nightObs"] = df_spec2["id"] // 100_000

### Add the observation time as a pandas datetime column

In [None]:
# Parse the DATE-OBS column of each dataframe into a pandas datetime column.
for spec_frame in (df_spec1, df_spec2):
    spec_frame["Time"] = pd.to_datetime(spec_frame["DATE-OBS"])

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec, margin=None):
    """
    Compute the time window covered by the observations of each night.

    input:
      df_spec the dataframe for spectroscopy summary results; it must hold
              a "nightObs" column and a datetime "Time" column
      margin  optional pd.Timedelta subtracted from the first and added to
              the last observation time of each night (default: 30 minutes)
    output:
      the dict of night boundaries {nightObs: (tmin - margin, tmax + margin)},
      with the nights in their order of first appearance in df_spec
    """
    if margin is None:
        margin = pd.Timedelta(minutes=30)

    # A single groupby pass replaces one boolean selection per night;
    # sort=False keeps the nights in their order of first appearance.
    # NOTE: groupby leaves out rows whose nightObs is missing (NaN).
    bounds = df_spec.groupby("nightObs", sort=False)["Time"].agg(["min", "max"])

    return {
        nightobs: (tmin - margin, tmax + margin)
        for nightobs, tmin, tmax in zip(bounds.index, bounds["min"], bounds["max"])
    }

In [None]:
# Night boundaries of the full (unselected) data of each version.
dn1, dn2 = (GetNightBoundariesDict(frame) for frame in (df_spec1, df_spec2))

## Plot data

In [None]:
from matplotlib.dates import DateFormatter

# Overlay the PWV measurements of both versions on a single time axis.
date_form = DateFormatter("%y-%m-%dT%H:%M")
fig, axs = plt.subplots(1, 1, figsize=(14, 6))
ax1 = axs

# Legend object currently attached to the axes; it is handed to pandas as the
# legend flag, and the legend itself is drawn explicitly below.
leg = ax1.get_legend()
for spec_frame, marker, color, tag in ((df_spec1, 'x', "b", tag1),
                                       (df_spec2, '+', "r", tag2)):
    spec_frame.plot(x="Time", y="PWV [mm]_x", ax=ax1, marker=marker, c=color,
                    lw=0.0, grid=True, label=tag, legend=leg, ms=10)

ax1.legend(loc="upper right")
ax1.set_ylabel("PWV [mm]_x")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("Precipitable water vapor meas by holo vs time")

# Shade the time window of every night: blue for the first version, red for the second.
for night_bounds, shade in ((dn1, 'blue'), (dn2, 'red')):
    for tstart, tstop in night_bounds.values():
        ax1.axvspan(tstart, tstop, color=shade, alpha=0.1)

plt.tight_layout()

## Apply Quality selection cuts

In [None]:
def getSelectionCut(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9):
    """
    Build the boolean quality mask of a spectroscopy summary dataframe.

    input:
      df_spec the dataframe, with the columns "CHI2_FIT" and "PWV [mm]_x"
      chi2max exclusive upper bound on CHI2_FIT
      pwvmin  exclusive lower bound on PWV [mm]_x
      pwvmax  exclusive upper bound on PWV [mm]_x
    output:
      the boolean series, True for the rows passing all three cuts
    """
    pwv = df_spec["PWV [mm]_x"]
    good_fit = df_spec["CHI2_FIT"].lt(chi2max)
    pwv_in_range = pwv.gt(pwvmin) & pwv.lt(pwvmax)
    return good_fit & pwv_in_range

In [None]:
# Quality mask of each version, with the default thresholds of getSelectionCut.
cut1, cut2 = getSelectionCut(df_spec1), getSelectionCut(df_spec2)

In [None]:
# Keep the rows passing the cuts and renumber them from 0.
df_spec_sel1 = df_spec1[cut1].reset_index(drop=True)
df_spec_sel2 = df_spec2[cut2].reset_index(drop=True)

## Compute per-night aggregates

### Compute the per-night pandas series of aggregates

In [None]:
# Per-night aggregates of PWV and CHI2_FIT for each version
# (one row per nightObs, one column per (variable, aggregate) pair).
agg_columns = ["PWV [mm]_x","CHI2_FIT","nightObs"]
agg_functions = ['count','min', 'max','mean','std','median']
ser_PWV_CHI2_FIT_sel1 = df_spec_sel1[agg_columns].groupby(["nightObs"]).agg(agg_functions)
ser_PWV_CHI2_FIT_sel2 = df_spec_sel2[agg_columns].groupby(["nightObs"]).agg(agg_functions)

## Add the per-night aggregates to each dataframe

In [None]:
def FillAgreggatesSel(row, aggregates=None):
    """
    Return the per-night aggregates matching the night of one observation.

    Meant to be used with DataFrame.apply(..., axis=1, result_type="expand").

    input:
      row        one row of a spectroscopy dataframe; only row["nightObs"] is read
      aggregates optional per-night aggregate table indexed by nightObs, with
                 two-level columns (variable, aggregate) for the variables
                 "PWV [mm]_x" and "CHI2_FIT". When omitted, the module-level
                 variable ser_PWV_CHI2_FIT_sel is used.
    output:
      a pd.Series with the entries _count, _pwvmin, _pwvmax, _pwvmean,
      _pwvmedian, _pwvstd, _chi2min, _chi2max, _chi2mean, _chi2median, _chi2std
      (_count is the count of the PWV column)
    """
    if aggregates is None:
        # default: the table currently bound at module level
        aggregates = ser_PWV_CHI2_FIT_sel

    # Series indexed by (variable, aggregate) for the night of this row
    night_stats = aggregates.loc[row["nightObs"], :]

    values = {"_count": night_stats.loc[("PWV [mm]_x", "count")]}
    stat_names = ("min", "max", "mean", "median", "std")
    for prefix, column in (("_pwv", "PWV [mm]_x"), ("_chi2", "CHI2_FIT")):
        for stat_name in stat_names:
            values[prefix + stat_name] = night_stats.loc[(column, stat_name)]

    return pd.Series(data=values)
    

In [None]:
# FillAgreggatesSel reads the module-level ser_PWV_CHI2_FIT_sel, so that name
# is rebound to the matching aggregate table before each apply.
ser_PWV_CHI2_FIT_sel = ser_PWV_CHI2_FIT_sel1
night_aggregates1 = df_spec_sel1.apply(FillAgreggatesSel,axis=1,result_type="expand")
df_spec_sel1 = df_spec_sel1.join(night_aggregates1)

ser_PWV_CHI2_FIT_sel = ser_PWV_CHI2_FIT_sel2
night_aggregates2 = df_spec_sel2.apply(FillAgreggatesSel,axis=1,result_type="expand")
df_spec_sel2 = df_spec_sel2.join(night_aggregates2)

## Recompute night boundaries

In [None]:
# Night boundaries recomputed on the selected data of each version.
dn1, dn2 = (GetNightBoundariesDict(frame) for frame in (df_spec_sel1, df_spec_sel2))


### Plot all data

In [None]:
from matplotlib.dates import DateFormatter

# Overlay the selected PWV measurements of both versions and save the figure.
date_form = DateFormatter("%y-%m-%dT%H:%M")
fig, axs = plt.subplots(1, 1, figsize=(14, 6))
ax1 = axs
# Legend object currently attached to the axes, handed to pandas as the legend flag
leg = ax1.get_legend()

for spec_frame, marker, color, tag in ((df_spec_sel1, 'x', "b", tag1),
                                       (df_spec_sel2, '+', "r", tag2)):
    spec_frame.plot(x="Time", y="PWV [mm]_x", ax=ax1, marker=marker, c=color,
                    lw=0.0, grid=True, label=tag, legend=leg, ms=10)

ax1.set_ylabel("PWV [mm]_x")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("Precipitable water vapor meas by holo selected vs time")

ax1.legend(loc="upper right")

# Shade the time window of every night: blue for the first version, red for the second.
for night_bounds, shade in ((dn1, 'blue'), (dn2, 'red')):
    for tstart, tstop in night_bounds.values():
        ax1.axvspan(tstart, tstop, color=shade, alpha=0.1)

plt.tight_layout()
figname = f"{pathfigs}/pwv_allpoints_allnights" + figtype
plt.savefig(figname)
plt.show()

## Show the time variation during each night

In [None]:
# Distinct nights remaining in each selected dataframe.
all_selected_nights1, all_selected_nights2 = (
    frame["nightObs"].unique() for frame in (df_spec_sel1, df_spec_sel2)
)


In [None]:
# Nights present in both selections (both inputs come from unique(), hence assume_unique).
all_selected_nights = np.intersect1d(all_selected_nights1, all_selected_nights2, assume_unique=True)
# Nights present in only one of the two selections. setxor1d is symmetric, whereas
# setdiff1d(a, b) would miss the nights found only in the second selection.
all_selected_nights_notincommon = np.setxor1d(all_selected_nights1, all_selected_nights2, assume_unique=True)

In [None]:
# Report the content of all_selected_nights_notincommon.
print(">>>>>> Night not in common : ",all_selected_nights_notincommon)

In [None]:
def funclineres(params, x, y, yerr):
    """
    Error-weighted residuals of a straight line: (observed - line) / yerr,
    with intercept params[0] and slope params[1].
    """
    intercept = params[0]
    slope = params[1]
    weighted_residual = (y - intercept - slope*x)/yerr
    return weighted_residual


def funcline(params,x):
    """
    Straight line of intercept params[0] and slope params[1] evaluated at x.
    """
    intercept = params[0]
    slope = params[1]
    return intercept + slope*x

In [None]:
def MakeLineFit(df_night_pwv_curve):
    """
    Fit a straight line to the PWV measurements of one night as a function of dt.

    The error-weighted residuals of funclineres are minimised with
    scipy least_squares, starting from the parameters (5., 0).

    input:
      df_night_pwv_curve dataframe with the columns "dt", "PWV [mm]_x"
                         and "PWV [mm]_err_x"
    output:
      the tuple (x, y, yerr, n, chi2dof, xfit, yfit, slope, slope_err):
      x, y, yerr are the fitted arrays, n the number of points, chi2dof the
      chi2 per degree of freedom, xfit, yfit the fitted line sampled between
      0.99*min(x) and 1.05*max(x), slope and slope_err the fitted slope and
      its error taken from the covariance matrix scaled by chi2dof
    """
    times = df_night_pwv_curve["dt"].values
    pwv = df_night_pwv_curve["PWV [mm]_x"].values
    pwv_err = df_night_pwv_curve["PWV [mm]_err_x"].values
    npoints = len(pwv)

    result = least_squares(funclineres, [5.,0], args=(times, pwv, pwv_err))
    best_params = result.x
    nparams = len(best_params)

    # parameter covariance estimated from the Jacobian at the solution: inv(J^T J)
    jacobian = result.jac
    covariance = np.linalg.inv(jacobian.T.dot(jacobian))

    # rescale the covariance by the chi2 per degree of freedom
    residuals = funclineres(best_params, times, pwv, pwv_err)
    chi2_per_dof = (residuals**2).sum()/(npoints-nparams)
    covariance *= chi2_per_dof
    param_errors = np.sqrt(np.diagonal(covariance))

    # fitted line sampled slightly beyond the data range
    line_x = np.linspace(times.min()*0.99, times.max()*1.05)
    line_y = funcline(best_params, line_x)

    return (times, pwv, pwv_err, npoints, chi2_per_dof,
            line_x, line_y, best_params[1], param_errors[1])
    

### Plot night by night

In [None]:
# Per-night statistics tables (describe() output extended with the line-fit
# results), keyed by nightObs, one dict per compared version.
all_dateObs_sel1 = {}
all_dateObs_sel2 = {}

pwv_columns = ["Time","PWV [mm]_x","PWV [mm]_err_x"]
date_form = DateFormatter("%y-%m-%dT%H:%M")

# loop on nights
for night in all_selected_nights:
    # select the night and the variables; .copy() gives independent frames so
    # that adding the "dt" column below does not write into a slice of the
    # selection (pandas SettingWithCopyWarning)
    df_night_pwv_curve1 = df_spec_sel1.loc[df_spec_sel1["nightObs"] == night, pwv_columns].copy()
    df_night_pwv_curve2 = df_spec_sel2.loc[df_spec_sel2["nightObs"] == night, pwv_columns].copy()

    N1 = len(df_night_pwv_curve1)
    N2 = len(df_night_pwv_curve2)

    # A line is fitted to both versions, so both need enough points; testing
    # N2 alone would let a night with too few points in the first version
    # reach the fit.
    if N1 <= 8 or N2 <= 8:
        continue

    # common time origin of the night
    tmin = min(df_night_pwv_curve1["Time"].min(), df_night_pwv_curve2["Time"].min())

    # convert in hours
    df_night_pwv_curve1["dt"] = (df_night_pwv_curve1["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve2["dt"] = (df_night_pwv_curve2["Time"] - tmin).dt.total_seconds()/3600.

    # extract statistics on pwv
    stat1 = df_night_pwv_curve1[["PWV [mm]_x"]].describe()
    stat2 = df_night_pwv_curve2[["PWV [mm]_x"]].describe()

    # the statistics box shows the values of the second version
    count = int(stat2.loc["count"].values[0])
    mean = stat2.loc["mean"].values[0]
    median = stat2.loc["50%"].values[0]
    std = stat2.loc["std"].values[0]
    textstr = "\n".join((f"count : {count}",
                     f"mean : {mean:.1f} mm",
                     f"median : {median:.1f} mm",
                     f"std : {std:.1f} mm",
                    ))

    # does the fit
    x1,y1,yerr1,n1,chi2dof1,xfit1,yfit1,slope1,slope_err1 = MakeLineFit(df_night_pwv_curve1)
    x2,y2,yerr2,n2,chi2dof2,xfit2,yfit2,slope2,slope_err2 = MakeLineFit(df_night_pwv_curve2)

    # the fit boxes show the values of the second version
    textstr2 = "\n".join(("d(PWV/dt) : ",
                 f"slope : {slope2:.3f} mm/h ",
                 f"slope err : {slope_err2:.3f} mm/h",
                 f"chi2/ndeg : {chi2dof2:.2f}",
                ))

    textstr3 = "\n".join(("d(PWV/dt) : ",
                 f"slope : {slope2:.3f} mm/h ",
                 f"slope err : {slope_err2:.3f} mm/h",
                ))

    # append the fit results to the statistics tables
    stat1.loc["slope","PWV [mm]_x"] = slope1
    stat1.loc["slope_err","PWV [mm]_x"] = slope_err1
    stat1.loc["chi2","PWV [mm]_x"] = chi2dof1

    stat2.loc["slope","PWV [mm]_x"] = slope2
    stat2.loc["slope_err","PWV [mm]_x"] = slope_err2
    stat2.loc["chi2","PWV [mm]_x"] = chi2dof2

    # Create the figure
    fig = plt.figure(figsize=(16,6))
    gs = GridSpec(1, 2,  width_ratios=[2,1],figure=fig)

    ax = fig.add_subplot(gs[0])
    # share the y axis with the left panel of this figure
    # (not with an axes belonging to a previously drawn figure)
    ax2 = fig.add_subplot(gs[1],sharey=ax)

    leg=ax.get_legend()

    # left figure : PWV versus absolute time
    df_night_pwv_curve1.plot(x="Time",y="PWV [mm]_x",ax=ax,marker='x',c="b",lw=0.0,grid=True,legend=leg,label=tag1,ms=10)
    ax.errorbar(x=df_night_pwv_curve1.Time, y=df_night_pwv_curve1["PWV [mm]_x"],yerr=df_night_pwv_curve1["PWV [mm]_err_x"],fmt=".",color="b",ecolor="b")

    df_night_pwv_curve2.plot(x="Time",y="PWV [mm]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,legend=leg,label=tag2, ms=10)
    ax.errorbar(x=df_night_pwv_curve2.Time, y=df_night_pwv_curve2["PWV [mm]_x"],yerr=df_night_pwv_curve2["PWV [mm]_err_x"],fmt=".",color="r",ecolor="r")

    ax.legend(loc="upper right")
    ax.set_ylabel("PWV [mm]_x")
    ax.set_xlabel("time")
    ax.xaxis.set_major_formatter(date_form)
    ax.set_ylim(0.,15.)
    ax.text(0.03, 0.95, textstr, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
    ax.text(0.35, 0.95, textstr3, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
    ax.set_title(f"night {night}")

    # right figure : PWV versus elapsed hours, with the fitted lines
    ax2.errorbar(x=df_night_pwv_curve1.dt, y=df_night_pwv_curve1["PWV [mm]_x"],yerr=df_night_pwv_curve1["PWV [mm]_err_x"],fmt="x",color="b",ecolor="b",ms=10,label=tag1)
    ax2.errorbar(x=df_night_pwv_curve2.dt, y=df_night_pwv_curve2["PWV [mm]_x"],yerr=df_night_pwv_curve2["PWV [mm]_err_x"],fmt="+",color="r",ecolor="r",ms=10,label=tag2)

    ax2.set_xlabel("hours")
    ax2.set_ylim(0.,15.)

    ax2.grid()
    ax2.plot(xfit1,yfit1,"b-.")
    ax2.plot(xfit2,yfit2,"r-")
    ax2.text(0.1, 0.95, textstr2, transform=ax2.transAxes, fontsize=14,verticalalignment='top', bbox=props)
    ax2.legend(loc="upper right")
    ax2.set_title(f"night {night}")

    plt.tight_layout()
    figname =f"{pathfigs}/pwv_per_night_{night}"+figtype
    plt.savefig(figname)
    plt.show()

    # add statistics
    all_dateObs_sel1[night] = stat1
    all_dateObs_sel2[night] = stat2

## Make a summary of Night quality

In [None]:
def GetStatistics(all_dateObs_sel):
    """
    Gather the per-night statistics tables into a single summary dataframe.

    input:
      all_dateObs_sel dict {nightObs: stat} where stat is a statistics table
                      holding the rows "count", "mean", "50%", "std", "slope"
                      and "slope_err"; only its first column is read
    output:
      the dataframe indexed by nightObs with the columns
      count, mean, median, std, slope, slope_err
    """
    summary_columns = ["count","mean","median","std","slope","slope_err"]
    # row label of the statistics table feeding each summary column, in order
    source_rows = ("count","mean","50%","std","slope","slope_err")

    summary = pd.DataFrame(columns = summary_columns)
    for nightObs, stat in all_dateObs_sel.items():
        night_values = [stat.loc[label].values[0] for label in source_rows]
        # the count is stored as an integer
        night_values[0] = int(night_values[0])
        summary.loc[nightObs] = night_values
    return summary

In [None]:
# One summary dataframe (one row per fitted night) per compared version.
df1, df2 = (GetStatistics(night_stats) for night_stats in (all_dateObs_sel1, all_dateObs_sel2))

### Convert dateobs into datetime

In [None]:
def ComputeDataTimeStr(num):
    """
    Convert a date written as the integer YYYYMMDD into the string "YYYY-MM-DD".

    input:
      num the date as an integer (or an integer-valued number), e.g. 20230105
    output:
      the zero-padded date string, e.g. "2023-01-05"
    """
    # int() accepts numpy integers and integer-valued floats such as those
    # coming from a dataframe index
    year, month_day = divmod(int(num), 10000)
    month, day = divmod(month_day, 100)
    return f"{year:04d}-{month:02d}-{day:02d}"

In [None]:
# Turn the nightObs index of each summary dataframe into a datetime column.
df1["Time"] = pd.to_datetime(list(map(ComputeDataTimeStr, df1.index)))
df2["Time"] = pd.to_datetime(list(map(ComputeDataTimeStr, df2.index)))

In [None]:
from matplotlib.dates import DateFormatter

# Two stacked panels sharing the time axis:
#   top    : per-night median PWV with the per-night std as error bar
#   bottom : per-night fitted slope with its error
date_form = DateFormatter("%y-%m-%d")
fig,axs = plt.subplots(2,1,figsize=(12,8),sharex=True)
ax1,ax2  = axs

# Legend object currently attached to the top axes, handed to pandas as the legend flag
leg1=ax1.get_legend()

df1.plot(x="Time",y="median",ax=ax1,marker='x',c="b",lw=0.0,grid=True,ms=10,label=tag1,legend=leg1)
ax1.errorbar(x=df1["Time"], y=df1["median"],yerr=df1["std"],fmt=".",color="b",ecolor="b")

df2.plot(x="Time",y="median",ax=ax1,marker='+',c="r",lw=0.0,grid=True,ms=10,label=tag2,legend=leg1)
ax1.errorbar(x=df2["Time"], y=df2["median"],yerr=df2["std"],fmt=".",color="r",ecolor="r")

ax1.set_ylabel("PWV (mm)")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("Median Precipitable water vapor per night")
ax1.set_ylim(0.,15.)
ax1.legend(loc="upper right")

ax2.set_title("Fitted drift per night")
# Axes.errorbar has no "legend" keyword (unknown keywords are forwarded to the
# marker line and rejected); the legend is created by ax2.legend() below.
ax2.errorbar(x=df1["Time"], y=df1["slope"],yerr=df1["slope_err"],fmt="x",color="b",ecolor="b",ms=10,label=tag1)
ax2.errorbar(x=df2["Time"], y=df2["slope"],yerr=df2["slope_err"],fmt="+",color="r",ecolor="r",ms=10,label=tag2)
ax2.grid()
ax2.set_ylabel("dPWV/dt (mm/h)")
ax2.set_xlabel("time")
ax2.set_ylim(-1.,1.)
ax2.xaxis.set_major_formatter(date_form)
ax2.legend(loc="upper right")

ax2.tick_params(axis='x', labelrotation=45)

plt.tight_layout()
figname =f"{pathfigs}/pwv_medianspredslope_allnights"+figtype
plt.savefig(figname)
plt.show()

In [None]:
from matplotlib.dates import DateFormatter

# Per-night median PWV of both versions, with the per-night std as error bar.
date_form = DateFormatter("%y-%m-%d")
fig, axs = plt.subplots(1, 1, figsize=(14, 6))
ax = axs
# Legend object currently attached to the axes, handed to pandas as the legend flag
leg = ax.get_legend()

# (summary dataframe, marker, color, label, error-bar marker format) per version
for night_summary, marker, color, tag, errfmt in ((df1, 'x', "b", tag1, "."),
                                                  (df2, '+', "r", tag2, "o")):
    night_summary.plot(x="Time", y="median", ax=ax, marker=marker, c=color, ms=10,
                       lw=0.0, grid=True, label=tag, legend=leg)
    ax.errorbar(x=night_summary["Time"], y=night_summary["median"],
                yerr=night_summary["std"], fmt=errfmt, ms=5, color=color, ecolor=color)

ax.set_ylabel("PWV (mm)")
ax.set_xlabel("time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Median and spread of Precipitable water vapor per night")
ax.set_ylim(0.,15)
ax.legend(loc="upper right")

figname = f"{pathfigs}/pwv_medians_allnights" + figtype
plt.savefig(figname)
plt.show()