# Time correlation on PWV and Discrete Covariance Function, updated with clear-sky selection (V1)

- author Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date 2025-02-10 :
- last update 2025-02-10
- last update 2025-02-10 : errorbars on X,Y
- last update : 2025-03-12 : Compute my DCT with pwv product.
- last update : 2025-03-13 : Include pair with itself and correct bug with sigmaPWV (don't put residuals)
- Kernel @usdf **w_2025_50**
- Office emac : mamba_py311
- Home emac : base (conda)
- laptop : conda_py310
- pyzdcf : Read the Docs, https://pyzdcf.readthedocs.io/en/latest/ and GitHub, https://github.com/LSST-sersag/pyzdcf, available on PyPI : https://pypi.org/project/pyzdcf/
- CANNOT INSTALL PYZDCF AT USDF

**Goal** : Show correlation holo /Merra

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
print(python_version())

In [None]:
import os

In [None]:
from pyzdcf import pyzdcf

In [None]:
# Directory where the figures are written.
pathfigs = "figsHoloCorrelationPWVTime-MyDCT"
# exist_ok=True replaces the "test then create" pattern: the directory is
# created when missing and the call is a no-op when it already exists.
os.makedirs(pathfigs, exist_ok=True)
# File extension appended to every saved figure name.
figtype = ".png"

In [None]:
# Root data directory with one sub-directory for the input time curves and
# one for the results.
pathdata = "dataHoloCorrelationPWVTime-MyDCT"
datapath_input = os.path.join(pathdata, "timecurves")
datapath_output = os.path.join(pathdata, "results")
# exist_ok=True makes each call a no-op when the directory already exists,
# so no separate existence test is needed.
for _path in (pathdata, datapath_input, datapath_output):
    os.makedirs(_path, exist_ok=True)

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

import seaborn as sns

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from astropy.coordinates.earth import EarthLocation
from datetime import datetime
from pytz import timezone

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

In [None]:
def convertNumToDatestr(num):
    """Convert an integer date written as YYYYMMDD into a pandas timestamp.

    Parameters
    ----------
    num : int
        Date encoded as year*10_000 + month*100 + day (e.g. 20230930).

    Returns
    -------
    The result of ``pd.to_datetime`` on the zero-padded "YYYY-MM-DD" string.
    """
    # peel off the year, then split the remainder into month and day
    year, month_day = divmod(num, 10_000)
    month, day = divmod(month_day, 100)

    padded = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(padded))

## Configuration

In [None]:
observing_location = EarthLocation.of_site('Rubin Observatory')
tz = timezone('America/Santiago')

### Spectro Hologram data

In [None]:
# When True, only nights after DATE_WITHCOLLIMATOR are kept further down.
FLAG_WITHCOLLIMATOR = False
# Reference night, encoded as YYYYMMDD.
DATE_WITHCOLLIMATOR = 20230930
# Timestamp (00:00 UTC) of the reference night, derived from the integer
# constant above. Previously the derived value was immediately overwritten by
# a hard-coded "2023-09-30 00:00:00.0+0000" literal, so the two definitions
# could silently drift apart; the value is unchanged.
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

In [None]:
# Key selecting which set of results is analysed; used below to index both
# legendtag and atmfilenamesdict.
version_results = "v5"
# Label associated with each results version.
legendtag = {"v1" : "old v3.1.0",
            "v2" : "v3.1.0-PWV<10mm",
            "v3" : "v3.1.0-PWV<15mm",
            "v4" : "Auxtel holo v3.1.0",
            "v5" : "Auxtel holo v3.1.0 09/22 - 10/24"}

In [None]:
# Input .npy file for each results version (same keys as legendtag).
# Only "v1" carries a directory prefix; the other entries are bare file names,
# resolved relative to the current working directory when loaded.
atmfilenamesdict = {"v1" : "data/spectro/auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_testWithMaskedEdges_newBoundaries_newPolysRescaled_newFitBounds_adjustA1_lockedOrder2_removeThroughputTails_2.npy",
                    "v2" : "auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_PeekFinder.npy",
                    "v3" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z.npy",
                    "v4" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z_spectrfullextend.npy",
                    "v5" : "u_dagoret_auxtel_atmosphere_202209_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_No5SigmaClip_20241016T184601Z_spectrfullextended.npy"}

In [None]:
atmfilename = atmfilenamesdict[version_results]
tag = legendtag[version_results] 

## Initialisation

### Read the file

In [None]:
specdata = np.load(atmfilename,allow_pickle=True)

In [None]:
df_spec = pd.DataFrame(specdata)

In [None]:
#list(df_spec.columns)

### Remove spectra with red filter

In [None]:
df_spec['FILTER'].unique()

In [None]:
# Keep only the exposures whose FILTER value is 'empty'.
FLAG_REMOVE_FILTERS = True
if FLAG_REMOVE_FILTERS:
    # reset_index() without drop: the previous row labels are kept in a new
    # "index" column and the rows are renumbered from 0.
    df_spec = df_spec[df_spec["FILTER"] == 'empty'].reset_index()

### Define if a target is faint or bright

In [None]:
def IsFaint(row):
    """Tell whether the TARGET of a row belongs to the list of faint targets.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row) with a "TARGET" entry.

    Returns
    -------
    bool : True when row["TARGET"] is one of the selected faint targets.
    """
    faint_targets = ['Feige110','HD074000','HD115169','HD031128','HD200654','HD167060','HD009051','HD142331','HD160617','HD111980']
    # only the first 10 entries are considered (currently the whole list)
    selected_faint_targets = faint_targets[:10]
    return row["TARGET"] in selected_faint_targets

In [None]:
df_spec["isFaint"] = df_spec.apply(IsFaint,axis=1)

### Compute NightObs

In [None]:
# nightObs is the "id" with its last five digits removed. Column-wise integer
# division gives the same values as the former row-by-row apply(axis=1),
# without building one Series per row.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
if FLAG_WITHCOLLIMATOR:
    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR]

## Apply Quality selection

In [None]:
fig,ax = plt.subplots(1,1)
df_spec["CHI2_FIT"].hist(bins=50,ax=ax,range=(0,200))
ax.set_yscale("log")

### Add the Time in pd.datetime

#### UTC

In [None]:
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])

In [None]:
# Plot range: the observation period extended by one week on each side.
DT = pd.Timedelta(days=7)
_all_times = df_spec["Time"]
TMIN = _all_times.min() - DT
TMAX = _all_times.max() + DT

### Compute relative time to Mid-night

In [None]:
def GetTimeToMidNight(row):
    """Return the local observation time relative to local midnight, in hours.

    The UTC 'DATE-OBS' of the row is converted to local time in the timezone
    `tz`. Times up to 12h after 00:00 are returned as they are; later times
    are referred to the following midnight and come out negative, so the
    result lies in (-12, 12].

    Parameters
    ----------
    row : mapping with a 'DATE-OBS' entry that astropy `Time` accepts as UTC.

    Returns
    -------
    float : hours relative to midnight (whole-second resolution).
    """
    utc_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # local wall-clock time, either before or after midnight
    local = utc_time.to_datetime(timezone=tz)

    # whole seconds elapsed since local 00:00 (microseconds are dropped)
    seconds_since_midnight = 3600 * local.hour + 60 * local.minute + local.second
    hours_since_midnight = seconds_since_midnight / 3600.

    # evening observations are counted from the coming midnight
    if hours_since_midnight > 12.:
        return hours_since_midnight - 24.
    return hours_since_midnight

In [None]:
df_spec["dt_midnight"] = df_spec.apply(GetTimeToMidNight,axis=1)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,4))
df_spec["dt_midnight"].hist(bins=48,range=(-12,12),ax=ax,facecolor="blue") 
ax.set_xlabel("time relative to midnight (hour)")
ax.set_title("Observation time")

### Compute Date relative to January

In [None]:
def GetDateToMidJanuary(row):
    """Return the local observation date-time folded onto the year 2024.

    The UTC 'DATE-OBS' of the row is converted to local time in the timezone
    `tz`; month, day, hour, minute and second are kept and the year is
    replaced by 2024, so observations of different years share one calendar
    axis.

    Returns
    -------
    Naive pandas timestamp (no timezone, no sub-second part).
    """
    utc_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # local wall-clock time at the site
    local = utc_time.to_datetime(timezone=tz)

    # timetuple()[1:6] is (month, day, hour, minute, second)
    folded = datetime(2024, *local.timetuple()[1:6])
    return pd.to_datetime(folded)

In [None]:
#df_spec["Time_january"] = df_spec.apply(GetDateToMidJanuary,axis=1)

In [None]:
def GetDateToMidJanuaryAndYear(row):
    """Return the local observation date-time folded onto 2024, and its true year.

    The UTC 'DATE-OBS' of the row is converted to local time in the timezone
    `tz`. The first returned item keeps month, day, hour, minute and second
    with the year forced to 2024; the second item is the year of the local
    time.

    Returns
    -------
    (naive pandas timestamp, int)
    """
    utc_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # local wall-clock time at the site
    local = utc_time.to_datetime(timezone=tz)

    # timetuple()[1:6] is (month, day, hour, minute, second)
    folded = datetime(2024, *local.timetuple()[1:6])
    return pd.to_datetime(folded), local.year

In [None]:
df_spec[["Time_january","Year"]] = df_spec.apply(GetDateToMidJanuaryAndYear,axis=1,result_type="expand")

In [None]:
df_spec[["Time_january","Year"]]

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec):
    """Build the time boundaries of each observation night.

    Parameters
    ----------
    df_spec : DataFrame with a "nightObs" column and a "Time" column.

    Returns
    -------
    dict mapping each nightObs value (in order of first appearance) to the
    tuple (first Time of the night - 30 min, last Time of the night + 30 min).
    """
    margin = pd.Timedelta(minutes=30)
    boundaries = {}
    for nightobs in df_spec["nightObs"].unique():
        night_times = df_spec.loc[df_spec["nightObs"] == nightobs, "Time"]
        boundaries[nightobs] = (night_times.min() - margin, night_times.max() + margin)
    return boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec)

## Plot all data

In [None]:
from matplotlib.dates import DateFormatter

# Pressure versus time for all spectra, before any quality cut.
date_form = DateFormatter("%y-%m-%d")
fig, axs = plt.subplots(1, 1, figsize=(14, 6))
ax = axs
leg = ax.get_legend()

ax.set_xlim(TMIN, TMAX)
df_spec.plot(x="Time", y="OUTPRESS", ax=ax, marker='+', c="r", lw=0.0, grid=True, label=tag, legend=leg)
ax.set(xlabel="time", ylabel="OUTPRESS", title="Pressure (before quality cuts)")
ax.xaxis.set_major_formatter(date_form)
ax.legend()

# yellow band: period from TMIN to the collimator reference date
if not FLAG_WITHCOLLIMATOR:
    ax.axvspan(TMIN, datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)

# blue bands: one per observation night
for night_start, night_end in dn.values():
    ax.axvspan(night_start, night_end, color='blue', alpha=0.1)

figname = f"{pathfigs}/pressure_allpoints_allnights_nocuts{figtype}"
plt.savefig(figname)
plt.show()


In [None]:
from matplotlib.dates import DateFormatter

# PWV versus time for all spectra, before any quality cut.
date_form = DateFormatter("%y-%m-%d")
fig, axs = plt.subplots(1, 1, figsize=(14, 6))
ax = axs
leg = ax.get_legend()

ax.set_xlim(TMIN, TMAX)
df_spec.plot(x="Time", y="PWV [mm]_x", ax=ax, marker='+', c="r", lw=0.0, grid=True, label=tag, legend=leg)
ax.set(xlabel="time", ylabel="PWV [mm]_x",
       title="Precipitable water vapor measured by holo vs time (before quality cuts)")
ax.xaxis.set_major_formatter(date_form)
ax.legend()

# yellow band: period from TMIN to the collimator reference date
if not FLAG_WITHCOLLIMATOR:
    ax.axvspan(TMIN, datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)

# blue bands: one per observation night
for night_start, night_end in dn.values():
    ax.axvspan(night_start, night_end, color='blue', alpha=0.1)

ax.set_ylim(0., 15.)

figname = f"{pathfigs}/pwv_allpoints_allnights_nocuts{figtype}"
plt.savefig(figname)
plt.show()


In [None]:
fig,axs = plt.subplots(1,2,figsize=(14,4))
ax1,ax2=axs.flatten()
df_spec.hist("VAOD_x",bins=50,ax=ax1)
df_spec.hist("VAOD_y",bins=50,ax=ax2)

## Apply Quality selection cuts

In [None]:
def getSelectionCut(df_spec, chi2max=20., pwvmin=0.1, pwvmax=14.9, ozmin=100., ozmax=600., vaodmax=0.3):
    """Return the boolean quality-selection mask for the spectra.

    A row is selected when all of the following hold:
    CHI2_FIT < chi2max, pwvmin < PWV < pwvmax for both "PWV [mm]_x" and
    "PWV [mm]_y", 186.75 < D2CCD < 187.75, VAOD_x < vaodmax, EXPTIME > 20
    and ozmin < "ozone [db]_y" < ozmax.

    `vaodmax` used to be ignored (the VAOD_x limit was hard-coded to 0.3
    while the parameter defaulted to 0.1). It is now applied, and its default
    is 0.3 so that a call relying on the defaults returns the same mask as
    before.

    Parameters
    ----------
    df_spec : DataFrame holding the columns listed above.
    chi2max, pwvmin, pwvmax, ozmin, ozmax, vaodmax : float
        Cut values.

    Returns
    -------
    Boolean Series aligned on df_spec.
    """
    pwv_x = df_spec["PWV [mm]_x"]
    pwv_y = df_spec["PWV [mm]_y"]
    d2ccd = df_spec["D2CCD"]
    ozone = df_spec["ozone [db]_y"]

    cut = (
        (df_spec["CHI2_FIT"] < chi2max)
        & (pwv_x > pwvmin) & (pwv_x < pwvmax)
        & (d2ccd > 186.75) & (d2ccd < 187.75)
        & (df_spec["VAOD_x"] < vaodmax)
        & (df_spec['EXPTIME'] > 20.)
        & (pwv_y > pwvmin) & (pwv_y < pwvmax)
        & (ozone > ozmin) & (ozone < ozmax)
    )
    return cut

In [None]:
def getSelectionCutNoPolar(df_spec, chi2max=20., pwvmin=0.1, pwvmax=14.9, ozmin=100., ozmax=600., vaodmax=0.3):
    """Return the quality-selection mask, excluding the target "HD185975".

    A row is selected when all of the following hold:
    CHI2_FIT < chi2max, pwvmin < PWV < pwvmax for both "PWV [mm]_x" and
    "PWV [mm]_y", 186.75 < D2CCD < 187.75, VAOD_x < vaodmax, EXPTIME > 20,
    ozmin < "ozone [db]_y" < ozmax and TARGET != "HD185975".

    `vaodmax` used to be ignored (the VAOD_x limit was hard-coded to 0.3
    while the parameter defaulted to 0.1). It is now applied, and its default
    is 0.3 so that a call relying on the defaults returns the same mask as
    before.

    Returns
    -------
    Boolean Series aligned on df_spec.
    """
    pwv_x = df_spec["PWV [mm]_x"]
    pwv_y = df_spec["PWV [mm]_y"]
    d2ccd = df_spec["D2CCD"]
    ozone = df_spec["ozone [db]_y"]

    cut = (
        (df_spec["CHI2_FIT"] < chi2max)
        & (pwv_x > pwvmin) & (pwv_x < pwvmax)
        & (d2ccd > 186.75) & (d2ccd < 187.75)
        & (df_spec["VAOD_x"] < vaodmax)
        & (df_spec['EXPTIME'] > 20.)
        & (pwv_y > pwvmin) & (pwv_y < pwvmax)
        & (ozone > ozmin) & (ozone < ozmax)
        & (df_spec["TARGET"] != "HD185975")
    )
    return cut

In [None]:
def getSelectionCutWithPolar(df_spec, chi2max=20., pwvmin=0.1, pwvmax=14.9, ozmin=100., ozmax=600., vaodmax=0.3):
    """Return the quality-selection mask restricted to the target "HD185975".

    A row is selected when all of the following hold:
    CHI2_FIT < chi2max, pwvmin < PWV < pwvmax for both "PWV [mm]_x" and
    "PWV [mm]_y", 186.75 < D2CCD < 187.75, VAOD_x < vaodmax, EXPTIME > 20,
    ozmin < "ozone [db]_y" < ozmax and TARGET == "HD185975".

    `vaodmax` used to be ignored (the VAOD_x limit was hard-coded to 0.3
    while the parameter defaulted to 0.1). It is now applied, and its default
    is 0.3 so that a call relying on the defaults returns the same mask as
    before.

    Returns
    -------
    Boolean Series aligned on df_spec.
    """
    pwv_x = df_spec["PWV [mm]_x"]
    pwv_y = df_spec["PWV [mm]_y"]
    d2ccd = df_spec["D2CCD"]
    ozone = df_spec["ozone [db]_y"]

    cut = (
        (df_spec["CHI2_FIT"] < chi2max)
        & (pwv_x > pwvmin) & (pwv_x < pwvmax)
        & (d2ccd > 186.75) & (d2ccd < 187.75)
        & (df_spec["VAOD_x"] < vaodmax)
        & (df_spec['EXPTIME'] > 20.)
        & (pwv_y > pwvmin) & (pwv_y < pwvmax)
        & (ozone > ozmin) & (ozone < ozmax)
        & (df_spec["TARGET"] == "HD185975")
    )
    return cut

In [None]:
# Quality masks: all targets, without / with the target HD185975 ("polar").
cut = getSelectionCut(df_spec)
cut_nopolar = getSelectionCutNoPolar(df_spec)
cut_wthpolar = getSelectionCutWithPolar(df_spec)
# The bright / faint masks split the non-polar mask computed above instead of
# evaluating the same selection two more times.
cut_nopolar_bright = cut_nopolar & (~df_spec["isFaint"])
cut_nopolar_faint = cut_nopolar & (df_spec["isFaint"])

In [None]:
df_spec_sel = df_spec[cut]
df_spec_np = df_spec[cut_nopolar] 
df_spec_np_b = df_spec[cut_nopolar_bright]
df_spec_np_f = df_spec[cut_nopolar_faint]
df_spec_wp = df_spec[cut_wthpolar]

In [None]:
print("Total number of Spectra          : ",len(df_spec))
print("Number of selected Spectra       : ",len(df_spec_sel))
print("Number of selected Polars        : ",len(df_spec_wp))
print("Number of selected Non-Polars    : ",len(df_spec_np))
print("Number of selected Non-Polars Bright : ",len(df_spec_np_b))
print("Number of selected Non-Polars Faint  : ",len(df_spec_np_f))

In [None]:
df_spec_sel.reset_index(drop=True,inplace=True)
df_spec_np.reset_index(drop=True,inplace=True)
df_spec_wp.reset_index(drop=True,inplace=True) 
df_spec_np_b.reset_index(drop=True,inplace=True)
df_spec_np_f.reset_index(drop=True,inplace=True)

In [None]:
#List_Of_Faint_targets = ['Feige110','HD074000','HD115169','HD031128','HD200654','HD167060','HD009051','HD142331','HD160617','HD111980']
print("Polar            :",len(df_spec_wp["TARGET"].unique()),"\t", df_spec_wp["TARGET"].unique()) 
print("Non Polar        :",len(df_spec_np["TARGET"].unique()),"\t" ,df_spec_np["TARGET"].unique())
print("Non Polar Bright :",len(df_spec_np_b["TARGET"].unique()),"\t" ,df_spec_np_b["TARGET"].unique())
print("Non Polar Faint  :",len(df_spec_np_f["TARGET"].unique()),"\t",df_spec_np_f["TARGET"].unique())

## Recompute night boundaries

In [None]:
#dn = GetNightBoundariesDict(df_spec_sel)

## Plot all data

In [None]:
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%d")

# PWV of the selected spectra versus the date folded onto a single year
# ("Time_january"), one colour per true observation year.
# The figure uses the constrained layout engine only: the former additional
# plt.tight_layout() call asked for a second, incompatible layout engine.
fig,axs = plt.subplots(1,1,figsize=(14,6),layout='constrained')
ax  = axs
leg=ax.get_legend()

for year, color in ((2022, "g"), (2023, "b"), (2024, "r")):
    df_year = df_spec_sel[df_spec_sel.Year == year]
    # a year without any selected spectrum has nothing to draw
    if df_year.empty:
        continue
    df_year.plot(x="Time_january",y="PWV [mm]_x",ax=ax,marker='+',c=color,lw=0.0,grid=True,label=str(year),legend=leg,ms=5,alpha=1.)

ax.set_ylabel("PWV [mm]_x")
ax.set_xlabel("date (since January)")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Precipitable water vapor measured by holo (modulo 1 year)",fontweight="bold")
ax.legend()

# NOTE(review): the bands below use the true dates (dn, TMIN and
# datetime_WITHCOLLIMATOR come from the "Time" column) while the points are
# plotted against "Time_january", whose year is forced to 2024. Confirm that
# bands outside 2024 are really wanted on this folded axis.
for key, tt in dn.items():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.1)

ax.axvspan(TMIN,datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)

ax.set_ylim(0.,15.)

figname =f"{pathfigs}/pwvholoM2_allpoints_allnights_modulo1year"+figtype
plt.savefig(figname)
plt.show()

In [None]:
df_spec.DEC*u.deg

In [None]:
df_spec.RA

## Plot night by night

In [None]:
def ComputeRepeatability(df):
    """Compute the row-to-row repeatability of the PWV measurements.

    Each row is compared with the row just before it (in the order of `df`).

    Parameters
    ----------
    df : DataFrame with the columns "nightObs", "dt", "TARGET",
        "PWV [mm]_x" and "PWV [mm]_y".

    Returns
    -------
    DataFrame with the same index as `df` and the columns
        nightObs     : copied from df
        dt           : copied from df
        dt_rep       : (dt - previous dt) * 3600 (seconds if dt is in hours)
        dPWVx_rep    : "PWV [mm]_x" minus its previous value
        dPWVy_rep    : "PWV [mm]_y" minus its previous value
        targflag_rep : True when TARGET equals the previous TARGET
        Npoints      : number of rows of df
    The first row has no predecessor: its differences are 0 and its flag is
    False.

    Notes
    -----
    The columns now carry proper dtypes (float / bool / int). The previous
    implementation filled an object-dtype frame row by row, and its
    ``astype(bool)`` call had no effect because the result was discarded.
    """
    N = len(df)

    dt = df["dt"].to_numpy(dtype=float)
    pwvx = df["PWV [mm]_x"].to_numpy(dtype=float)
    pwvy = df["PWV [mm]_y"].to_numpy(dtype=float)
    target = df["TARGET"].to_numpy(dtype=object)

    # first element stays at 0 / False: there is nothing to compare it with
    dt_rep = np.zeros(N)
    dPWVx_rep = np.zeros(N)
    dPWVy_rep = np.zeros(N)
    targflag_rep = np.zeros(N, dtype=bool)
    if N > 1:
        dt_rep[1:] = (dt[1:] - dt[:-1]) * 3600.
        dPWVx_rep[1:] = pwvx[1:] - pwvx[:-1]
        dPWVy_rep[1:] = pwvy[1:] - pwvy[:-1]
        targflag_rep[1:] = target[1:] == target[:-1]

    dfout = pd.DataFrame(
        {
            "nightObs": df["nightObs"].to_numpy(),
            "dt": dt,
            "dt_rep": dt_rep,
            "dPWVx_rep": dPWVx_rep,
            "dPWVy_rep": dPWVy_rep,
            "targflag_rep": targflag_rep,
            "Npoints": np.full(N, N, dtype=int),
        },
        index=df.index,
    )
    return dfout

In [None]:
all_selected_nights = df_spec_sel["nightObs"].unique()

## Gaussian and linear fits

In [None]:
def funclineres(params, x, y, yerr):
    """Normalised residual of a straight line: (observed - model) / error.

    The model is params[0] + params[1]*x.
    """
    intercept, slope = params[0], params[1]
    residual = y - intercept - slope*x
    return residual/yerr
def funcline(params,x):
    """Straight-line model: params[0] is the intercept, params[1] the slope."""
    intercept, slope = params[0], params[1]
    return intercept + slope*x

In [None]:
def MakeLineFit(df_night_pwv_curve):
    """Fit a straight line to "PWV [mm]_x" versus "dt_midnight" for one night.

    The fit minimises the residuals of `funclineres`, weighted by
    "PWV [mm]_err_x", starting from intercept 5 and slope 0.

    Returns
    -------
    x, y, yerr : the fitted data and their errors
    n          : number of points
    chi2dof    : chi2 divided by (n - number of parameters)
    xfit, yfit : the fitted line sampled between 0.99*x.min() and 1.05*x.max()
    slope, slope_err : fitted slope and its error, taken from the covariance
                 (J^T J)^-1 scaled by chi2dof
    """
    x = df_night_pwv_curve["dt_midnight"].values
    y = df_night_pwv_curve["PWV [mm]_x"].values
    yerr = df_night_pwv_curve["PWV [mm]_err_x"].values
    n = len(y)

    fit_res = least_squares(funclineres, [5., 0], args=(x, y, yerr))
    popt = fit_res.x
    npar = len(popt)

    # reduced chi2 at the best-fit parameters
    best_residuals = funclineres(popt, x, y, yerr)
    chi2dof = (best_residuals**2).sum()/(n-npar)

    # parameter covariance from the Jacobian, rescaled by the reduced chi2
    jac = fit_res.jac
    cov = np.linalg.inv(jac.T.dot(jac)) * chi2dof
    perr = np.sqrt(np.diagonal(cov))

    xfit = np.linspace(x.min()*0.99, x.max()*1.05)
    yfit = funcline(popt, xfit)

    return x, y, yerr, n, chi2dof, xfit, yfit, popt[1], perr[1]

In [None]:
#
# Gaussian Process regression
# https://scikit-learn.org/1.5/auto_examples/gaussian_process/plot_gpr_noisy_targets.html
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

In [None]:
def MakeGaussianProcessFit(df_night_pwv_curve):
    """Fit a Gaussian Process (RBF kernel) to "PWV [mm]_x" versus "dt_midnight".

    The kernel is 1.0 * RBF with an initial length scale of 5 bounded to
    [0.5, 12]. The regressor is given a fixed noise term alpha = 0.5**2; the
    per-point errors of the data frame are not used. The locals that were
    computed and never used (mean, errors, number of points) were removed.

    Parameters
    ----------
    df_night_pwv_curve : DataFrame with "dt_midnight" and "PWV [mm]_x".

    Returns
    -------
    xfit             : 50 points evenly spaced over the range of dt_midnight
    mean_prediction  : predicted mean at xfit
    std_prediction   : predicted standard deviation at xfit
    gaussian_process : the fitted GaussianProcessRegressor
    """
    x_train = df_night_pwv_curve["dt_midnight"].values
    y_train = df_night_pwv_curve["PWV [mm]_x"].values

    xfit = np.linspace(x_train.min(), x_train.max(), 50)
    # scikit-learn expects 2-D inputs of shape (n_samples, n_features)
    X_fit = xfit.reshape(-1, 1)
    X_train = x_train.reshape(-1, 1)

    noise_std = 0.5

    kernel = 1. * RBF(length_scale=5.0, length_scale_bounds=(0.5, 12.))
    gaussian_process = GaussianProcessRegressor(kernel=kernel, alpha=noise_std**2, n_restarts_optimizer=9)
    gaussian_process.fit(X_train, y_train)

    mean_prediction, std_prediction = gaussian_process.predict(X_fit, return_std=True)
    return xfit, mean_prediction, std_prediction, gaussian_process

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# https://scikit-learn.org/1.5/auto_examples/linear_model/plot_ard.html
from sklearn.linear_model import ARDRegression,BayesianRidge

In [None]:
POLY_DEGREE_MAX = 10

In [None]:
def MakeLinearModelFit(df_night_pwv_curve,degree_max = POLY_DEGREE_MAX ):
    """Fit polynomial Bayesian linear models to "PWV [mm]_x" versus "dt_midnight".

    Two pipelines (PolynomialFeatures -> StandardScaler -> regressor) are
    fitted, one with ARDRegression and one with BayesianRidge, see
    https://scikit-learn.org/1.5/auto_examples/linear_model/plot_ard.html
    The locals that were computed and never used (errors, number of points)
    were removed.

    Parameters
    ----------
    df_night_pwv_curve : DataFrame with "dt_midnight" and "PWV [mm]_x".
    degree_max : int
        Degree of the polynomial features.

    Returns
    -------
    xfit             : 50 points evenly spaced over the range of dt_midnight
    y_ard, y_ard_std : ARD prediction and its standard deviation at xfit
    y_brr, y_brr_std : BayesianRidge prediction and its standard deviation at xfit
    resy             : training values minus the ARD prediction at the training points
    y_mean           : mean of the training values
    """
    x_train = df_night_pwv_curve["dt_midnight"].values
    y_train = df_night_pwv_curve["PWV [mm]_x"].values
    y_mean = y_train.mean()

    xfit = np.linspace(x_train.min(), x_train.max(), 50)
    # scikit-learn expects 2-D inputs of shape (n_samples, n_features)
    X_fit = xfit.reshape(-1, 1)
    X_train = x_train.reshape(-1, 1)

    ard_poly = make_pipeline(
        PolynomialFeatures(degree=degree_max, include_bias=False),
        StandardScaler(),
        ARDRegression(),
    ).fit(X_train, y_train)

    brr_poly = make_pipeline(
        PolynomialFeatures(degree=degree_max, include_bias=False),
        StandardScaler(),
        BayesianRidge(),
    ).fit(X_train, y_train)

    y_ard, y_ard_std = ard_poly.predict(X_fit, return_std=True)
    y_brr, y_brr_std = brr_poly.predict(X_fit, return_std=True)

    # residuals of the ARD model at the training points
    resy = y_train - ard_poly.predict(X_train, return_std=False)

    return xfit,y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean

In [None]:
# Container of images that all goes in a pdf
all_figs_to_pdf = []

In [None]:
# Statistics of "PWV [mm]_x" for each night having more than 10 selected spectra.
all_dateObs_sel = {}

# Columns kept for the per-night PWV time curves.
PWV_CURVE_COLUMNS = ["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET"]

# The per-night GP / ARD fits and their figure were switched off with an
# anonymous `if 0:`. The switch now has a name; set it to True to produce them.
FLAG_PLOT_NIGHT_FITS = False

# loop on nights
for night in all_selected_nights:
    # select the night for each kind of observation
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]
    df_spec_night_wp = df_spec_wp[df_spec_wp["nightObs"] == night]
    df_spec_night_np = df_spec_np[df_spec_np["nightObs"] == night]
    df_spec_night_np_f = df_spec_np_f[df_spec_np_f["nightObs"] == night]
    df_spec_night_np_b = df_spec_np_b[df_spec_np_b["nightObs"] == night]

    # select the variables; .copy() gives each curve its own data, so adding
    # the "dt" column below is not a chained assignment on a slice
    df_night_pwv_curve = df_spec_night[PWV_CURVE_COLUMNS].copy()
    df_night_pwv_curve_wp = df_spec_night_wp[PWV_CURVE_COLUMNS].copy()
    df_night_pwv_curve_np_b = df_spec_night_np_b[PWV_CURVE_COLUMNS].copy()
    df_night_pwv_curve_np_f = df_spec_night_np_f[PWV_CURVE_COLUMNS].copy()
    df_night_pwv_curve_np = df_spec_night_np[PWV_CURVE_COLUMNS].copy()

    tmin = df_night_pwv_curve["Time"].min()
    tmax = df_night_pwv_curve["Time"].max()

    list_of_targets = df_night_pwv_curve["TARGET"].unique()
    str_list_of_targets = "\n".join(list_of_targets)

    # time since the first selected spectrum of the night, converted to hours
    for _curve in (df_night_pwv_curve, df_night_pwv_curve_wp, df_night_pwv_curve_np,
                   df_night_pwv_curve_np_f, df_night_pwv_curve_np_b):
        _curve["dt"] = (_curve["Time"] - tmin).dt.total_seconds()/3600.

    # extract statistics on pwv
    stat = df_night_pwv_curve[["PWV [mm]_x"]].describe()
    date_form = DateFormatter("%y-%m-%dT%H:%M")
    count = int(stat.loc["count"].values[0])
    mean = stat.loc["mean"].values[0]
    median = stat.loc["50%"].values[0]
    std = stat.loc["std"].values[0]
    textstr = "\n".join((f"count : {count}",
                     f"mean : {mean:.1f} mm",
                     f"median : {median:.1f} mm",
                     f"std : {std:.1f} mm",
                    ))

    N = len(df_night_pwv_curve)

    # only nights with more than 10 selected spectra are kept
    if N > 10:

        if FLAG_PLOT_NIGHT_FITS:

            # does the fits
            xfit, mean_prediction, std_prediction, gp = MakeGaussianProcessFit(df_night_pwv_curve)
            xfit2, y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean = MakeLinearModelFit(df_night_pwv_curve)

            # plot
            fig = plt.figure(figsize=(16,4),constrained_layout=True)
            gs = GridSpec(1, 2,  width_ratios=[1,1],figure=fig)

            ax = fig.add_subplot(gs[0])
            ax2 = fig.add_subplot(gs[1])

            leg=ax.get_legend()
            leg2=ax2.get_legend()

            # data points, drawn identically on both panels
            subsets = ((df_night_pwv_curve_wp, "cyan", "polar"),
                       (df_night_pwv_curve_np_b, "r", "bright, no polar"),
                       (df_night_pwv_curve_np_f, "g", "faint, no polar"))
            for axis in (ax, ax2):
                axis.errorbar(x=df_night_pwv_curve.dt_midnight, y=df_night_pwv_curve["PWV [mm]_x"],yerr=df_night_pwv_curve["PWV [mm]_err_x"],fmt="+",color="r",ecolor="k",ms=1)
                for df_subset, color, label in subsets:
                    # a category without any spectrum that night has nothing to draw
                    if df_subset.empty:
                        continue
                    df_subset.plot(x="dt_midnight",y="PWV [mm]_x",c=color,ax=axis,marker='o',lw=0.0,grid=True,legend=leg2,label=label,ms=6,alpha=1)

            ################################
            # left figure : Gaussian Process
            ################################
            ax.plot(xfit, mean_prediction,"b-",lw=3,label="Gaussian Process (RBF)")
            ax.fill_between(xfit,mean_prediction - 1.96 * std_prediction,mean_prediction + 1.96 * std_prediction,alpha=0.3,label=r"95% CL",facecolor='grey')

            ax.set_xlabel("hours")
            ax.set_ylim(0.,15.)
            ax.legend(loc="upper right")

            ax.set_title(f"GP fit for night {night}")
            ax.axvline(x=0,color="k",lw=2)

            ############################
            # right figure : linear fits
            ############################
            ax2.plot(xfit2, y_ard,"g-",lw=3,label="ARDRegression")
            ax2.fill_between(xfit2, y_ard - 1.96 * y_ard_std,y_ard + 1.96 * y_ard_std,alpha=0.3,label=r"95% CL",facecolor='green')

            ax2.set_title(f"Linear fit for night {night}")
            ax2.set_xlabel("hours")
            ax2.set_ylim(0.,15.)
            ax2.legend()
            ax2.axvline(x=0,color="k",lw=2)

            figname =f"{pathfigs}/gp_ard_pwv_per_night_{night}"+figtype
            plt.savefig(figname)
            plt.show()

        # add statistics
        all_dateObs_sel[night] = stat
        

## PWV difference and Time difference

In [None]:
def ComputePWVAndTimeDiffence(df):
    """Compute pair quantities between all time-ordered pairs of rows.

    Every ordered pair (row1, row2) with t2 - t1 > 0 is kept, where t is the
    "dt" column. Pairs are returned in the order of a double loop over the
    rows (row1 outer, row2 inner).

    Parameters
    ----------
    df : DataFrame with the columns "dt" and "PWV [mm]_x".

    Returns
    -------
    all_DT       : t2 - t1 for each pair (always > 0)
    all_DPWV     : PWV2 - PWV1 for each pair
    all_pwvpwv   : (PWV1 - meanPWV) * (PWV2 - meanPWV), where meanPWV is the
                   mean of "PWV [mm]_x" over df
    all_PWVpairs : array of shape (npairs, 2) holding (PWV1, PWV2)

    Notes
    -----
    The pair selection is done with numpy broadcasting instead of two nested
    ``iterrows`` loops. When no pair is selected, all_PWVpairs has shape
    (0, 2).
    """
    t = df["dt"].to_numpy(dtype=float)
    PWV = df["PWV [mm]_x"].to_numpy(dtype=float)
    # pandas mean, as before (NaN values are skipped)
    pwv = PWV - df["PWV [mm]_x"].mean()

    # delta_t[i, j] = t[j] - t[i]; nonzero() scans row by row, which
    # reproduces the ordering of the original nested loops
    delta_t = t[np.newaxis, :] - t[:, np.newaxis]
    i1, i2 = np.nonzero(delta_t > 0)

    all_DT = delta_t[i1, i2]
    all_DPWV = PWV[i2] - PWV[i1]
    all_pwvpwv = pwv[i1] * pwv[i2]
    all_PWVpairs = np.column_stack((PWV[i1], PWV[i2]))
    return all_DT, all_DPWV, all_pwvpwv, all_PWVpairs

In [None]:
from astropy.coordinates import angular_separation
from astropy.coordinates import Angle
# https://docs.astropy.org/en/latest/api/astropy.coordinates.angular_separation.html
#(lon1, lat1, lon2, lat2)
u.degree

In [None]:
def ComputePWVAndTimeAndSepDiffence(df):
    """
    Build all time-ordered pairs of observations of one night, with their
    angular separation on the sky.

    Every ordered pair (obs1, obs2) of rows of ``df`` with
    ``t2 - t1 >= -1e-6`` is kept. The small negative tolerance means that the
    pair of an observation with itself (``t2 - t1 = 0``) IS included, and
    that two observations sharing the same ``dt`` are kept in both orders.
    Pairs are returned in the order of a nested loop over the rows
    (first observation in the outer loop, second one in the inner loop).

    Parameters
    ----------
    df : pandas.DataFrame
        Observations of one night. The columns read are ``"dt"`` (time),
        ``"PWV [mm]_x"`` (PWV value), ``"RA"`` and ``"DEC"`` (interpreted
        as degrees).

    Returns
    -------
    all_DT : numpy.ndarray
        Time difference ``t2 - t1`` of each pair.
    all_DPWV : numpy.ndarray
        PWV difference ``PWV2 - PWV1`` of each pair.
    all_pwvpwv : numpy.ndarray
        Product ``(PWV1 - meanPWV) * (PWV2 - meanPWV)`` where ``meanPWV`` is
        the mean of ``"PWV [mm]_x"`` over ``df`` (i.e. over the night).
    all_sep : numpy.ndarray
        Angular separation of each pair, in degrees.
    all_PWVpairs : numpy.ndarray
        Array of shape (npairs, 2) holding ``(PWV1, PWV2)`` for each pair.
    """
    # tolerance on t2 - t1 (same unit as "dt") so that self-pairs are kept
    dt_tolerance = 0.000001

    times = df["dt"].to_numpy(dtype=float)
    pwv = df["PWV [mm]_x"].to_numpy(dtype=float)

    # residuals with respect to the mean of the night (NaN are skipped by pandas)
    residuals = pwv - df["PWV [mm]_x"].mean()

    # sky coordinates converted once from degrees to radians
    ra = np.deg2rad(df["RA"].to_numpy(dtype=float))
    dec = np.deg2rad(df["DEC"].to_numpy(dtype=float))

    # delta_t[i, j] = t_j - t_i : row index = first element, column = second one
    delta_t = times[np.newaxis, :] - times[:, np.newaxis]
    keep = delta_t >= -dt_tolerance

    # np.nonzero scans the matrix row by row, which is exactly the order of
    # the nested loop (first element outer, second element inner)
    first, second = np.nonzero(keep)

    all_DT = delta_t[keep]
    all_DPWV = pwv[second] - pwv[first]
    all_pwvpwv = residuals[first] * residuals[second]
    # angular_separation works on plain arrays in radians (lon1, lat1, lon2, lat2)
    all_sep = np.rad2deg(
        np.asarray(angular_separation(ra[first], dec[first], ra[second], dec[second]), dtype=float)
    )
    all_PWVpairs = np.column_stack((pwv[first], pwv[second]))

    return all_DT, all_DPWV, all_pwvpwv, all_sep, all_PWVpairs

In [None]:
df_night_pwv_curve

In [None]:
df_night_pwv_curve.describe()

In [None]:
df_night_pwv_curve.describe().loc["mean"]["PWV [mm]_x"] 

In [None]:
sigma_repeatability = 0.26

## Plot the time behaviour

In [None]:
# Per-night processing cell.
# For every night of all_selected_nights having more than 10 observations:
#   - fit the PWV time curve (MakeLinearModelFit, defined outside this cell),
#   - compute all observation pairs (ComputePWVAndTimeAndSepDiffence),
#   - store the per-night results in the containers below,
#   - draw and save a 3-panel figure (PWV vs time, |dPWV| vs dt, product vs dt).
# Names such as df_spec_sel, df_spec_wp, df_spec_np, df_spec_np_f, df_spec_np_b,
# DateFormatter, GridSpec, inset_axes, all_figs_to_pdf and sigma_repeatability
# are expected to be defined before this cell is run.

# container of all quantities (one entry appended per processed night)
all_sigmapwv = []
all_meanpwv = []
all_TimeDifferences = [] # t2-t1
all_PWVDifferences = [] # PWV2-PWV1
all_PWVProducts = [] # pwvi x pwvj
all_nights_forpairs = []
all_AngularSep = []
all_PWVPairs = [] # (pwv1,pwv2)
all_pwvvalues_overallnights = []


# loop on nights
for night in all_selected_nights:
    #select the night
    # Choose the kind of observation
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]
    df_spec_night_wp = df_spec_wp[df_spec_wp["nightObs"] == night]
    df_spec_night_np = df_spec_np[df_spec_np["nightObs"] == night]
    df_spec_night_np_f = df_spec_np_f[df_spec_np_f["nightObs"] == night]
    df_spec_night_np_b = df_spec_np_b[df_spec_np_b["nightObs"] == night]
    
    #select the variables (the same list of columns for the five selections)
    df_night_pwv_curve = df_spec_night[["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET","RA","DEC"]]
    df_night_pwv_curve_wp = df_spec_night_wp[["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET","RA","DEC"]]
    df_night_pwv_curve_np_b = df_spec_night_np_b[["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET","RA","DEC"]]
    df_night_pwv_curve_np_f = df_spec_night_np_f[["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET","RA","DEC"]]
    df_night_pwv_curve_np = df_spec_night_np[["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET","RA","DEC"]]

    # first and last observation time of the night (tmax is not used below)
    tmin = df_night_pwv_curve["Time"].min()
    tmax = df_night_pwv_curve["Time"].max()

    # multi-line text "targets:\n<target1>\n<target2>..." shown on the figure
    list_of_targets = df_night_pwv_curve["TARGET"].unique()
    str_list_of_targets = "\n".join(list_of_targets)
    str_list_of_targets = "\n".join(["targets:",str_list_of_targets])
     

    # convert in hours : "dt" = time elapsed since tmin, the first observation
    # of the full selection of the night (same origin for all sub-selections)
    # NOTE(review): these frames are selections of other frames; adding a
    # column to them may raise pandas SettingWithCopyWarning - consider .copy()
    df_night_pwv_curve["dt"] = (df_night_pwv_curve["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_wp["dt"] = (df_night_pwv_curve_wp["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_np["dt"] = (df_night_pwv_curve_np["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_np_f["dt"] = (df_night_pwv_curve_np_f["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_np_b["dt"] = (df_night_pwv_curve_np_b["Time"] - tmin).dt.total_seconds()/3600.
    
    # extract statistics on pwv
    # (textstr and date_form are built here but not used later in this cell)
    stat = df_night_pwv_curve[["PWV [mm]_x"]].describe()
    date_form = DateFormatter("%y-%m-%dT%H:%M")
    #date_form = DateFormatter("%y-%m-%dT%H")
    count = int(stat.loc["count"].values[0])
    mean = stat.loc["mean"].values[0]
    median = stat.loc["50%"].values[0]
    std = stat.loc["std"].values[0]
    textstr = "\n".join((f"count : {count}",
                     f"mean : {mean:.1f} mm",
                     f"median : {median:.1f} mm",
                     f"std : {std:.1f} mm",
                    ))


    # number of observations in the night
    N= len(df_night_pwv_curve)
    
    # NOTE(review): this counter is reset at every iteration of the loop on
    # nights, so after the loop it is 0 or 1 and never the number of good
    # nights; it probably belongs before the loop - confirm
    count_goodnight =0
    
    # only nights with more than 10 observations are processed
    if N>10:
        # does the fit
        
        #x,y,yerr,n,chi2dof,xfit,yfit,slope,slope_err = MakeLineFit(df_night_pwv_curve)
        xfit, y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean = MakeLinearModelFit(df_night_pwv_curve)
        #all_dt,all_dpwv,all_pwvpwv = ComputePWVAndTimeDiffence(df_night_pwv_curve)

        ## Compute statistics for pairs
        all_dt,all_dpwv,all_pwvpwv,all_sep , all_pwv1pwv2 = ComputePWVAndTimeAndSepDiffence(df_night_pwv_curve)


        
        # Sigma of the fit residuals (resy); it is NOT the sigma of the PWV of
        # the night and is only used for the text of the inset histogram
        sigma_of_the_night = np.std(resy)

        


        all_nights_forpairs.append(night)
        
        # Fill t,pwv
        all_TimeDifferences.append(all_dt)
        all_PWVDifferences.append(all_dpwv)
        all_PWVProducts.append(all_pwvpwv)
        all_AngularSep.append(all_sep)
        all_PWVPairs.append(all_pwv1pwv2)

        # To compute later the mean and sigma over all nights
        all_pwvvalues_overallnights.append(df_night_pwv_curve["PWV [mm]_x"].values)
            
        #mean and std of the night
        all_meanpwv.append(y_mean)
        #all_sigmapwv.append(sigma_of_the_night)
        # population standard deviation (ddof=0) of the PWV values of the night
        all_sigmapwv.append(df_night_pwv_curve["PWV [mm]_x"].std(ddof=0))

        # keep the dataframe of one particular night for later inspection
        if  night  == 20231221 :
            df_keep = df_night_pwv_curve
        
        count_goodnight +=1
        
        #textstr2 = "\n".join((f"d(PWV/dt) : ",
        #             f"slope : {slope:.3f} mm/h",
        #             f"slope err : {slope_err:.3f} mm/h",
        #             f"chi2/ndeg : {chi2dof:.2f}",       
        #            ))

        #textstr3 = "\n".join((f"d(PWV/dt) : ",
        #             f"slope : {slope:.3f} mm/h",
        #             f"slope err : {slope_err:.3f} mm/h",  
        #            ))
    

        #stat.loc["slope","PWV [mm]_x"] = slope 
        #stat.loc["slope_err","PWV [mm]_x"] = slope_err 
        #stat.loc["chi2","PWV [mm]_x"] = chi2dof

        ##########
        # plot
        ############
        

        # one row of three panels, the first one twice as wide as the others
        # NOTE(review): constrained_layout=True is combined with
        # plt.tight_layout() at the end of the block - the two layout
        # managers are alternatives, confirm which one is wanted
        fig = plt.figure(figsize=(18,4),constrained_layout=True)
        gs = GridSpec(1, 3,  width_ratios=[2,1,1],figure=fig)
    
        ax = fig.add_subplot(gs[0])
        ax2 = fig.add_subplot(gs[1])
        ax3 = fig.add_subplot(gs[2])
        
        # NOTE(review): the axes have just been created, so these presumably
        # return None; only leg is used (as the legend= argument below)
        leg=ax.get_legend()
        leg2=ax2.get_legend()
        leg3=ax3.get_legend()
        
        # panel 1 : PWV versus dt_midnight, all points with error bars, then
        # the three sub-selections overplotted with one colour each
        ax.errorbar(x=df_night_pwv_curve.dt_midnight, y=df_night_pwv_curve["PWV [mm]_x"],yerr=df_night_pwv_curve["PWV [mm]_err_x"],fmt="+",color="r",ecolor="k",label="stat err",ms=1)
        df_night_pwv_curve_wp.plot(x="dt_midnight",y="PWV [mm]_x",c="cyan",ax=ax,marker='o',lw=0.0,grid=True,legend=leg,label="polar",ms=6,alpha=1)
        df_night_pwv_curve_np_b.plot(x="dt_midnight",y="PWV [mm]_x",c="r",ax=ax,marker='o',lw=0.0,grid=True,legend=leg,label="bright, no polar",ms=6,alpha=1)
        df_night_pwv_curve_np_f.plot(x="dt_midnight",y="PWV [mm]_x",c="g",ax=ax,marker='o',lw=0.0,grid=True,legend=leg,label="faint, no polar",ms=6,alpha=1)
        ax.set_xlabel("hours since midnight")
        ax.set_ylim(0.,15.)
        ax.set_ylabel("PWV (mm)")
        # the legend is drawn here, before the fit curve and its band are
        # added, so "ARDRegression" and "95% CL" do not appear in it
        ax.legend(loc="upper right")

        # fitted curve and its +/- 1.96 sigma band
        ax.plot(xfit, y_ard,"g-",lw=3,label="ARDRegression")
        ax.fill_between(xfit, y_ard - 1.96 * y_ard_std,y_ard + 1.96 * y_ard_std,alpha=0.3,label=r"95% CL",facecolor='green')

        #ax.plot(xfit2, y_brr,"r-",label="BayesianRidge")
        #ax.fill_between(xfit2, y_brr - 1.96 * y_brr_std,y_brr + 1.96 * y_brr_std,alpha=0.5,label=r"95% CL",facecolor='red')
      
       
        ax.text(0.8, 0.5, str_list_of_targets, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=0.5)
        ax.set_title(f"night {night}")
        ax.axvline(x=0,color="k",lw=2)


        # inset with the histogram of the fit residuals
        inset_ax = inset_axes(ax,width="20%", # width = 20% of the parent axes
                                          height="40%", # height = 40% of the parent axes
                                          loc="upper left")
        
        textstr_sigma = f"$\sigma = $" +f"{sigma_of_the_night:.2f} mm"
        inset_ax.hist(resy,bins=30,facecolor="b")
        inset_ax.set_xlabel("residuals (mm)")
        inset_ax.text(0.1, 0.9, textstr_sigma , transform=inset_ax.transAxes, fontsize=8,verticalalignment='top', bbox=props,alpha=1)



        # panel 2 : |PWV2 - PWV1| versus time difference of the pairs
        # (log x axis : the self-pairs at dt = 0 cannot be drawn)
        ax2.scatter(all_dt,np.abs(all_dpwv),marker="o",c="b",s=20,alpha=.5)
        ax2.set_xlabel("$\Delta t$ (hours)")
        ax2.set_ylabel("$|\Delta PWV |$ (mm)")
        ax2.grid()
        ax2.set_title("PWV difference and time difference")
        #ax2.axhspan(0, 2*sigma_repeatability, facecolor="grey", alpha=0.8,label="95 CL PWV repeatability")
        ax2.set_xscale("log")
        # NOTE(review): no labelled artist is added to ax2 in this block
        ax2.legend()

        # panel 3 : product of the residuals of the pair, divided by
        # sigma_repeatability**2, versus time difference
        ax3.scatter(all_dt,all_pwvpwv/sigma_repeatability**2,marker="o",c="r",s=20,alpha=.5)
        ax3.set_xlabel("$\Delta t$ (hours)")
        ax3.set_ylabel("$ dpwv1 \cdot dpwv2/\sigma^2$")
        ax3.grid()
        ax3.set_title("$ dpwv1 \cdot dpwv2/\sigma^2 \; vs\; \delta t$")
        #ax2.axhspan(0, 2*sigma_repeatability, facecolor="grey", alpha=0.8,label="95 CL PWV repeatability")
        ax3.set_xscale("log")
        # NOTE(review): no labelled artist is added to ax3 in this block
        ax3.legend()


        
        # keep the figure in the list all_figs_to_pdf (defined outside this cell)
        all_figs_to_pdf.append(fig) 
        
        plt.tight_layout()
        figname =f"{pathfigs}/pwvtimecorrelation_per_night_{night}"+figtype
        plt.savefig(figname)
        plt.show()


In [None]:
df_keep["PWV [mm]_x"].sum()

In [None]:
df_keep.describe()

In [None]:
#all_PWVDifferences

In [None]:
#all_AngularSep 

## Make a pandas dataframe with all time and pwv differences

In [None]:
#can use 
#all_PWVPairs = [] # (pwv1,pwv2)
#all_pwvvalues_overallnights = []

In [None]:
all_pwvvalues_overallnights = np.concatenate(all_pwvvalues_overallnights)

In [None]:
meanPWV = all_pwvvalues_overallnights.mean()
sigmaPWV = all_pwvvalues_overallnights.std()
medianPWV = np.median(all_pwvvalues_overallnights)

print(meanPWV,sigmaPWV,medianPWV)

In [None]:
plt.hist(all_pwvvalues_overallnights,bins=50,histtype="step")
plt.axvline(meanPWV,color="r")
plt.axvline(medianPWV,color="g")

#### Generate the dataframe of pairs

In [None]:
# Build the dataframe of pairs : one row per observation pair, all nights stacked.
# One dataframe is built per night from the arrays filled in the loop on nights,
# then all of them are concatenated once. (Adding the rows one by one with
# df_pairs.loc[ientry] = [...] re-allocates the dataframe at each pair and gets
# very slow when the number of pairs is large.)
pair_columns = ["idxnight","idxpair","nightobs","meanpwv","sigmapwv","dt","dPwv","PwvixPwvj","sep","PWV1","PWV2","pwv1","pwv2","pwv1xpwv2"]
night_frames = []

# loop on nights
for idxnight, night in enumerate(all_nights_forpairs):
    the_TimeDifferences = np.asarray(all_TimeDifferences[idxnight])
    Npairs = len(the_TimeDifferences)
    if Npairs == 0:
        # nothing to add for a night without any pair
        continue

    # (PWV1, PWV2) of each pair, shape (Npairs, 2)
    the_PWV1PWV2 = np.asarray(all_PWVPairs[idxnight]).reshape(Npairs, 2)
    PWV1 = the_PWV1PWV2[:, 0]
    PWV2 = the_PWV1PWV2[:, 1]
    # residuals with respect to meanPWV, the mean over ALL nights
    # (PwvixPwvj uses instead the mean of the night)
    pwv1 = PWV1 - meanPWV
    pwv2 = PWV2 - meanPWV

    night_frames.append(pd.DataFrame({
        "idxnight": idxnight,
        "idxpair": np.arange(Npairs),
        "nightobs": night,
        "meanpwv": all_meanpwv[idxnight],
        "sigmapwv": all_sigmapwv[idxnight],
        "dt": the_TimeDifferences,
        "dPwv": np.asarray(all_PWVDifferences[idxnight]),
        "PwvixPwvj": np.asarray(all_PWVProducts[idxnight]),
        "sep": np.asarray(all_AngularSep[idxnight]),
        "PWV1": PWV1,
        "PWV2": PWV2,
        "pwv1": pwv1,
        "pwv2": pwv2,
        "pwv1xpwv2": pwv1*pwv2,
    }, columns=pair_columns))

if night_frames:
    # ignore_index gives the running entry number 0..Ntot-1 as index
    df_pairs = pd.concat(night_frames, ignore_index=True)
else:
    df_pairs = pd.DataFrame(columns=pair_columns)

# total number of entries
ientry = len(df_pairs)

In [None]:
df_pairs["abs_dPwv"] = df_pairs["dPwv"].abs()

In [None]:
df_pairs = df_pairs.astype({"idxnight":"int","idxpair":"int","nightobs":"int"})

In [None]:
df_pairs

### Handle the pandas dataframe 

In [None]:
def print_groups(gdf, max_groups=2):
    """
    Print the first groups of a pandas groupby object and return them.

    Parameters
    ----------
    gdf : iterable of (name, group)
        Typically the result of ``DataFrame.groupby(...)``.
    max_groups : int, optional
        Maximum number of groups printed and returned (default 2).

    Returns
    -------
    list
        The groups that were printed, in iteration order. Every printed
        group is returned (previously the last printed group was dropped
        because the loop was left before it was stored).
    """
    all_groups = []
    for count, (name, g) in enumerate(gdf):
        if count >= max_groups:
            break
        # blank line + group name as a separator, then the group content
        print('\n'+ str(name))
        print(g)
        all_groups.append(g)
    return all_groups

In [None]:
gdf = df_pairs.groupby(by="idxnight")

In [None]:
#for g in gdf:
#    print(g)
#    break;

In [None]:
all_gdf = print_groups(gdf)

In [None]:
all_gdf[0]

### Check visually that the pandas dataframe of pairs is built correctly

In [None]:
fig,axs = plt.subplots(5,1,figsize=(14,8),layout="constrained")

ax = axs.flatten()

df_pairs.plot.scatter(x="idxnight", y="meanpwv",ax=ax[0])
ax[0].axhline(meanPWV,color="r") 
df_pairs.plot.scatter(x="idxnight", y="sigmapwv",ax=ax[1])
ax[1].axhline(sigma_repeatability,color="r") 
df_pairs.plot.scatter(x="idxnight", y="dPwv",ax=ax[2])
ax[2].axhline(0,color="r") 
df_pairs.plot.scatter(x="idxnight", y="PwvixPwvj",ax=ax[3])
ax[3].axhline(0,color="r") 
df_pairs.plot.scatter(x="idxnight", y="pwv1xpwv2",ax=ax[4])
ax[4].axhline(0,color="r") 

### Time separation of pairs : Linear binning in time

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,4))
df_pairs["dt"].hist(bins=50,ax=ax)
ax.set_yscale("log")
ax.set_xlabel("separation time (hour)")
ax.set_title("Pairs time separation during a night")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(16,4))
df_pairs["dt"].hist(bins=100,range=(0,0.2),ax=ax)
#ax.set_yscale("log")
ax.set_xlabel("separation time (hour)")
ax.set_title("Pairs time separation during a night")
ax.axvline(0.024,color="r",label="dt < 1.4 min")
ax.axvline(30./3600,color="green",label="dt> 30 sec")
ax.axvline(0.016,color='k')
ax.legend()

The first bin runs from 30 seconds up to 1.4 minutes.

In [None]:
print("pairs up at ",0.024*60,"minutes")
print("pairs up at ",1.4/60.,"hours", np.log10(1.4/60.))
print("pairs lower bound at",30/3600.,"hours",np.log10(30/3600.))
print("separation of two bins ",np.mean([30./3600,0.024]))

### Time separation of pairs : Log binning in time

In [None]:
logbins  = np.logspace(-1.6320232147054057,1,50)

fig,ax = plt.subplots(1,1,figsize=(6,4))
ax.hist(df_pairs["dt"].values,bins=logbins,histtype="step",color="b",lw=3)
ax.hist(df_pairs["dt"].values,bins=3,range=(0.00 ,0.02333333333333333),color="r")
#df_pairs["dt"].hist(bins=logbins ,ax=ax)
ax.set_xlabel("separation time (hour)")
ax.set_title("Pairs time separation during a night")
ax.set_yscale("log")
ax.set_xscale("log")

In [None]:
# make all bins after the closeby pairs
logbins  = np.logspace(-1.6320232147054057,1,50)
# Insert the two first bins
logbins  = np.insert(logbins , 0, 0.016166666666666666 , axis=0)
logbins  = np.insert(logbins , 0, 0.008333333333333333 , axis=0)
logbins  = np.insert(logbins , 0, 0.0 , axis=0)
fig,ax = plt.subplots(1,1,figsize=(6,4))
ax.hist(df_pairs["dt"].values,bins=logbins,histtype="step",color="b",lw=3)
#ax.hist(df_pairs["dt"].values,bins=2,range=(0.008333333333333333 ,0.02333333333333333),color="r")
#df_pairs["dt"].hist(bins=logbins ,ax=ax)
ax.set_xlabel("separation time (hour)")
ax.set_title("Pairs time separation during a night")
ax.set_yscale("log")
ax.set_xscale("log")



In [None]:
fig,(ax1,ax2) = plt.subplots(1,2,figsize=(14,4),layout="constrained")
df_pairs["pwv1xpwv2"].hist(bins=50,ax=ax1)
ax1.set_yscale("log")
ax1.set_xlabel("pwv1 x pwv2 ")
ax1.set_title("pwv1 pwv2 product in pairs (average over all night)")

df_pairs["PwvixPwvj"].hist(bins=50,ax=ax2)
ax2.set_yscale("log")
ax2.set_xlabel("Pwvi x Pwvj")
ax2.set_title("pwvi x pwvj product in pairs (average per night)")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,4))

df_pairs["PWV1"].hist(bins=50,ax=ax,histtype="step",label="PWV1",color="b")
df_pairs["PWV2"].hist(bins=50,ax=ax,histtype="step",label="PWV2",color="r")
ax.legend()
ax.set_xlabel("PWV (mm)")
ax.set_title("Precipitable water vapor")

In [None]:
# Scatter plots of the two PWV residuals of each pair.
# Left  : residuals w.r.t. the mean over all nights (columns pwv1, pwv2)
# Right : residuals w.r.t. the mean of the night (PWV1/PWV2 - meanpwv)
fig,(ax,ax2) = plt.subplots(1,2,figsize=(16,8))

# --- left panel : all pairs, then close pairs and far pairs overplotted
df_pairs.plot.scatter(x="pwv1",y="pwv2",ax=ax,marker='+',grid=True,alpha=0.8,label="all pairs")

dfa = df_pairs[df_pairs["dt"]<0.1]
dfa.plot.scatter(x="pwv1",y="pwv2",ax=ax,marker='+',color="red",label="dt<0.1 hours")

dfb = df_pairs[df_pairs["dt"]>5.0]
dfb.plot.scatter(x="pwv1",y="pwv2",ax=ax,marker='+',color="yellow", label="dt>5 hours")
ax.set_aspect('equal')
ax.set_xlabel("pwv1 (mm)")
ax.set_ylabel("pwv2 (mm)")
ax.legend()


# --- right panel : residuals with respect to the mean of the night
dfc_x = df_pairs["PWV1"]- df_pairs["meanpwv"]
dfc_y = df_pairs["PWV2"]- df_pairs["meanpwv"]
xxx = dfc_x.values
yyy = dfc_y.values


# close pairs (dt < 0.1 hour)
dfd = df_pairs[df_pairs["dt"]< 0.1]
xxx1 = (dfd["PWV1"]- dfd["meanpwv"]).values
# y axis is the SECOND element of the pair (PWV2); using PWV1 here would
# put all the close pairs exactly on the diagonal
yyy1 = (dfd["PWV2"]- dfd["meanpwv"]).values

# far pairs (dt > 1 hour)
dfe = df_pairs[df_pairs["dt"]> 1.0]
xxx2 = (dfe["PWV1"]- dfe["meanpwv"]).values
yyy2 = (dfe["PWV2"]- dfe["meanpwv"]).values


ax2.scatter(xxx,yyy,marker='+',color="b",alpha=0.8)
ax2.scatter(xxx1,yyy1,marker='+',color="r",alpha=0.8)
ax2.scatter(xxx2,yyy2,marker='+',color="yellow",alpha=0.8)
ax2.set_aspect("equal")

In [None]:
df_pairs.head()

In [None]:
# example of grouby
allg = df_pairs.groupby(by="idxnight")

In [None]:
# loop on groups (data from one night) from groupby
for name,g in allg:
    break

In [None]:
# show one example of group
g

In [None]:
# example of how to compute aggregate functions from a group
(g["PwvixPwvj"]/g["sigmapwv"]**2).aggregate(['count','mean','std'])

In [None]:
def retrieve_timebin(ibin,df,list_of_bins):
    """
    Return the pairs whose time separation falls in one time bin.

    Parameters
    ----------
    ibin : int
        Index of the selected bin.
    df : pandas.DataFrame
        Dataframe of pairs; only the column ``"dt"`` is read.
    list_of_bins : sequence of float
        Edges of the time bins. Bin ``ibin`` covers the half-open interval
        ``[list_of_bins[ibin], list_of_bins[ibin+1])``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` with ``"dt"`` inside the selected bin.
    """
    # the edges come from the argument list_of_bins, not from a global array
    in_bin = (df["dt"] >= list_of_bins[ibin]) & (df["dt"] < list_of_bins[ibin+1])
    return df[in_bin]

In [None]:
def ComputeMyDCF_PwvixPwvj(df,list_of_bins):
    """
    Compute the Discrete Covariance Function averaged over the nights.

    In each time bin the normalised product ``PwvixPwvj / sigmapwv**2``
    (mean and sigma taken per night) is first averaged night by night; the
    value of the bin is the mean of these per-night averages and its error
    is their standard deviation (ddof=0) divided by sqrt(number of nights).

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe of pairs with the columns ``"dt"``, ``"idxnight"``,
        ``"PwvixPwvj"`` and ``"sigmapwv"``.
    list_of_bins : sequence of float
        Edges of the time bins; bin ``i`` is ``[edge_i, edge_i+1)``.

    Returns
    -------
    xcenter : numpy.ndarray
        Centre of each time bin.
    ydata : numpy.ndarray
        Mean over the nights of the normalised product (NaN for a bin
        without any pair).
    ydataerr : numpy.ndarray
        Error on ``ydata`` (NaN for a bin without any pair).
    """
    # the binning is entirely defined by the argument, not by a global array
    edges = np.asarray(list_of_bins, dtype=float)
    xcenter = (edges[:-1] + edges[1:])/2.
    nbins = len(xcenter)

    # bins without any pair stay at NaN
    ydata = np.full(nbins, np.nan)
    ydataerr = np.full(nbins, np.nan)

    # loop on time-bins
    for ibin in range(nbins):

        # select all pairs in the timebin ibin
        in_bin = (df["dt"] >= edges[ibin]) & (df["dt"] < edges[ibin+1])
        df_sel = df[in_bin]
        if df_sel.empty:
            continue

        # normalised product of each pair, then its mean night by night
        normalised = (df_sel["PwvixPwvj"]/df_sel["sigmapwv"]**2).astype(float)
        night_means = normalised.groupby(df_sel["idxnight"].to_numpy()).mean().to_numpy()

        n_nights = len(night_means)
        if n_nights == 0:
            continue

        # average over all the nights contributing to that time bin
        ydata[ibin] = np.mean(night_means)
        ydataerr[ibin] = np.std(night_means)/np.sqrt(n_nights)

    return xcenter,ydata,ydataerr

In [None]:
def ComputeMyDCF_pwv1xpwv2(df,list_of_bins,sigma=None):
    """
    Compute the Discrete Covariance Function with all nights mixed.

    In each time bin the product ``pwv1xpwv2`` (residuals taken with respect
    to the mean over all nights) is averaged over all the pairs of the bin
    and divided by ``sigma**2``. The author flags this estimator as the bad
    one; ``ComputeMyDCF_PwvixPwvj`` is the per-night version.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe of pairs with the columns ``"dt"`` and ``"pwv1xpwv2"``.
    list_of_bins : sequence of float
        Edges of the time bins; bin ``i`` is ``[edge_i, edge_i+1)``.
    sigma : float, optional
        Standard deviation used for the normalisation. When omitted the
        module-level variable ``sigmaPWV`` is used.

    Returns
    -------
    xcenter : numpy.ndarray
        Centre of each time bin.
    ydata : numpy.ndarray
        Mean of ``pwv1xpwv2`` in the bin divided by ``sigma**2`` (NaN for
        a bin without any pair).
    ydataerr : numpy.ndarray
        Standard error on the mean (sample std, ddof=1, over sqrt(n))
        divided by ``sigma**2``; NaN when the bin holds fewer than two pairs.
    """
    if sigma is None:
        # sigma over all nights, computed at module level
        sigma = sigmaPWV

    # the binning is entirely defined by the argument, not by a global array
    edges = np.asarray(list_of_bins, dtype=float)
    xcenter = (edges[:-1] + edges[1:])/2.
    nbins = len(xcenter)

    # bins without any pair stay at NaN
    ydata = np.full(nbins, np.nan)
    ydataerr = np.full(nbins, np.nan)

    for ibin in range(nbins):
        in_bin = (df["dt"] >= edges[ibin]) & (df["dt"] < edges[ibin+1])
        products = df.loc[in_bin, "pwv1xpwv2"].astype(float)
        n = len(products)
        if n == 0:
            continue
        ydata[ibin] = products.mean()/sigma**2
        ydataerr[ibin] = products.std()/np.sqrt(n)/sigma**2

    return xcenter,ydata,ydataerr

In [None]:
# Only one bin for close pairs
#logbins  = np.logspace(-1.5,0.8,40)
#logbins  = np.logspace(-1.6320232147054057,0.8,40)
#logbins  = np.insert(logbins , 0, 0.008333333333333333 , axis=0)

# make all bins after the closeby pairs
logbins  = np.logspace(-1.6320232147054057,1,20)
# Insert the three first bins
logbins  = np.insert(logbins , 0, 0.016166666666666666 , axis=0)
logbins  = np.insert(logbins , 0, 0.008333333333333333 , axis=0)
logbins  = np.insert(logbins , 0, 0.000000 , axis=0)


tt1,yy1,yyerr1 =  ComputeMyDCF_pwv1xpwv2(df_pairs,list_of_bins = logbins)
tt2,yy2,yyerr2 = ComputeMyDCF_PwvixPwvj(df_pairs,list_of_bins = logbins)

logbins_tmin =  tt1.min()*60
logbins_tmax =  tt1.max()
txtstr = "\n".join([f"tmin = {logbins_tmin:.1f} min", f"tmax = {logbins_tmax:.1f} hour"]) 

In [None]:
NB = 10
palette = sns.color_palette("Spectral",NB)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(16,4))
for ibin in range(NB):
    dfg= retrieve_timebin(ibin,df_pairs,list_of_bins = logbins)
    dfg['dt'].hist(bins=100,ax=ax,color=palette[ibin])
ax.set_yscale("log")
ax.set_xscale("log")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(16,4))
ax.hist(df_pairs["dt"].values,bins=logbins,histtype="step",color="b",lw=3)
#ax.hist(df_pairs["dt"].values,bins=2,range=(0.008333333333333333 ,0.02333333333333333),color="r")
#df_pairs["dt"].hist(bins=logbins ,ax=ax)
ax.set_xlabel("separation time (hour)")
ax.set_title("Pairs time separation during a night")
ax.set_yscale("log")
ax.set_xscale("log")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,6))
#ax.plot(tt1,yy1,'o-b',label="method 1 : \n $mean_{pwv},\sigma_{pwv}$ all nights")
#ax.errorbar(tt1,yy1,yerr=yyerr1,c="grey")

ax.plot(tt2,yy2,'o-r',label="method 2 : \n $mean_{pwv},\sigma_{pwv}$ per night")
ax.errorbar(tt2,yy2,yerr=yyerr2,c="grey")

ax.legend()
ax.text(0.8, 0.95, txtstr, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=0.5)

ax.set_title("Discrete Covariance Function (in log-time-bins)")
ax.set_xlabel("$\Delta t$ (hours)")
ax.axhline(0,color="k")
plt.show()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,6))

#ax.plot(tt1,yy1,'o-b',label="method 1 : \n $mean_{pwv},\sigma_{pwv}$ all nights")
#ax.errorbar(tt1,yy1,yerr=yyerr1,c="grey")

ax.plot(tt2,yy2,'o-r',label="method 2 : \n $mean_{pwv},\sigma_{pwv}$ per night")
ax.errorbar(tt2,yy2,yerr=yyerr2,c="grey")
ax.set_xscale("log")
ax.legend()

ax.text(0.8, 0.95, txtstr, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=0.5)


ax.set_title("Discrete Covariance Function (in log-time-bins)")
ax.set_xlabel("$\Delta t$ (hours)")
ax.axhline(0,color="k")
plt.show()

## Stat plot 

In [None]:
import seaborn as sns

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,4))
sns.violinplot(x=df_pairs["dt"],color='cyan',ax=ax)
ax.set_title("Observation pairs $\Delta t$")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,4))
sns.violinplot(x=df_pairs["dPwv"],color="r",ax=ax)
ax.set_title("Observation pairs PWV difference")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,4))
sns.violinplot(x=df_pairs["sep"],color="g",ax=ax)
ax.set_title("Observation pairs Angular separation")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(12,6))
df_pairs.plot.scatter(x="sep",y="abs_dPwv",c="dt",ax=ax,cmap="jet")
ax.set_title("dPWV variation vs angular separation for different time seperation" )
ax.set_ylabel("$|\Delta PWV|$ (mm)")
ax.set_xlabel("Angular separation (degrees)")
#ax.set_zlabel("$\Delta t$ (hours)")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(12,6))
df_pairs.plot.scatter(x="dt",y="abs_dPwv",c="sep",ax=ax,cmap="jet")
ax.set_title("dPWV variation vs time seperation for different angular separation" )
ax.set_ylabel("$|\Delta PWV|$ (mm)")
ax.set_xlabel("$\Delta t$ (hours)")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(12,6))
df_pairs.plot.scatter(x="dt",y="sep",c="abs_dPwv",ax=ax,cmap="jet")
ax.set_title("Angular separation vs time seperation for different PWV differences" )
ax.set_ylabel("Angular separation (degrees)")
ax.set_xlabel("$\Delta t$ (hours)")

### Generate time binning

In [None]:
MAXHOURTIMES = 7.0    # hours
#TIMEBINWIDTHMIN = 10.0 # in minutes
TIMEBINWIDTHMIN = 4.0 # in minutes
NUMBEROFTIMEBINS = int(MAXHOURTIMES*60.0/TIMEBINWIDTHMIN)

NUMBEROFTIMEBINS

In [None]:
def bin_to_timehour(ibin, binwidth=None):
    """
    Convert a time-bin number into a time in hours.

    ibin : bin number
    binwidth : width of the bins in minutes; when omitted the current value
               of TIMEBINWIDTHMIN is used
    return : ibin*binwidth/60.
    """
    if binwidth is None:
        binwidth = TIMEBINWIDTHMIN
    return ibin*binwidth/60.


def time_to_bintime(thour,binwidth=TIMEBINWIDTHMIN):
    """
    Convert a time difference into a time-bin number and the bin time.

    thour : time difference in hour
    binwidth : width of the bin in minutes

    return : pandas Series with
       "binnum"  : int((minutes + (binwidth-1))/binwidth), i.e. bin 0 holds
                   the time differences below 1 minute and bin k >= 1 holds
                   [(k-1)*binwidth + 1 , k*binwidth + 1) minutes
       "hourbin" : time in hour associated with that bin (bin_to_timehour)
    """
    # convert time in minutes; the offset and the conversion back to hours
    # both use the binwidth argument so that a non-default width is consistent
    minutes_bin  = (thour*60+(binwidth-1)) /binwidth
    binnum = int(minutes_bin)
    return pd.Series( data = [binnum, bin_to_timehour(binnum, binwidth)],index = ["binnum","hourbin"])

In [None]:
#binwidth=TIMEBINWIDTHMIN
#thour = 0.99/60.
#minutes_bin  = (thour*60+3) /binwidth 
#binnum = int(minutes_bin)
#print(minutes_bin,binnum )

In [None]:
df_pairs["dt"].apply(time_to_bintime)

In [None]:
meanPWV

In [None]:
sigmaPWV

In [None]:
df_pairs

In [None]:
df_pairs[["binnum","hour_bin"]]= df_pairs["dt"].apply(time_to_bintime)
df_pairs = df_pairs.astype({"binnum":"int"})

In [None]:
df_pairs = df_pairs[df_pairs.binnum<NUMBEROFTIMEBINS]

In [None]:
df_pairs.describe()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,6))
df_pairs.plot(x="binnum",y="hour_bin",ax=ax,grid=True,color="b")
ax.set_ylabel("$\Delta t$ (hour)")

In [None]:
#df_pairs["abs_dPwv"] = df_pairs["dPwv"].abs()

In [None]:
#df_pairs[["binnum","hour_bin"]]= df_pairs["dt"].apply(time_to_bintime)
#df_pairs = df_pairs.astype({"binnum":"int"})

In [None]:
df_pairs

### Number of entries per bin

In [None]:
myseq = df_pairs.groupby(["binnum"]).size()
fig,ax = plt.subplots(1,1,figsize=(15,5))
myseq.plot(kind="bar",ax=ax,facecolor="b")
ax.set_title(f"Number of observation-pairs per time-bin = {TIMEBINWIDTHMIN} min/ {MAXHOURTIMES} hours")
ax.set_ylabel("count per bins")

In [None]:
myseq.describe()

### Save files on pairs

In [None]:
output_filename_pairs = f"{pathdata}/pairs_tpwv_vs_dt_allconditions_{MAXHOURTIMES:.0f}H_{TIMEBINWIDTHMIN:.0f}.csv"
df_pairs.to_csv(output_filename_pairs)

### home made profile plot

In [None]:
# Home-made profile : statistics of the pairs in each time bin ("binnum").
# Results are kept in three containers :
#   all_df             : list of the sub-dataframes, one per bin, in increasing
#                        bin number (indexed by position, not by bin number)
#   mapping_dict       : bin number -> dataframe of the mean and std of
#                        dt, dPwv and abs_dPwv in that bin
#   mapping_dict_MyDCT : bin number -> mean of pwv1xpwv2 in that bin / sigmaPWV**2
all_bin_numbers = df_pairs["binnum"].unique()
all_bin_numbers = sorted(all_bin_numbers)

# loop on time-bins
all_df = []
mapping_dict = {}
mapping_dict_MyDCT = {}
# loop on bin
for ibin in all_bin_numbers:
    # pairs belonging to that time bin
    df = df_pairs[df_pairs["binnum"] == ibin ]
    
    # NOTE(review): these two values are not used in this cell
    mean_dpwv_inbin = df['dPwv'].mean()
    std_dpwv_inbin = df['dPwv'].std()

    # covariance estimate of the bin, normalised by the variance over all nights
    product_inbin = df['pwv1xpwv2'].mean()/sigmaPWV**2
    
    mapping_dict[ibin] = df[["dt","dPwv","abs_dPwv"]].agg(["mean","std"])
    mapping_dict_MyDCT[ibin]  = product_inbin
    all_df.append(df)

In [None]:
NBins = len(all_bin_numbers)
NBins

## Check 

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,4))
h = all_df[0]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax,color="r",histtype="step",lw=3)
ax.set_title("bin 0")
ax.set_xlabel("$\Delta t$ (hours)")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,4))
all_counts = np.zeros(len(all_bin_numbers))
for ibin in range(len(all_bin_numbers)):
    if ibin==0:
        h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax,color="r",histtype="step",lw=3)
    else:
        h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax)
    out, bins  = pd.cut(all_df[ibin]["dt"], bins = NUMBEROFTIMEBINS, include_lowest=True, right=False, retbins=True)
    maxcount = out.value_counts().max()
    all_counts[ibin] = maxcount
ax.set_xlabel("$\Delta t$ (hour)")
ax.set_title("Dt for observation pairs in the first time bins")
#ax.set_ylim(0.,all_counts[0:].max())
ax.set_ylim(0.,300)
ax.set_xlim(0,1.0)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(18,4))
all_counts = np.zeros(len(all_bin_numbers))
for ibin in range(len(all_bin_numbers)):
    if ibin==0:
        h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax,color="r",histtype="step",lw=3)
    else:
        h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax)
    out, bins  = pd.cut(all_df[ibin]["dt"], bins = NUMBEROFTIMEBINS, include_lowest=True, right=False, retbins=True)
    maxcount = out.value_counts().max()
    all_counts[ibin] = maxcount
ax.set_xlabel("$\Delta t$ (hour)")
ax.set_title("Dt for observation pairs in the first time bins")
#ax.set_ylim(0.,all_counts[0:].max())
ax.set_ylim(0.,300)
ax.set_xlim(0,1.0)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
#ibin = 0
#h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax)
#out0, bins0  = pd.cut(all_df[ibin]["dt"], bins = NUMBEROFTIMEBINS, include_lowest=True, right=False, retbins=True)
ibin = 9
h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax)
out9, bins9  = pd.cut(all_df[ibin]["dt"], bins = NUMBEROFTIMEBINS, include_lowest=True, right=False, retbins=True)
ibin = 10
h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax)
out10, bins10  = pd.cut(all_df[ibin]["dt"], bins = NUMBEROFTIMEBINS, include_lowest=True, right=False, retbins=True)
ibin = 11
h = all_df[ibin]["dt"].hist(bins = NUMBEROFTIMEBINS,ax=ax)
out11, bins11  = pd.cut(all_df[ibin]["dt"], bins = NUMBEROFTIMEBINS, include_lowest=True, right=False, retbins=True)
ax.set_xlabel("$\Delta t$ (hours)")
ax.set_title("Dt for pairs in the different time bins 9,10,11")

In [None]:
out9.value_counts().max()

In [None]:
out10.value_counts().max()

In [None]:
mapping_dict[1]["abs_dPwv"]["mean"]

## My auto correlation

In [None]:
mapping_dict

In [None]:
# Profile plot of |dPWV| versus the time separation of the pairs.
# For every time bin of mapping_dict collect : the time of the bin, the mean
# and the std of abs_dPwv, and the std of dt inside the bin.
bin_times, absdiff_means, absdiff_stds, dt_stds = [], [], [], []
for binnum, binstats in mapping_dict.items():
    absdiff = binstats["abs_dPwv"]
    bin_times.append(bin_to_timehour(binnum))
    absdiff_means.append(absdiff["mean"])
    absdiff_stds.append(absdiff["std"])
    dt_stds.append(binstats["dt"]["std"])

XX = np.array(bin_times, dtype=float)    # time of the bin (hours)
YY = np.array(absdiff_means, dtype=float)   # mean of |dPWV| in the bin
EXX = np.array(dt_stds, dtype=float)     # spread of dt in the bin
EYY = np.array(absdiff_stds, dtype=float)   # spread of |dPWV| in the bin

fig,ax = plt.subplots(1,1,figsize=(16,6))
ax.errorbar(
    XX, YY, yerr=EYY, xerr=EXX,
    fmt='o', mfc="r", mec="k", ecolor="k", ms=5, lw=3,
    label="PWV data pairs",
)
ax.grid()
ax.set_ylim(0,3)
ax.set_title(f"Profile-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
ax.set_xlabel("$\Delta t \; (hour)$")
ax.set_ylabel("$|\Delta PWV| \; (mm)$")
# horizontal reference line at sqrt(2) x the single-measurement repeatability
ax.axhline(sigma_repeatability*np.sqrt(2),linestyle="-.",color="b",label="$\sigma_{PWV}$ (repeatability from pairs)")
ax.legend()

## Plot Time dependence

In [None]:
mapping_dict_MyDCT

In [None]:
# Curve of the values stored in mapping_dict_MyDCT against the time of each bin.
# EXX / EYY stay empty: no error bars are attached to this curve.
EXX = np.array([])
EYY = np.array([])
XX = np.array([])
YY = np.array([])

for binnum, dct_value in mapping_dict_MyDCT.items():
    XX = np.append(XX, bin_to_timehour(binnum))
    YY = np.append(YY, dct_value)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 6))
ax.errorbar(XX, YY)
ax.grid()

ax.legend()

In [None]:
# Two-panel figure:
#   top    : per-bin mean of |dPWV| with per-bin spreads as error bars
#   bottom : the same per-bin mean / std after subtracting 2*sigma_repeatability**2 in quadrature
bin_times, bin_means, bin_sigmas, bin_dt_sigmas = [], [], [], []
for binnum, binstats in mapping_dict.items():
    bin_times.append(bin_to_timehour(binnum))
    bin_means.append(binstats["abs_dPwv"]["mean"])
    bin_sigmas.append(binstats["abs_dPwv"]["std"])
    bin_dt_sigmas.append(binstats["dt"]["std"])

XX = np.array(bin_times, dtype=float)
YY = np.array(bin_means, dtype=float)
EYY = np.array(bin_sigmas, dtype=float)
EXX = np.array(bin_dt_sigmas, dtype=float)

# Quadratic excess over the repeatability term. Where the bin value is below
# sqrt(2)*sigma_repeatability the argument is negative and np.sqrt yields NaN,
# which shows up as a gap in the step curve.
EYYcorr1 = np.sqrt(YY**2 - 2*sigma_repeatability**2)
EYYcorr2 = np.sqrt(EYY**2 - 2*sigma_repeatability**2)

fig,axs = plt.subplots(2,1,figsize=(16,12))
ax1,ax2 =axs.flatten()

ax1.errorbar(XX, YY, yerr=EYY, xerr=EXX,fmt='o', mfc="r",mec="k",ecolor="k",ms=5,lw=3,label="PWV data pairs")
ax1.grid()
ax1.set_ylim(0,3)
ax1.set_title(f"Profile-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
# raw strings: the LaTeX backslashes are not valid Python escape sequences
ax1.set_xlabel(r"$\Delta t \; (hour)$")
ax1.set_ylabel(r"$|\Delta PWV| \; (mm)$")
# reference level drawn at sqrt(2) * sigma_repeatability
ax1.axhline(sigma_repeatability*np.sqrt(2),linestyle="-.",color="b",label=r"$\sigma_{PWV}$ (repeatability from pairs)")
ax1.legend()

ax2.step(XX, EYYcorr1,lw=3,color="r", label = r"$(\overline{|\Delta PWV|}^2 - 2\sigma_{repeatability}^2)^{1/2} $")
ax2.step(XX, EYYcorr2,lw=3,color="b",label = r"$(\sigma_{|\Delta PWV|}^2 - 2\sigma_{repeatability}^2)^{1/2} $")
ax2.set_ylim(0.,1.5)
ax2.grid()
ax2.set_xlabel(r"$\Delta t \; (hour)$")
ax2.set_ylabel(r"$\sigma_{|\Delta PWV|} (excess) \; (mm)$")
ax2.legend()
plt.show()

In [None]:
sigma_repeatability

In [None]:
# Profile plot of the signed difference: per-bin mean of dPwv versus the bin time,
# with the per-bin standard deviations of dPwv and of dt as error bars.
bin_times, bin_means, bin_sigmas, bin_dt_sigmas = [], [], [], []
for binnum, binstats in mapping_dict.items():
    bin_times.append(bin_to_timehour(binnum))
    bin_means.append(binstats["dPwv"]["mean"])
    bin_sigmas.append(binstats["dPwv"]["std"])
    bin_dt_sigmas.append(binstats["dt"]["std"])

XX = np.array(bin_times, dtype=float)
YY = np.array(bin_means, dtype=float)
EYY = np.array(bin_sigmas, dtype=float)
EXX = np.array(bin_dt_sigmas, dtype=float)

fig,ax = plt.subplots(1,1,figsize=(16,6))
ax.errorbar(XX, YY, yerr=EYY, xerr=EXX,fmt='o', mfc="r",mec="k",ecolor="k",ms=5,lw=3,label="PWV data pairs")
ax.grid()
ax.set_ylim(-3,3)
ax.set_title(f"Profile-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
# raw strings: the LaTeX backslashes are not valid Python escape sequences
ax.set_xlabel(r"$\Delta t \; (hour)$")
ax.set_ylabel(r"$\Delta PWV \; (mm)$")
# symmetric reference band drawn at +/- sqrt(2) * sigma_repeatability
ax.axhline(sigma_repeatability*np.sqrt(2),linestyle="-.",color="b",label=r"$\sigma_{PWV}$ (repeatability from pairs)")
ax.axhline(-sigma_repeatability*np.sqrt(2),linestyle="-.",color="b")
ax.legend()

In [None]:
# Two-panel figure:
#   top    : per-bin mean of the signed dPwv with per-bin spreads as error bars
#   bottom : per-bin std of dPwv after subtracting 2*sigma_repeatability**2 in quadrature
bin_times, bin_means, bin_sigmas, bin_dt_sigmas = [], [], [], []
for binnum, binstats in mapping_dict.items():
    bin_times.append(bin_to_timehour(binnum))
    bin_means.append(binstats["dPwv"]["mean"])
    bin_sigmas.append(binstats["dPwv"]["std"])
    bin_dt_sigmas.append(binstats["dt"]["std"])

XX = np.array(bin_times, dtype=float)
YY = np.array(bin_means, dtype=float)
EYY = np.array(bin_sigmas, dtype=float)
EXX = np.array(bin_dt_sigmas, dtype=float)

# A negative argument (bin std below sqrt(2)*sigma_repeatability) gives NaN,
# which shows up as a gap in the step curve.
EYYcorr = np.sqrt(EYY**2 - 2*sigma_repeatability**2)

fig,axs = plt.subplots(2,1,figsize=(16,12))
ax1,ax2 = axs.flatten()

ax1.errorbar(XX, YY, yerr=EYY, xerr=EXX,fmt='o', mfc="r",mec="k",ecolor="k",ms=5,lw=3,label="PWV data pairs")
ax1.grid()
ax1.set_ylim(-3,3)
ax1.set_title(f"Profile-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
# raw strings: the LaTeX backslashes are not valid Python escape sequences
ax1.set_xlabel(r"$\Delta t \; (hour)$")
ax1.set_ylabel(r"$\Delta PWV \; (mm)$")
# symmetric reference band drawn at +/- sqrt(2) * sigma_repeatability
ax1.axhline(sigma_repeatability*np.sqrt(2),linestyle="-.",color="b",label=r"$\sigma_{PWV}$ (repeatability from pairs)")
ax1.axhline(-sigma_repeatability*np.sqrt(2),linestyle="-.",color="b")
ax1.legend()

ax2.step(XX, EYYcorr,lw=3,color="b",label = r"$(\sigma_{\Delta PWV}^2 - 2\sigma_{repeatability}^2)^{1/2} $")
ax2.set_ylim(0.,1.5)
ax2.grid()
ax2.set_xlabel(r"$\Delta t \; (hour)$")
ax2.set_ylabel(r"$\sigma_{\Delta PWV} (excess) \; (mm)$")
ax2.legend()
plt.show()



### boxplot

- https://wellbeingatschool.org.nz/information-sheet/understanding-and-interpreting-box-plots

- https://stackoverflow.com/questions/33328774/box-plot-with-min-max-average-and-standard-deviation

In [None]:
# Box plot of |dPWV| for the pairs of each time bin (one box per "binnum").
fig,ax = plt.subplots(1,1,figsize=(16,6))
sns.boxplot(x=df_pairs["binnum"],y=df_pairs["abs_dPwv"],ax=ax,color="r",notch=False,linewidth=2)
# freeze the tick positions so that the labels can be rewritten below
ax.set_xticks(ax.get_xticks())
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, ha='center',color="k");

# a) convert the x tick labels from bin numbers to the corresponding bin time
labels_binnumbers = [int(item.get_text()) for item in ax.get_xticklabels()]
labels_hours = [ bin_to_timehour(binnum) for binnum in labels_binnumbers]
labels_hours_str = [f"{label:.1f}" for label in labels_hours]
ax.set_xticklabels(labels_hours_str)
# raw strings: the LaTeX backslashes are not valid Python escape sequences
ax.set_xlabel(r"$\Delta t$ (hours)")
# b) reduce the number of x tick labels
ax.locator_params(axis="x", nbins=10)
ax.grid()
ax.set_ylim(0,3)
# this figure is a box plot, not a profile plot
ax.set_title(f"Box-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
ax.set_ylabel(r"$|\Delta PWV| \; (mm)$")
# reference level drawn at sqrt(2) * sigma_repeatability
ax.axhline(sigma_repeatability*np.sqrt(2),linestyle="-.",color="b",label=r"$\sigma_{PWV}$ (repeatability from pairs)")
ax.legend()

In [None]:
# Box plot of the signed dPwv for the pairs of each time bin (one box per "binnum").
fig,ax = plt.subplots(1,1,figsize=(16,6))
sns.boxplot(x=df_pairs["binnum"],y=df_pairs["dPwv"],ax=ax,color="r",notch=False,linewidth=2)
# freeze the tick positions so that the labels can be rewritten below
ax.set_xticks(ax.get_xticks())
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, ha='center',color="k");
# a) convert the x tick labels from bin numbers to the corresponding bin time
labels_binnumbers = [int(item.get_text()) for item in ax.get_xticklabels()]
labels_hours = [ bin_to_timehour(binnum) for binnum in labels_binnumbers]
labels_hours_str = [f"{label:.1f}" for label in labels_hours]
ax.set_xticklabels(labels_hours_str)
# raw strings: the LaTeX backslashes are not valid Python escape sequences
ax.set_xlabel(r"$\Delta t$ (hours)")
# b) reduce the number of x tick labels
ax.locator_params(axis="x", nbins=10)
ax.set_ylabel(r"$\Delta PWV \; (mm)$")

ax.grid()
ax.set_ylim(-3,3)
# this figure is a box plot, not a profile plot
ax.set_title(f"Box-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
# symmetric reference band at +/- sqrt(2) * sigma_repeatability (each line drawn once)
ax.axhline(sigma_repeatability*np.sqrt(2),linestyle="-.",color="b",label=r"$\sigma_{PWV}$ (repeatability from pairs)")
ax.axhline(-sigma_repeatability*np.sqrt(2),linestyle="-.",color="b")
ax.legend()

In [None]:
# Keep only the pairs that fall in the first time bin (binnum == 0)
df_pair_bin_num = df_pairs[df_pairs.binnum == 0 ]

In [None]:
# Histogram (100 bins) of the signed dPwv for the pairs selected in df_pair_bin_num
fig,ax = plt.subplots(1,1,figsize=(10,6))
df_pair_bin_num.hist("dPwv",bins=100,ax=ax)

In [None]:
# Violin plot of |dPWV| for the pairs of each time bin (one violin per "binnum").
fig,ax = plt.subplots(1,1,figsize=(16,6))
sns.violinplot(x=df_pairs["binnum"],y=df_pairs["abs_dPwv"],ax=ax,facecolor="r",linewidth=2,linecolor="b")

# freeze the tick positions so that the labels can be rewritten below
ax.set_xticks(ax.get_xticks())
ax.set_xticklabels(ax.get_xticklabels(), rotation= 0, ha='center',color="b");
# a) convert the x tick labels from bin numbers to the corresponding bin time
labels_binnumbers = [int(item.get_text()) for item in ax.get_xticklabels()]
labels_hours = [ bin_to_timehour(binnum) for binnum in labels_binnumbers]
labels_hours_str = [f"{label:.1f}" for label in labels_hours]
ax.set_xticklabels(labels_hours_str)
# raw string: the LaTeX backslash is not a valid Python escape sequence
ax.set_xlabel(r"$\Delta t$ (hours)")
# b) reduce the number of x tick labels
ax.locator_params(axis="x", nbins=10)
ax.set_title(f"Profile-Plot : PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")
ax.grid()
ax.set_ylim(0,4)

## Make histograms of PWV per bin in Delta t

In [None]:
# Normalised histograms of the pair differences for two time bins (binfirst, binlast):
#   left  panel : |dPWV|  (column "abs_dPwv")
#   right panel : signed dPWV (column "dPwv")
fig,axs = plt.subplots(1,2,figsize=(16,6))

ax1,ax2 = axs.flatten()
binfirst = 0
binlast = 6
DTfirst = bin_to_timehour(binfirst)
DTlast = bin_to_timehour(binlast)

# same treatment for both columns, each on its own axis
for column, axis in (("abs_dPwv", ax1), ("dPwv", ax2)):
    sigmalast = mapping_dict[binlast][column]["std"]
    sigmafirst = mapping_dict[binfirst][column]["std"]
    # raw f-strings: the LaTeX backslashes are not valid Python escape sequences
    labellast = rf"$\Delta t$ ={DTlast:.2f} hours $\sigma$ = {sigmalast:.2f} mm"
    labelfirst = rf"$\Delta t$ ={DTfirst:.2f} hours $\sigma$ = {sigmafirst:.2f} mm"

    df_pair_bin_num1 = df_pairs[df_pairs.binnum == binlast ]
    df_pair_bin_num1.hist(column,bins=50,ax=axis,density = True,color="b",histtype="step",lw=3, label = labellast)

    df_pair_bin_num0 = df_pairs[df_pairs.binnum == binfirst ]
    df_pair_bin_num0.hist(column,bins=50,ax=axis,density = True,color="r",histtype="step",lw=3, label = labelfirst)

ax1.legend()
ax2.legend()
ax1.set_xlabel(r"$|\Delta PWV|$ (mm)")
ax2.set_xlabel(r"$\Delta PWV$ (mm)")
plt.suptitle(f"PWV difference in pairs in bins of {TIMEBINWIDTHMIN:.0f} min / {MAXHOURTIMES} hours")

## Discrete correlation function

In [None]:
# Walk through the selected nights, printing the number of rows of each one, and
# stop at night 20230131: after the loop, `night` and `df_spec_night` hold that
# night (or the last night of the list if 20230131 is absent).
for night in all_selected_nights:
    # rows of the selection that belong to this night
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]
    N = len( df_spec_night)
    print(night,N)
    if night == 20230131:
        break

In [None]:
def ComputeZDCF(night,df_night_pwv_curve):
    """
    Compute the auto-correlation ZDCF of one night of PWV measurements with pyzdcf.

    The PWV curve is written as a header-less CSV file
    ``pwv_curve_{night}.csv`` in ``datapath_input`` (columns: time in integer
    seconds since the first point, PWV, error on PWV), then pyzdcf is run on
    that file with ``datapath_output`` as output directory.

    Parameters
    ----------
    night :
        Night identifier, only used to build the name of the CSV file.
    df_night_pwv_curve : pandas.DataFrame
        Must provide the columns ``dt_midnight`` (multiplied by 3600 to get
        seconds) and ``PWV [mm]_x``. The frame is not modified.

    Returns
    -------
    The object returned by ``pyzdcf`` (the notebook reads its columns
    ``tau``, ``-sig(tau)``, ``+sig(tau)``, ``dcf``, ``-err(dcf)``, ``+err(dcf)``).
    """
    # time in seconds, then integer seconds elapsed since the first point;
    # computed as separate Series so that the caller's frame is left untouched
    t_sec_rel = df_night_pwv_curve["dt_midnight"]*3600.0
    t_sec_abs = (t_sec_rel - t_sec_rel.min()).astype(int)

    # light curve in the 3-column layout read by pyzdcf: time, value, error.
    # Every point gets the same error, sigma_repeatability.
    df_pwvc = df_night_pwv_curve[["PWV [mm]_x"]].assign(
        t_sec_abs=t_sec_abs,
        sig_pwv=sigma_repeatability,
    )[["t_sec_abs","PWV [mm]_x","sig_pwv"]]

    # save the pwv curve in a file
    fn = f"pwv_curve_{night}.csv"
    ffn = os.path.join(datapath_input,fn)
    df_pwvc.to_csv(ffn, index=False,header=False)

    # parameters for the pyzdcf
    params_dcf = dict(autocf    =  True, # Autocorrelation (T) or cross-correlation (F)
              prefix            = 'acf',  # Output files prefix
              uniform_sampling  =  False, # Uniform sampling?
              omit_zero_lags    =  False,  # Omit zero lag points?
              minpts            =  20,     # Min. num. of points per bin (0 is a flag for default value of 11)
              num_MC            =  100,   # Num. of Monte Carlo simulations for error estimation
              lc1_name          =  fn,   # Name of the first light curve file
              lc2_name          =  fn    # Name of the second light curve file (required only if we do CCF)
             )

    # compute the ZDCF
    dcf_df = pyzdcf(input_dir  = datapath_input+"/" , 
                    output_dir = datapath_output+"/", 
                    intr       = False, 
                    parameters = params_dcf, 
                    sep        = ',', 
                    sparse     = 'auto', 
                    verbose    = False)
    return dcf_df


In [None]:
# Columns of the currently selected night that make up its PWV time curve
df_night_pwv_curve = df_spec_night[["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","ex_mjd","AIRMASS","TARGET"]]

In [None]:
# ZDCF of the currently selected night
dcf_df = ComputeZDCF(night,df_night_pwv_curve)

In [None]:
dcf_df

In [None]:
# Lags and their uncertainties: the light curve is given to pyzdcf in seconds,
# so they are divided by 3600 to be plotted in hours.
x = dcf_df["tau"].values/3600.
xerr = dcf_df[["-sig(tau)","+sig(tau)"]].values.T/3600.
# The correlation coefficient and its errors are dimensionless: no time conversion.
y = dcf_df["dcf"].values
yerr = dcf_df[["-err(dcf)","+err(dcf)"]].values.T
# matplotlib's errorbar rejects negative error values
xerr = xerr.clip(0)
yerr = yerr.clip(0)

In [None]:
# ZDCF of the selected night versus lag, with error bars on both axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 4))
ax.errorbar(
    x, y,
    xerr=xerr, yerr=yerr,
    marker='o', ms=2, mew=2, mfc='red', mec='red',
    linewidth=0.5,
    ecolor="k", elinewidth=2, capsize=2,
    uplims=True, lolims=True,
)
ax.grid()
ax.set_ylim(-1, 1)
ax.set_title(f"Discrete covariance function night {night}")
ax.set_xlabel("Time (hours)")
ax.set_ylabel("DCF (no units)")

## Compute the DCF over all nights

In [None]:
# RESET figures
# Start from an empty list of figures; figures appended to it are written to the PDF files at the end
all_figs_to_pdf = []

In [None]:
# Per-night loop: fit the PWV time curve, compute its ZDCF and draw both side by
# side in one figure (left: PWV vs time with the fit, right: ZDCF vs lag).
all_T = []     # one array of ZDCF lags (hours) per processed night
all_DCF = []   # one array of ZDCF values per processed night

# columns extracted from every per-night selection
curve_columns = ["Time","PWV [mm]_x","PWV [mm]_err_x","PWV [mm]_y","PWV [mm]_err_y","dt_midnight","AIRMASS","TARGET","RA","DEC"]

for night in all_nights_forpairs:
    # rows of this night in the full selection and in each target sub-selection
    df_spec_night = df_spec_sel[df_spec_sel["nightObs"] == night]
    df_spec_night_wp = df_spec_wp[df_spec_wp["nightObs"] == night]
    df_spec_night_np = df_spec_np[df_spec_np["nightObs"] == night]
    df_spec_night_np_f = df_spec_np_f[df_spec_np_f["nightObs"] == night]
    df_spec_night_np_b = df_spec_np_b[df_spec_np_b["nightObs"] == night]

    # .copy(): the "dt" column added below must be written to an independent
    # frame, not to a slice of the parent selection
    df_night_pwv_curve = df_spec_night[curve_columns].copy()
    df_night_pwv_curve_wp = df_spec_night_wp[curve_columns].copy()
    df_night_pwv_curve_np_b = df_spec_night_np_b[curve_columns].copy()
    df_night_pwv_curve_np_f = df_spec_night_np_f[curve_columns].copy()
    df_night_pwv_curve_np = df_spec_night_np[curve_columns].copy()

    tmin = df_night_pwv_curve["Time"].min()
    tmax = df_night_pwv_curve["Time"].max()

    # text box listing the targets observed during the night
    list_of_targets = df_night_pwv_curve["TARGET"].unique()
    str_list_of_targets = "\n".join(list_of_targets)
    str_list_of_targets = "\n".join(["targets:",str_list_of_targets])

    # hours elapsed since the first measurement of the night (tmin of the full selection)
    df_night_pwv_curve["dt"] = (df_night_pwv_curve["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_wp["dt"] = (df_night_pwv_curve_wp["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_np["dt"] = (df_night_pwv_curve_np["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_np_f["dt"] = (df_night_pwv_curve_np_f["Time"] - tmin).dt.total_seconds()/3600.
    df_night_pwv_curve_np_b["dt"] = (df_night_pwv_curve_np_b["Time"] - tmin).dt.total_seconds()/3600.

    # summary statistics of the night's PWV
    stat = df_night_pwv_curve[["PWV [mm]_x"]].describe()
    date_form = DateFormatter("%y-%m-%dT%H:%M")
    count = int(stat.loc["count"].values[0])
    mean = stat.loc["mean"].values[0]
    median = stat.loc["50%"].values[0]
    std = stat.loc["std"].values[0]
    textstr = "\n".join((f"count : {count}",
                     f"mean : {mean:.1f} mm",
                     f"median : {median:.1f} mm",
                     f"std : {std:.1f} mm",
                    ))

    N= len(df_night_pwv_curve)

    # nights with 10 points or fewer are skipped
    if N>10:
        # fit of the PWV time curve
        xfit, y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean = MakeLinearModelFit(df_night_pwv_curve)

        # ZDCF of the night (lags are returned in seconds)
        df_dcf = ComputeZDCF(night,df_night_pwv_curve)

        ##########
        # plot
        ############
        fig = plt.figure(figsize=(18,4),constrained_layout=True)
        gs = GridSpec(1, 2,  width_ratios=[1,1],figure=fig)

        ax = fig.add_subplot(gs[0])
        ax2 = fig.add_subplot(gs[1])

        ## left panel : PWV versus time, one marker colour per target category
        ax.errorbar(x=df_night_pwv_curve.dt_midnight, y=df_night_pwv_curve["PWV [mm]_x"],yerr=df_night_pwv_curve["PWV [mm]_err_x"],fmt="+",color="r",ecolor="k",label="stat err",ms=1)
        # legend=False: the legend is built once, below, after every labelled artist exists
        df_night_pwv_curve_wp.plot(x="dt_midnight",y="PWV [mm]_x",c="cyan",ax=ax,marker='o',lw=0.0,grid=True,legend=False,label="polar",ms=6,alpha=1)
        df_night_pwv_curve_np_b.plot(x="dt_midnight",y="PWV [mm]_x",c="r",ax=ax,marker='o',lw=0.0,grid=True,legend=False,label="bright, no polar",ms=6,alpha=1)
        df_night_pwv_curve_np_f.plot(x="dt_midnight",y="PWV [mm]_x",c="g",ax=ax,marker='o',lw=0.0,grid=True,legend=False,label="faint, no polar",ms=6,alpha=1)
        ax.set_xlabel("hours since midnight")
        ax.set_ylim(0.,15.)
        ax.set_ylabel("PWV (mm)")

        ax.plot(xfit, y_ard,"g-",lw=3,label="ARDRegression")
        ax.fill_between(xfit, y_ard - 1.96 * y_ard_std,y_ard + 1.96 * y_ard_std,alpha=0.3,label=r"95% CL",facecolor='green')
        # legend drawn after the fit curve and its band so that their labels are listed too
        ax.legend(loc="upper right")

        ax.text(0.8, 0.5, str_list_of_targets, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=0.5)
        ax.set_title(f"night {night}")
        ax.axvline(x=0,color="k",lw=2)

        # inset with the histogram of the fit residuals
        inset_ax = inset_axes(ax,width="20%", # 20% of the parent axes width
                                          height="40%", # 40% of the parent axes height
                                          loc="upper left")

        # NOTE(review): textstr_sigma is not assigned anywhere else in this cell; the
        # annotation is taken here as the standard deviation of this night's fit
        # residuals -- confirm this is the intended sigma.
        sigma_res = np.std(resy)
        textstr_sigma = r"$\sigma = $" + f"{sigma_res:.2f} mm"
        inset_ax.hist(resy,bins=30,facecolor="b")
        inset_ax.set_xlabel("residuals (mm)")
        inset_ax.text(0.1, 0.9, textstr_sigma , transform=inset_ax.transAxes, fontsize=8,verticalalignment='top', bbox=props,alpha=1)

        ## right panel : ZDCF
        # lags and their uncertainties: seconds -> hours
        xerr = df_dcf[["-sig(tau)","+sig(tau)"]].values.T/3600.
        # the correlation errors are dimensionless: no time conversion
        yerr = df_dcf[["-err(dcf)","+err(dcf)"]].values.T
        # errorbar rejects negative error values
        xerr= xerr.clip(0)
        yerr= yerr.clip(0)

        x = df_dcf["tau"].values/3600.
        y = df_dcf["dcf"].values

        all_T.append(x)
        all_DCF.append(y)

        ax2.errorbar(x,y,xerr=xerr,yerr=yerr,marker='o', mfc='red',linewidth=0.5,
        mec='red', ms=2, mew=2,ecolor="k",elinewidth=2,capsize=2,uplims=True, lolims=True)
        ax2.grid()
        ax2.set_ylim(-1,1)
        ax2.set_title(f"Discrete covariance function night {night}")
        ax2.set_xlabel("Time (hours)")
        ax2.set_ylabel("DCF (no units)")

        # keep the figure for the multi-page PDF written at the end of the notebook
        all_figs_to_pdf.append(fig)

        plt.tight_layout()
        figname =f"{pathfigs}/pwvdcf_per_night_{night}"+figtype
        plt.savefig(figname)
        plt.show()



## Summary over all DCF

- colors : https://r02b.github.io/seaborn_palettes/

In [None]:
import seaborn as sns

In [None]:
# Number of collected lag arrays and DCF arrays; the two lists must have the same length
NNt = len(all_T)
NNd = len(all_DCF)
assert NNt == NNd

In [None]:
NNt

In [None]:
# NNt colours from the seaborn "bright" palette (one per entry of all_T)
palette = sns.color_palette("bright",NNt)

In [None]:
palette

In [None]:
palette[1]

In [None]:
# Overlay of every collected DCF curve, one palette colour per curve, on a log lag axis.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))
for idx in range(NNt):
    x, y = all_T[idx], all_DCF[idx]
    ax.plot(x, y, marker='o', color=palette[idx], lw=2)
ax.grid()
ax.set_xscale("log")
ax.set_title("Discrete covariance function for all nights")
ax.set_xlabel("Time (hours)")
ax.set_ylabel("DCF (no units)")

In [None]:
# Merge the per-night arrays into single flat arrays of lags and DCF values
all_T_flat = np.concatenate(all_T)
all_DCF_flat = np.concatenate(all_DCF)

In [None]:
# 2-D histogram of |DCF| versus lag for all nights together
# (lags restricted to [0, 2], |DCF| to [0, 1], 50 x 50 bins).
fig,ax = plt.subplots(1,1,figsize=(8,5))

ax.hist2d(all_T_flat,np.abs(all_DCF_flat),bins=(50,50),range=((0,2),(0,1)),cmap="YlOrRd")
ax.grid()
# raw string (LaTeX backslash is not a Python escape); stray closing parenthesis removed
ax.set_xlabel(r"$\Delta t/1hour$")
ax.set_ylabel("$DCF$")
ax.set_title("Auto correlation function")
all_figs_to_pdf.append(fig)
plt.show()

In [None]:
# Keep only the strictly positive lags (and the matching DCF values)
index_sel = np.flatnonzero(all_T_flat > 0)
all_T_flat, all_DCF_flat = all_T_flat[index_sel], all_DCF_flat[index_sel]

In [None]:
# 2-D histogram of |DCF| versus log10(lag) for all nights together
# (log10(lag) restricted to [-2, 1], |DCF| to [0, 1], 50 x 50 bins).
fig,ax = plt.subplots(1,1,figsize=(8,5))

ax.hist2d(np.log10(all_T_flat),np.abs(all_DCF_flat),bins=(50,50),range=((-2,1),(0,1)),cmap="YlOrRd")
ax.grid()
# raw string: the LaTeX backslashes are not valid Python escape sequences
ax.set_xlabel(r"$\log_{10}(\Delta t/1hour)$")
ax.set_ylabel("$DCF$")
ax.set_title("Auto correlation function")
all_figs_to_pdf.append(fig)
plt.show()

## Save all figs in a pdf file

In [None]:
from matplotlib.backends.backend_pdf import PdfPages
# Write every collected figure as one page of a single PDF stored in pathfigs,
# each page saved with bbox_inches='tight'
pdf_filename = f"holo_PWV_night_fitvariation_polydeg_{POLY_DEGREE_MAX}_timecorrelationDCF_tight.pdf"
pdf_fullfilename = os.path.join(pathfigs,pdf_filename)
with PdfPages(pdf_fullfilename) as pdf:
    for fig in all_figs_to_pdf:
        pdf.savefig(fig, bbox_inches='tight') 

In [None]:
# Same multi-page PDF export, written to a second file with the default
# bounding box (no bbox_inches='tight'); uses the PdfPages name already in scope
pdf_filename = f"holo_PWV_night_fitvariation_polydeg_{POLY_DEGREE_MAX}_timecorrelationDCF_notight.pdf"
pdf_fullfilename = os.path.join(pathfigs,pdf_filename)
with PdfPages(pdf_fullfilename) as pdf:
    for fig in all_figs_to_pdf:
        pdf.savefig(fig) 

## Compute Discrete Correlation Function for all pairs

In [None]:
def ComputeZDCFForAllPairs(night,df_night_pwv_curve):
    """
    Compute the auto-correlation ZDCF of one PWV time curve with pyzdcf.

    This entry point performs exactly the same processing as ``ComputeZDCF``
    (same CSV file name, same pyzdcf parameters), so it delegates to it
    instead of carrying a second copy of the code.

    Parameters
    ----------
    night :
        Night identifier, used to build the name of the CSV light-curve file.
    df_night_pwv_curve : pandas.DataFrame
        Must provide the columns ``dt_midnight`` and ``PWV [mm]_x``.

    Returns
    -------
    The object returned by ``pyzdcf``.
    """
    return ComputeZDCF(night,df_night_pwv_curve)