# Frequencies in PWV variation and comparison to Merra2 and Discrete Time Covariance Function

- author Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date 2025-03-15 :
- last update 2025-03-16
- laptop : conda_py311

**Goal** : Show the correlation between the PWV measured by Auxtel (hologram) and the PWV from Merra2

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
print(python_version())

In [None]:
import os,glob

In [None]:
#from pyzdcf import pyzdcf

### Output for figures

In [None]:
# Directory where the figures are stored
pathfigs = "figsPWVFrequenciesFitSinusAuxtelMerra2"
# exist_ok=True replaces the check-then-create pattern: no race between the
# test and the creation, and a no-op when the directory is already there.
os.makedirs(pathfigs, exist_ok=True)
# File extension appended to every saved figure name
figtype = ".png"

### Output for data

- including DCF

In [None]:
# Root directory for the data written by this notebook
pathdata = "dataPWVFrequenciesFitSinusAuxtelMerra2"

# Sub-directories for the DCF step (named after their content: time curves in, results out)
dcf_path_input = os.path.join(pathdata,"dcf_timecurves")
dcf_path_output = os.path.join(pathdata,"dcf_results")

# exist_ok=True replaces the check-then-create pattern (no race, no-op if present)
os.makedirs(pathdata, exist_ok=True)
os.makedirs(dcf_path_input, exist_ok=True)
os.makedirs(dcf_path_output, exist_ok=True)

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

import matplotlib.gridspec as gridspec

from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.dates as mdates

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from astropy.coordinates.earth import EarthLocation
from datetime import datetime
from pytz import timezone

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
#from scipy.interpolate import RegularGridInterpolator

import seaborn as sns

In [None]:
from astropy.modeling import models, fitting
from statsmodels.tsa.stattools import adfuller, kpss

In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time
from astropy.timeseries import LombScargle

from scipy.stats import t  # Loi de Student

In [None]:
#from scipy.interpolate import interp

In [None]:
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.gaussian_process.kernels import WhiteKernel
from sklearn.gaussian_process.kernels import ConstantKernel
from sklearn.gaussian_process import GaussianProcessRegressor

In [None]:
from scipy.stats import levy_stable,cauchy, laplace, norm

In [None]:
from pyzdcf import pyzdcf

In [None]:
# Remove to run faster the notebook
#! pip install ipympl  
import ipywidgets as widgets
%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
import pandas as pd
pd.__version__

In [None]:
def convertNumToDatestr(num):
    """Convert a date encoded as the integer YYYYMMDD into a pandas timestamp.

    Despite its name, the function does not return a string: the zero-padded
    "YYYY-MM-DD" string is built internally and parsed with ``pd.to_datetime``.

    Parameters
    ----------
    num : int
        Date written as YYYYMMDD, e.g. 20230930.

    Returns
    -------
    The result of ``pd.to_datetime`` on the "YYYY-MM-DD" string.
    """
    # Peel off the year, then split the remaining MMDD into month and day
    year, month_and_day = divmod(num, 10_000)
    month, day = divmod(month_and_day, 100)

    fields = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(fields))

In [None]:
# Reference periods, all expressed in days (DAY is the unit)
DAY = 1.
WEEK = 7*DAY
YEAR = 365.25
# Fractions of the year used as seasonal periods
MONTHS6 = YEAR/2.
MONTHS4 = YEAR/3.
QUARTER = YEAR/4.
MONTH = YEAR/12.

In [None]:
FIGXSIZE_1 = 14
FIGYSIZE_1 = 8

FIGXSIZE_0 = 14
FIGYSIZE_0 = 5

In [None]:
def LombScargle_analysis(dates, values, ax ,mode = "logxlogy",title="LombScargle - Spectrum",
                    xlabel="frequency (day)$^{-1}$",ylabel="y-unit",label="Lomb Scargle", legendout = True, datecut = 0 ):
    """Draw the Lomb-Scargle periodogram of a time series on ``ax``.

    The values are centred on their mean, the periodogram is computed with
    ``LombScargle(...).autopower()`` and plotted, and vertical lines mark the
    reference cycles (1 year, 6/4/3 months, 1 month, 1 week, 1 day, 0.5 day).
    The standard deviation of the centred values is written in a text box.

    Parameters
    ----------
    dates : array-like
        Sampling times. The reference lines are drawn at frequencies in
        1/day, so the times are expected in days (e.g. MJD).
    values : array-like
        Measurements at ``dates``.
    ax : matplotlib axes
        Axes on which the spectrum is drawn.
    mode : str
        Axis scales: "logxlogy", "logxliny", "linxlogy" or "linxliny".
        Any other value leaves the current axis scales untouched.
    title, xlabel, ylabel, label : str
        Texts for the plot title, axis labels and curve legend entry.
    legendout : bool
        If True the legend is anchored outside the axes, else inside.
    datecut : number
        If > 0, only samples with ``dates >= datecut`` are used.

    Returns
    -------
    None
    """
    # Optional selection of the most recent part of the series
    if datecut>0:
        index_selected = np.where(dates >= datecut)[0]
        dates = dates[index_selected]
        values = values[index_selected]

    # Centre the data on their mean
    values_centered = values - np.mean(values)

    # Standard deviation of the centred values (normalised by N)
    n_points = len(dates)
    sigma_x = np.sqrt(np.sum(values_centered**2)/n_points)

    freqs, power = LombScargle(dates, values_centered).autopower()

    ax.plot(freqs, power,'ob-' ,ms=5,label=label)

    # (xscale, yscale) for each supported mode; unknown modes change nothing
    axis_scales = {
        "logxliny": ("log", "linear"),
        "logxlogy": ("log", "log"),
        "linxlogy": ("linear", "log"),
        "linxliny": ("linear", "linear"),
    }
    if mode in axis_scales:
        xscale, yscale = axis_scales[mode]
        ax.set_xscale(xscale)
        ax.set_yscale(yscale)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # Reference cycles: (frequency in 1/day, colour, line style, legend label).
    # Every entry is written as a frequency, i.e. 1/period (the 1-day marker included).
    reference_cycles = [
        (1/YEAR, 'r', '-', "Cycle : 365 days - 1 year"),
        (1/MONTHS6, 'r', '--', "Cycle : 182.6 days - 6 months"),
        (1/MONTHS4, 'r', ':', "Cycle : 121.7 days - 4 months"),
        (1/QUARTER, 'r', '-.', "Cycle : 91.3 days - 3 months"),
        (1/MONTH, 'r', ':', "Cycle : 30.4 days - 1 month"),
        (1/WEEK, 'purple', '--', "Cycle : 7 days - 1 week"),
        (1/DAY, 'purple', '-', "Cycle : 1 day "),
        (1./(0.5*DAY), 'purple', '-.', "Cycle : 0.5 day "),
    ]
    for frequency, color, linestyle, cycle_label in reference_cycles:
        ax.axvline(frequency, color=color, linestyle=linestyle, label=cycle_label)

    # Raw string: "\s" is an invalid escape sequence in a normal string literal
    txtstr_sigma = r"$\sigma_x$ = " + f" {sigma_x:0.3f}"
    ax.text(0.01, 0.95, txtstr_sigma, transform=ax.transAxes, fontsize=16,verticalalignment='top', bbox=props)

    if legendout:
        ax.legend(bbox_to_anchor=(1.05, 1.05),fontsize=12)
    else:
        ax.legend(fontsize=10,fancybox=True, framealpha=0.5)
        


In [None]:
def func_seasonalvariation(x, a, b, c, d, e ,tb, tc, td, te):
    """
    Seasonal model: a constant plus four sinusoids of fixed periods.

    The periods are the module constants YEAR, MONTHS6, MONTHS4 and QUARTER;
    ``x`` and the phase offsets must be in the same time unit as those constants.

    Parameters:
    - x : time(s) at which the model is evaluated
    - a : constant offset
    - b, c, d, e : amplitudes of the YEAR, MONTHS6, MONTHS4 and QUARTER sinusoids
    - tb, tc, td, te : time offsets (phases) of the corresponding sinusoids

    Returns the model value(s) at ``x``.
    """
    two_pi = 2*np.pi
    annual = b*np.sin(two_pi*(x-tb)/YEAR)
    half_year = c*np.sin(two_pi*(x-tc)/MONTHS6)
    third_year = d*np.sin(two_pi*(x-td)/MONTHS4)
    quarter_year = e*np.sin(two_pi*(x-te)/QUARTER)

    return a + annual + half_year + third_year + quarter_year

### What does it mean if a series has a unit root?

A unit root is a feature of some stochastic processes (such as random walks) that can cause issues with statistical inference. In a time series context, having a unit root means that the value of the series at time t is equal to the value at time t-1 plus some random error.

Augmented Dickey-Fuller unit root test.
The Augmented Dickey-Fuller test can be used to test for a unit root in a univariate process in the presence of serial correlation.

The null hypothesis (H0) of the Augmented Dickey-Fuller test is that there is a unit root (the series is non-stationary), with the alternative H1 that there is no unit root (the series is stationary). If the p-value is above a critical size, then we cannot reject that there is a unit root.

The p-values are obtained through regression surface approximation from MacKinnon 1994, but using the updated 2010 tables. If the p-value is close to significant, then the critical values should be used to judge whether to reject the null.

https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html

If the p-value of the test is small (e.g. < 0.05), H0 is rejected and the series is concluded to be stationary.

Stationarity means that the statistical properties of a time series i.e. mean, variance and covariance do not change over time. Many statistical models require the series to be stationary to make effective and precise predictions.

Two statistical tests are used to check the stationarity of a time series – the Augmented Dickey Fuller (“ADF”) test and the Kwiatkowski-Phillips-Schmidt-Shin (“KPSS”) test. A method to convert a non-stationary time series into a stationary series is also used.

https://www.statsmodels.org/dev/examples/notebooks/generated/stationarity_detrending_adf_kpss.html

In [None]:
def adf_test(timeseries):
    """
    Run the Augmented Dickey-Fuller (ADF) unit-root test and print a summary table.

    Hypotheses of the test:
    - Null Hypothesis: the series has a unit root.
    - Alternate Hypothesis: the series has no unit root.
    - If the null hypothesis cannot be rejected, the test gives evidence that
      the series is non-stationary.

    The test is called with ``autolag="AIC"``. The printed table contains the
    test statistic, the p-value, the number of lags used, the number of
    observations used and the critical values. Nothing is returned.
    """
    print("Results of Dickey-Fuller Test:")
    adf_result = adfuller(timeseries, autolag="AIC")

    summary_labels = ["Test Statistic", "p-value", "#Lags Used", "Number of Observations Used"]
    summary = pd.Series(adf_result[0:4], index=summary_labels)

    # The fifth element maps each significance level to its critical value
    critical_values = adf_result[4]
    for level in critical_values:
        summary[f"Critical Value ({level})"] = critical_values[level]

    print(summary)

In [None]:
def kpss_test(timeseries):
    """
    Run the KPSS stationarity test and print a summary table.

    The hypotheses are the opposite of those of the ADF test:
    - Null Hypothesis: the process is stationary (the test is called with
      ``regression="c"``, i.e. stationarity around a constant).
    - Alternate Hypothesis: the series has a unit root (series is not stationary).

    The number of lags is chosen with ``nlags="auto"``. The printed table
    contains the test statistic, the p-value, the lags used and the critical
    values. Nothing is returned.
    """
    print("Results of KPSS Test:")
    kpss_result = kpss(timeseries, regression="c", nlags="auto")

    summary_labels = ["Test Statistic", "p-value", "Lags Used"]
    summary = pd.Series(kpss_result[0:3], index=summary_labels)

    # The fourth element maps each significance level to its critical value
    critical_values = kpss_result[3]
    for level in critical_values:
        summary[f"Critical Value ({level})"] = critical_values[level]

    print(summary)

In [None]:
def plot_histdata_andfit(data,ax, models, fitter,plotdata=True,datacolor="",function_name="gauss"):
    """
    Histogram ``data``, fit a 1D profile to the histogram and draw both on ``ax``.

    Parameters
    ----------
    data : array-like
        Sample to histogram (binning chosen by ``np.histogram(bins='auto')``).
    ax : matplotlib axes
        Axes on which the histogram and the fitted curve are drawn.
    models : module or namespace
        Must provide ``Gaussian1D`` and ``Lorentz1D``
        (presumably ``astropy.modeling.models`` -- confirm at the call site).
    fitter : module or namespace
        Must provide ``LevMarLSQFitter``
        (presumably ``astropy.modeling.fitting`` -- confirm at the call site).
    plotdata : bool
        If True the histogram bars are drawn, otherwise only the fitted curve.
    datacolor : colour spec
        Colour of the histogram bars. The empty string (the default) means
        "use matplotlib's default colour".
    function_name : str
        "gauss" or "lorentz"; any other value falls back to "gauss".

    Returns
    -------
    None
    """
    fit_colors = {"gauss":"r","lorentz":"g"}

    bin_heights, bin_borders = np.histogram(data, bins='auto')
    bin_widths = np.diff(bin_borders)
    bin_centers = bin_borders[:-1] + bin_widths / 2

    # Profile to fit; unknown names fall back to the Gaussian
    if function_name == "lorentz":
        model_init = models.Lorentz1D()
    else:
        function_name = "gauss"
        model_init = models.Gaussian1D()

    # Least-squares fit on the histogram (bin centres vs bin heights)
    fit_method = fitter.LevMarLSQFitter()
    fitted_model = fit_method(model_init, bin_centers, bin_heights)

    x_interval_for_fit = np.linspace(bin_borders[0], bin_borders[-1], 1000)

    if plotdata:
        # "" is not a valid matplotlib colour and makes ax.bar raise, so the
        # default is mapped to None (= next colour of the property cycle).
        is_unset = isinstance(datacolor, str) and datacolor == ""
        bar_color = None if is_unset else datacolor
        ax.bar(bin_centers, bin_heights, width=bin_widths, label='data',color=bar_color,alpha=0.5)

    ax.plot(x_interval_for_fit, fitted_model(x_interval_for_fit), label=function_name, c=fit_colors[function_name],lw=3)

    ax.set_xlim(bin_borders.min(),bin_borders.max())
    ax.set_ylim(0., bin_heights.max()*1.2)
    ax.legend()


In [None]:
# Converters between MJD and calendar dates, intended for a secondary axis showing standard dates
def mjd_to_date(mjd_values):
    """Convert MJD value(s) to datetime object(s) through astropy ``Time``."""
    as_time = Time(mjd_values, format='mjd')
    return as_time.to_datetime()

def date_to_mjd(date_values):
    """Convert date value(s) accepted by astropy ``Time`` to MJD."""
    as_time = Time(date_values)
    return as_time.mjd

## Configuration

In [None]:
observing_location = EarthLocation.of_site('Rubin Observatory')
tz = timezone('America/Santiago')

## Merra2

In [None]:
# Merged Merra2 csv file used as input of this notebook
filename_m2 = "../SpectroMerra2/MerradataMerged/Merge_inst1_2d_asm_Nx_M2I1NXASM-2021-2024.csv"
if not os.path.isfile(filename_m2):
    # f-string so that the message shows the actual path (the previous literal
    # printed "{filename_m2}" verbatim); FileNotFoundError is still an Exception.
    raise FileNotFoundError(f"Missing Merra2 input file {filename_m2}")

In [None]:
df_m2 = pd.read_csv(filename_m2,index_col=0)

In [None]:
df_m2["mjd"] = Time(pd.to_datetime(df_m2.time.values)).mjd

In [None]:
df_m2.head()

## Join Auxtel and Merra2

### Build the Interpolation Function for Merra2 data

In [None]:
# Merra2 time axis (MJD) and TQV column, used here as the Merra2 PWV
X_M2 = df_m2.mjd.values
Y_M2 = df_m2.TQV.values


def func_M2interp(x):
    """Interpolate the Merra2 PWV (TQV) linearly at the time(s) ``x`` given in MJD.

    NOTE(review): ``np.interp`` expects ``X_M2`` to be increasing -- confirm
    that the csv file is ordered in time.
    """
    return np.interp(x, X_M2, Y_M2)

### Spectro Hologram data

In [None]:
FLAG_WITHCOLLIMATOR = False  # Not justified to cut data before collimator
DATE_WITHCOLLIMATOR = 20230930
# Single source of truth: the UTC timestamp is derived from DATE_WITHCOLLIMATOR
# (it used to be computed, then immediately overwritten by a hard-coded
# "2023-09-30 00:00:00.0+0000" string).
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
DATEMJD_WITHCOLLIMATOR = Time(datetime_WITHCOLLIMATOR).mjd

In [None]:
version_results = "v5"
legendtag = {"v1" : "old v3.1.0",
            "v2" : "v3.1.0-PWV<10mm",
            "v3" : "v3.1.0-PWV<15mm",
            "v4" : "Auxtel holo v3.1.0",
            "v5" : "Auxtel holo v3.1.0 09/22 - 10/24"}

In [None]:
atmfilenamesdict = {"v1" : "data/spectro/auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_testWithMaskedEdges_newBoundaries_newPolysRescaled_newFitBounds_adjustA1_lockedOrder2_removeThroughputTails_2.npy",
                    "v2" : "auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_PeekFinder.npy",
                    "v3" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z.npy",
                    "v4" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z_spectrfullextend.npy",
                    "v5" : "u_dagoret_auxtel_atmosphere_202209_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_No5SigmaClip_20241016T184601Z_spectrfullextended.npy"}

In [None]:
atmfilename = atmfilenamesdict[version_results]
tag = legendtag[version_results] 

## Initialisation

### Read the Auxtel data

In [None]:
specdata = np.load(atmfilename,allow_pickle=True)

In [None]:
df_spec = pd.DataFrame(specdata)

#### Add the corresponding interpolated Merra2 data at the Auxtel Measurement time

In [None]:
df_spec['pwv_m2'] = df_spec["ex_mjd"].apply(func_M2interp)
df_spec["dpwv"] =   df_spec["PWV [mm]_x"] - df_spec['pwv_m2']

### Remove spectra with red filter

In [None]:
df_spec['FILTER'].unique()

In [None]:
FLAG_REMOVE_FILTERS = True
if FLAG_REMOVE_FILTERS:
    df_spec=df_spec[df_spec["FILTER"] == 'empty']
    df_spec.reset_index(inplace=True)  

### Define if a target is faint or bright

In [None]:
def IsFaint(row):
    """Return True when the ``TARGET`` of ``row`` is in the list of faint targets.

    ``row`` is any mapping-like object with a "TARGET" entry (e.g. a dataframe row).
    """
    faint_targets = ('Feige110', 'HD074000', 'HD115169', 'HD031128', 'HD200654',
                     'HD167060', 'HD009051', 'HD142331', 'HD160617', 'HD111980')
    return row["TARGET"] in faint_targets

In [None]:
df_spec["isFaint"] = df_spec.apply(IsFaint,axis=1)

### Compute NightObs

In [None]:
# Vectorised integer division instead of a row-by-row apply: the night of
# observation is the exposure id without its last five digits
# (presumably YYYYMMDD, since it is compared with DATE_WITHCOLLIMATOR).
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
if FLAG_WITHCOLLIMATOR:
    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR]

## Apply Quality selection

In [None]:
fig,ax = plt.subplots(1,1)
df_spec["CHI2_FIT"].hist(bins=50,ax=ax,range=(0,200))
ax.set_yscale("log")

### Add the Time in pd.datetime

#### UTC

In [None]:
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])

In [None]:
DT = pd.Timedelta(minutes=7*24*60)
TMIN  = df_spec["Time"].min()-DT
TMAX  = df_spec["Time"].max()+DT

TMINMJD = Time(TMIN).mjd
TMAXMJD = Time(TMAX).mjd

### Compute relative time to Mid-night

In [None]:
def GetTimeToMidNight(row):
    """Return the local time of an observation in hours relative to midnight.

    The UTC ``DATE-OBS`` of ``row`` is converted to local time (timezone ``tz``).
    Times after noon are counted from the coming midnight, so the result lies
    in the interval (-12, 12] hours. Fractions of a second are ignored.
    """
    observing_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # wall-clock time at the site, either before or after midnight
    local_time = observing_time.to_datetime(timezone=tz)

    # whole seconds elapsed since the local midnight that started this calendar day
    seconds_since_midnight = 3600*local_time.hour + 60*local_time.minute + local_time.second
    dt_hour = seconds_since_midnight/3600.

    # evening hours belong to the night of the coming midnight: shift by 24H
    return dt_hour - 24. if dt_hour > 12. else dt_hour

In [None]:
df_spec["dt_midnight"] = df_spec.apply(GetTimeToMidNight,axis=1)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,4))
df_spec["dt_midnight"].hist(bins=48,range=(-12,12),ax=ax,facecolor="blue") 
ax.set_xlabel("time relative to midnight (hour)")
ax.set_title("Observation time")

### Compute Date relative to January

In [None]:
def GetDateToMidJanuary(row):
    """Return the local observation time of ``row`` with its year replaced by 2024.

    The UTC ``DATE-OBS`` is converted to local time (timezone ``tz``); month,
    day, hour, minute and second are kept, the year is forced to 2024 and the
    timezone is dropped. The result is a pandas timestamp.
    """
    observing_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # wall-clock time at the site, either before or after midnight
    local_time = observing_time.to_datetime(timezone=tz)

    # same calendar fields in the reference year 2024, without timezone
    calendar_fields = (local_time.month, local_time.day,
                       local_time.hour, local_time.minute, local_time.second)
    return pd.to_datetime(datetime(2024, *calendar_fields))

In [None]:
#df_spec["Time_january"] = df_spec.apply(GetDateToMidJanuary,axis=1)

In [None]:
def GetDateToMidJanuaryAndYear(row):
    """Return (local observation time moved to the year 2024, original local year).

    The UTC ``DATE-OBS`` is converted to local time (timezone ``tz``). The first
    element keeps month, day, hour, minute and second but forces the year to
    2024 and drops the timezone; the second element is the year of the local time.
    """
    observing_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # wall-clock time at the site, either before or after midnight
    local_time = observing_time.to_datetime(timezone=tz)

    # same calendar fields in the reference year 2024, without timezone
    calendar_fields = (local_time.month, local_time.day,
                       local_time.hour, local_time.minute, local_time.second)
    time_in_2024 = pd.to_datetime(datetime(2024, *calendar_fields))

    return time_in_2024, local_time.year

In [None]:
df_spec[["Time_january","Year"]] = df_spec.apply(GetDateToMidJanuaryAndYear,axis=1,result_type="expand")

In [None]:
df_spec[["Time_january","Year"]]

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec):
    """
    Compute the time boundaries of each observation night.

    input:
      df_spec the dataframe for spectroscopy summary results
      (columns "nightObs" and "Time" are used)
    output:
      the dict of night boundaries: nightObs -> (tmin, tmax), where tmin/tmax
      are the first/last "Time" of the night extended by a 30 minute margin
    """
    margin = pd.Timedelta(minutes=30)
    boundaries = {}
    for nightobs in df_spec["nightObs"].unique():
        # observation times of this night only
        night_times = df_spec.loc[df_spec["nightObs"] == nightobs, "Time"]
        boundaries[nightobs] = (night_times.min() - margin, night_times.max() + margin)
    return boundaries

In [None]:
dn = GetNightBoundariesDict(df_spec)

## Plot all data

## Apply Quality selection cuts

- Note now we have a cut on airmass < 2

In [None]:
def getSelectionCut(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.,vaodmax=0.1):
    """Build the boolean quality-selection mask for the spectra in ``df_spec``.

    A row is kept when all of these hold:
    - "CHI2_FIT" < chi2max
    - pwvmin < "PWV [mm]_x" < pwvmax and pwvmin < "PWV [mm]_y" < pwvmax
    - 186.75 < "D2CCD" < 187.75
    - "VAOD_x" < vaodmax
    - "EXPTIME" > 20
    - ozmin < "ozone [db]_y" < ozmax
    - "AIRMASS" < 2

    ``vaodmax`` is now really applied (the VAOD cut used to be hard-coded to
    0.1, which is also the default, so default calls are unchanged).

    Returns a boolean Series aligned with ``df_spec``.
    """
    cut = (
        (df_spec["CHI2_FIT"] < chi2max)
        & (df_spec["PWV [mm]_x"] > pwvmin) & (df_spec["PWV [mm]_x"] < pwvmax)
        & (df_spec["D2CCD"] > 186.75) & (df_spec["D2CCD"] < 187.75)
        & (df_spec["VAOD_x"] < vaodmax)
        & (df_spec['EXPTIME'] > 20.)
        & (df_spec["PWV [mm]_y"] > pwvmin) & (df_spec["PWV [mm]_y"] < pwvmax)
        & (df_spec["ozone [db]_y"] > ozmin) & (df_spec["ozone [db]_y"] < ozmax)
        & (df_spec["AIRMASS"] < 2)
    )
    return cut

In [None]:
def getSelectionCutNoPolar(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.,vaodmax=0.1):
    """Build the quality-selection mask, excluding the target "HD185975".

    A row is kept when all of these hold:
    - "CHI2_FIT" < chi2max
    - pwvmin < "PWV [mm]_x" < pwvmax and pwvmin < "PWV [mm]_y" < pwvmax
    - 186.75 < "D2CCD" < 187.75
    - "VAOD_x" < vaodmax
    - "EXPTIME" > 20
    - ozmin < "ozone [db]_y" < ozmax
    - "TARGET" != "HD185975"
    - "AIRMASS" < 2

    ``vaodmax`` is now really applied (the VAOD cut used to be hard-coded to
    0.1, which is also the default, so default calls are unchanged).

    Returns a boolean Series aligned with ``df_spec``.
    """
    cut = (
        (df_spec["CHI2_FIT"] < chi2max)
        & (df_spec["PWV [mm]_x"] > pwvmin) & (df_spec["PWV [mm]_x"] < pwvmax)
        & (df_spec["D2CCD"] > 186.75) & (df_spec["D2CCD"] < 187.75)
        & (df_spec["VAOD_x"] < vaodmax)
        & (df_spec['EXPTIME'] > 20.)
        & (df_spec["PWV [mm]_y"] > pwvmin) & (df_spec["PWV [mm]_y"] < pwvmax)
        & (df_spec["ozone [db]_y"] > ozmin) & (df_spec["ozone [db]_y"] < ozmax)
        & (df_spec["TARGET"] != "HD185975")
        & (df_spec["AIRMASS"] < 2)
    )
    return cut

In [None]:
def getSelectionCutWithPolar(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.,vaodmax=0.1):
    """Build the quality-selection mask, keeping only the target "HD185975".

    A row is kept when all of these hold:
    - "CHI2_FIT" < chi2max
    - pwvmin < "PWV [mm]_x" < pwvmax and pwvmin < "PWV [mm]_y" < pwvmax
    - 186.75 < "D2CCD" < 187.75
    - "VAOD_x" < vaodmax
    - "EXPTIME" > 20
    - ozmin < "ozone [db]_y" < ozmax
    - "TARGET" == "HD185975"
    - "AIRMASS" < 2

    ``vaodmax`` is now really applied (the VAOD cut used to be hard-coded to
    0.1, which is also the default, so default calls are unchanged).

    Returns a boolean Series aligned with ``df_spec``.
    """
    cut = (
        (df_spec["CHI2_FIT"] < chi2max)
        & (df_spec["PWV [mm]_x"] > pwvmin) & (df_spec["PWV [mm]_x"] < pwvmax)
        & (df_spec["D2CCD"] > 186.75) & (df_spec["D2CCD"] < 187.75)
        & (df_spec["VAOD_x"] < vaodmax)
        & (df_spec['EXPTIME'] > 20.)
        & (df_spec["PWV [mm]_y"] > pwvmin) & (df_spec["PWV [mm]_y"] < pwvmax)
        & (df_spec["ozone [db]_y"] > ozmin) & (df_spec["ozone [db]_y"] < ozmax)
        & (df_spec["TARGET"] == "HD185975")
        & (df_spec["AIRMASS"] < 2)
    )
    return cut

In [None]:
# Quality masks: all targets, all but HD185975 ("no polar"), and HD185975 only ("with polar")
cut = getSelectionCut(df_spec)
cut_nopolar = getSelectionCutNoPolar(df_spec)
# The bright/faint sub-samples reuse the non-polar mask instead of recomputing it
cut_nopolar_bright = cut_nopolar & (~df_spec["isFaint"])
cut_nopolar_faint = cut_nopolar & (df_spec["isFaint"])
cut_wthpolar = getSelectionCutWithPolar(df_spec)

In [None]:
# all selected data
df_spec_sel = df_spec[cut]
# subsample of selected data to understand differences
df_spec_np = df_spec[cut_nopolar] 
df_spec_np_b = df_spec[cut_nopolar_bright]
df_spec_np_f = df_spec[cut_nopolar_faint]
df_spec_wp = df_spec[cut_wthpolar]

In [None]:
print("Total number of Spectra          : ",len(df_spec))
print("Number of selected Spectra       : ",len(df_spec_sel))
print("Number of selected Polars        : ",len(df_spec_wp))
print("Number of selected Non-Polars    : ",len(df_spec_np))
print("Number of selected Non-Polars Bright : ",len(df_spec_np_b))
print("Number of selected Non-Polars Faint  : ",len(df_spec_np_f))

In [None]:
df_spec_sel.reset_index(drop=True,inplace=True)
df_spec_np.reset_index(drop=True,inplace=True)
df_spec_wp.reset_index(drop=True,inplace=True) 
df_spec_np_b.reset_index(drop=True,inplace=True)
df_spec_np_f.reset_index(drop=True,inplace=True)

In [None]:
#List_Of_Faint_targets = ['Feige110','HD074000','HD115169','HD031128','HD200654','HD167060','HD009051','HD142331','HD160617','HD111980']
print("Polar            :",len(df_spec_wp["TARGET"].unique()),"\t", df_spec_wp["TARGET"].unique()) 
print("Non Polar        :",len(df_spec_np["TARGET"].unique()),"\t" ,df_spec_np["TARGET"].unique())
print("Non Polar Bright :",len(df_spec_np_b["TARGET"].unique()),"\t" ,df_spec_np_b["TARGET"].unique())
print("Non Polar Faint  :",len(df_spec_np_f["TARGET"].unique()),"\t",df_spec_np_f["TARGET"].unique())

## Recompute night boundaries

In [None]:
#dn = GetNightBoundariesDict(df_spec_sel)

## Plot all data

In [None]:
x_mjd = np.arange(TMINMJD,TMAXMJD,1/24/60)
y_mjd = func_M2interp(x_mjd)

In [None]:
fig,axs = plt.subplots(1,1,figsize=(14,7),layout='constrained')
ax  = axs

# Merra2 samples as points; the legend is drawn once, by ax.legend() below
df_m2.plot(x="mjd",y="TQV",ax=ax,marker=".",c="b",lw=0,ms=5,label="Merra2",legend=False)

# Interpolation function evaluated on the fine time grid, drawn as a thin line
ax.plot(x_mjd,y_mjd,'-k',lw=0.5)

ax.set_ylabel("PWV [mm]_x")
ax.set_xlabel("date")
ax.set_title("Precipitable water vapor measured by Merra2 \n (check interpolation function)",fontweight="bold")
ax.legend()

# No plt.tight_layout() here: the figure uses layout='constrained' and
# tight_layout() would replace that layout engine.

ax.set_ylim(0.,25.)
ax.set_xlim(TMINMJD,TMAXMJD)

# A secondary axis with calendar dates can be added with
# ax.secondary_xaxis('top', functions=(mjd_to_date, date_to_mjd))

plt.show()

## Fit Merra2 with sinusoids

In [None]:
xdata_m = df_m2["mjd"].values 
ydata_m = df_m2["TQV"].values 
xdata_min = xdata_m.min()
xdata_max = xdata_m.max()
xfit_m = np.arange(xdata_min,xdata_max,1) 

In [None]:
#func_seasonalvariation(x, a, b, c, d, e, tb, tc, td, te)
#popt, pcov = curve_fit(func_seasonalvariation, xdata, ydata, bounds=(0, [3., 1., 0.5]))
popt_m, pcov_m = curve_fit(func_seasonalvariation, xdata_m, ydata_m)

In [None]:
popt_m

In [None]:
yfit_m = func_seasonalvariation(xfit_m,*popt_m)

### Save the residuals in pandas dataframe

In [None]:
#### Save residuals in Merra2 pandas dataframe
df_m2["TQVres"] = ydata_m - func_seasonalvariation(xdata_m,*popt_m)
ydata_m_res = df_m2["TQVres"].values 

In [None]:
#### Save residuals in Auxtel pandas dataframe
df_spec_sel["pwv_m2_res"] =  df_spec_sel["pwv_m2"] - func_seasonalvariation(df_spec_sel["ex_mjd"].values,*popt_m)

In [None]:
fig = plt.figure(figsize=(16,6),layout="constrained")

# Use a gridspec to share the figure height between the two panels
gs = fig.add_gridspec(2, 1, height_ratios=[0.7, 0.3])  # 70% for the data panel, 30% for the residuals panel

# Two stacked panels sharing the same x axis
ax1 = fig.add_subplot(gs[0])  # top panel: data and fitted model
ax2 = fig.add_subplot(gs[1],sharex=ax1)  # bottom panel: residuals

ax1.plot(xdata_m,ydata_m,'b-')
ax1.plot(xfit_m,yfit_m,'r-',lw=3)
ax2.plot(xdata_m,ydata_m_res,'b-')
ax2.axhline(0,color="r",lw=3)

ax1.set_ylabel("PWV (mm)")
ax2.set_ylabel("PWV residuals (mm)")
ax2.grid()
ax1.set_title("Precipitable water vapor at Merra2")
ax2.set_xlabel("date (mjd)")
plt.show()

### Fit Auxtel data with same sinusoid

In [None]:
xdata_a = df_spec_sel["ex_mjd"].values 
ydata_a = df_spec_sel["PWV [mm]_x"].values 
xdata_min = xdata_a.min()
xdata_max = xdata_a.max()
xfit_a = np.arange(xdata_min,xdata_max,1) 

In [None]:
# Parameter order: a, b, c, d, e, tb, tc, td, te.
# The constant and the four amplitudes are free within [-20, 20]; each phase is
# confined to a narrow window around the Merra2 fitted value. min/max keep
# lower < upper whatever the sign of the phase.
lower_bounds = [-20.]*5 + [min(phase*0.999, phase*1.0001) for phase in popt_m[5:9]]
upper_bounds = [20.]*5 + [max(phase*0.999, phase*1.0001) for phase in popt_m[5:9]]

In [None]:
#func_seasonalvariation(x, a, b, c, d, e, tb, tc, td, te)
popt, pcov = curve_fit(func_seasonalvariation, xdata_a, ydata_a, p0 = popt_m, bounds=(lower_bounds , upper_bounds ))

In [None]:
popt

In [None]:
yfit_a = func_seasonalvariation(xfit_a,*popt)

#### Keep the residuals of Auxtel wrt the seasonal model

In [None]:
### Save the residuals in auxtel pandas dataframe wrt seasonal variation
df_spec_sel["PWV [mm]_x_res"] = ydata_a - func_seasonalvariation(xdata_a,*popt)
ydata_a_res = df_spec_sel["PWV [mm]_x_res"]

In [None]:
fig = plt.figure(figsize=(14,8),layout="constrained")

# Two stacked panels sharing the x axis: 70% for data and fits, 30% for residuals
gs = fig.add_gridspec(2, 1, height_ratios=[0.7, 0.3])
ax1 = fig.add_subplot(gs[0])  # top panel: data and seasonal models
ax2 = fig.add_subplot(gs[1],sharex=ax1)  # bottom panel: residuals

# Top panel: Auxtel and Merra2 data with their fitted seasonal models
ax1.plot(xdata_a,ydata_a,'o',c="r",ms=2.,label="Auxtel data")
ax1.plot(xfit_m,yfit_m,'b-',lw=3,label="fit seasonal model Merra2")
ax1.plot(xdata_m,ydata_m,'.',c="b",ms=0.5,label="Merra2 data")
ax1.plot(xfit_a,yfit_a,'r-',lw=3,label="fit seasonal model Auxtel")
ax1.set_xlim(xdata_min,xdata_max)
ax1.legend()
# title typo fixed ("resoduals" -> "residuals")
ax1.set_title("Comparison of PWV in Auxtel and Merra2 and residuals wrt fitted model")
ax1.set_ylabel("PWV (mm)")
ax1.set_xlabel("date (mjd)")
ax1.grid()

# Bottom panel: residuals of both data sets wrt their own seasonal model
ax2.plot(xdata_m,ydata_m_res,'.',color='b',ms=0.5)
ax2.axhline(0,color="b",lw=3)

ax2.plot(xdata_a,ydata_a_res,'o',c="r",ms=2.,label="Auxtel")
ax2.axhline(0,color="r",lw=3)

ax2.set_ylabel("PWV residuals (mm)")
ax2.grid()
ax2.set_xlabel("date (mjd)")

plt.show()

### Compare the residuals Auxtel - Merra2 after subtraction of seasonal variation

In [None]:
df_spec_sel["dpwv2"] = df_spec_sel["PWV [mm]_x_res"] - df_spec_sel["pwv_m2_res"]

In [None]:
fig,(ax1,ax2)  = plt.subplots(1,2,figsize=(14,5),layout="constrained")

# Left panel: Auxtel residuals vs Merra2 residuals, coloured by observation date
sc1 = ax1.scatter(data=df_spec_sel, x='pwv_m2_res', y='PWV [mm]_x_res', c='ex_mjd', marker="+",s=30,cmap='rainbow')
ax1.grid()
ax1.set_xlim(-5,8)
ax1.set_ylim(-5,8)
ax1.set_aspect("equal")
# y = x reference line
ax1.plot([-5,8],[-5,8],'-k')
ax1.set_ylabel("PWV residuals Auxtel(mm)")
ax1.set_xlabel("PWV residuals Merra2 (mm)")

cbar = plt.colorbar(sc1)
cbar.set_label('mjd', rotation=90)

# Right panel: distribution of the residual difference Auxtel - Merra2
df_spec_sel["dpwv2"].hist(bins=50,ax=ax2,histtype="step",linewidth=3,color="b")
# Raw string: "\D" is an invalid escape sequence in a normal string literal
ax2.set_xlabel(r"$\Delta PWV$-residuals (Auxtel-Merra2) (mm)")
ax2.set_xlim(-5,5)
plt.suptitle("PWV residuals after seasonal variation correction",y=1.01,fontsize=10)
plt.show()

## Now we need to compute the residuals with respect to the fitted model

In [None]:
df_spec_sel

### Comparison of Auxtel Selected data and Merra2

In [None]:
fig,axs = plt.subplots(1,1,figsize=(14,7),layout='constrained')
ax  = axs

# Merra2 and selected Auxtel points; the legend is drawn once, by ax.legend() below
df_m2.plot(x="mjd",y="TQV",ax=ax,marker=".",c="b",lw=0,ms=3,label="Merra2",legend=False)
df_spec_sel.plot(x="ex_mjd",y="PWV [mm]_x",ax=ax,marker='o',ms=3,c="r",lw=0.0,grid=True,label=tag,legend=False)

ax.set_ylabel("PWV [mm]_x")
ax.set_xlabel("date")
ax.set_title("Precipitable water vapor measured by Auxtel (holo) \n and comparison with Merra2 data",fontweight="bold")
ax.legend()

# Shade the period from the start of the plot up to DATEMJD_WITHCOLLIMATOR
ax.axvspan(TMINMJD,DATEMJD_WITHCOLLIMATOR , color='yellow', alpha=0.1)

# No plt.tight_layout() here: the figure uses layout='constrained' and
# tight_layout() would replace that layout engine.

ax.set_ylim(0.,20.)
ax.set_xlim(TMINMJD,TMAXMJD)

figname =f"{pathfigs}/pwvholoM2_allpoints_allnights_mjd"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Top panel: Merra2 TQV and Auxtel PWV for the three star samples.
# Bottom panel: the "dpwv" column for the same three samples.
fig = plt.figure(figsize=(16, 8), layout='constrained')

# Create the grid from the figure itself so that the constrained layout engine
# manages it (a free-standing GridSpec is not attached to any figure).
gs = fig.add_gridspec(2, 1, height_ratios=[2, 1])
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1], sharex=ax1)

# legend=False everywhere: legends are drawn once per axes by ax.legend().
df_m2.plot(x="mjd", y="TQV", ax=ax1, marker=".", c="b", lw=0, ms=5, label="Merra2", legend=False)

df_spec_wp.plot(x="ex_mjd", y="PWV [mm]_x", ax=ax1, marker='o', ms=5, c="r", lw=0.0,
                grid=True, label="with polar", legend=False)
df_spec_np_b.plot(x="ex_mjd", y="PWV [mm]_x", ax=ax1, marker='o', ms=5, c="purple", lw=0.0,
                  grid=True, label="non polar bright", legend=False)
df_spec_np_f.plot(x="ex_mjd", y="PWV [mm]_x", ax=ax1, marker='o', ms=5, c="green", lw=0.0,
                  grid=True, label="non polar faint", legend=False)

ax1.set_ylabel("PWV [mm]_x")
ax1.set_title("Precipitable water vapor measured by Auxtel (holo) \n comparison with Merra2 and relative difference",fontweight="bold")
ax1.legend()

ax1.set_ylim(0., 15.)
ax1.set_xlim(TMINMJD, TMAXMJD)
ax1.axvspan(TMINMJD, DATEMJD_WITHCOLLIMATOR, color='yellow', alpha=0.1)

df_spec_wp.plot(x="ex_mjd", y="dpwv", ax=ax2, marker='o', ms=4, c="r", lw=0.0,
                grid=True, label="with polar", legend=False)
df_spec_np_b.plot(x="ex_mjd", y="dpwv", ax=ax2, marker='o', ms=4, c="purple", lw=0.0,
                  grid=True, label="non polar bright", legend=False)
df_spec_np_f.plot(x="ex_mjd", y="dpwv", ax=ax2, marker='o', ms=4, c="green", lw=0.0,
                  grid=True, label="non polar faint", legend=False)

# Raw string: "\D" is not a valid Python escape sequence.
ax2.set_ylabel(r"$\Delta PWV$ (mm)")
ax2.set_xlabel("date")
ax2.axvspan(TMINMJD, DATEMJD_WITHCOLLIMATOR, color='yellow', alpha=0.1)
ax2.legend()

# No plt.tight_layout(): it would replace the constrained layout engine
# requested when the figure was created.
#figname =f"{pathfigs}/pwvholoM2_allpoints_allnights_mjd"+figtype
#plt.savefig(figname)
plt.show()

### $\Delta PWV$ vs PWV

$$
\Delta PWV = PWV_{auxtel} - PWV_{merra2}
$$

In [None]:
# "dpwv" versus PWV, colour-coded by MJD: left panel against the Merra2 PWV,
# right panel against the Auxtel PWV.
fig, axs = plt.subplots(1, 2, figsize=(14, 6), layout='constrained')
ax1, ax2 = axs

# Left panel: abscissa is the Merra2 PWV.
sc1 = ax1.scatter(data=df_spec_sel, x='pwv_m2', y='dpwv', c='ex_mjd', marker="+", s=30, cmap='rainbow')
ax1.set_title(r"$\Delta PWV$ vs PWV")
ax1.set_xlabel("PWV (mm) (Merra)")
ax1.set_ylabel(r"$\Delta$ PWV (Merra) (mm)")

# Right panel: abscissa is the Auxtel PWV only. The Merra2 points are no longer
# drawn a second time on this panel, whose x axis is labelled "AUXTEL".
sc2 = ax2.scatter(data=df_spec_sel, x='PWV [mm]_x', y='dpwv', c='ex_mjd', marker="+", s=30, cmap='rainbow')
ax2.set_title(r"$\Delta PWV$ vs PWV")
ax2.set_xlabel("PWV (mm) (AUXTEL)")
ax2.set_ylabel(r"$\Delta$ PWV (mm)")

# Both scatters are coloured by the same "ex_mjd" column, so one bar serves both.
cbar = plt.colorbar(sc2, ax=ax2)
plt.show()

In [None]:
# "dpwv2" (difference of the residuals) versus PWV, colour-coded by MJD:
# left panel against the Merra2 PWV, right panel against the Auxtel PWV.
fig, axs = plt.subplots(1, 2, figsize=(14, 6), layout='constrained')
ax1, ax2 = axs

# Left panel: abscissa is the Merra2 PWV.
sc1 = ax1.scatter(data=df_spec_sel, x='pwv_m2', y='dpwv2', c='ex_mjd', marker="+", s=30, cmap='rainbow')
ax1.set_title(r"$\Delta PWV_2$ vs PWV")
ax1.set_xlabel("PWV (mm) (Merra)")
ax1.set_ylabel(r"$\Delta$ PWV2 (Merra) (mm)")

# Right panel: abscissa is the Auxtel PWV only. The Merra2 points are no longer
# drawn a second time on this panel, whose x axis is labelled "AUXTEL".
sc2 = ax2.scatter(data=df_spec_sel, x='PWV [mm]_x', y='dpwv2', c='ex_mjd', marker="+", s=30, cmap='rainbow')
ax2.set_title(r"$\Delta PWV_2$ vs PWV")
ax2.set_xlabel("PWV (mm) (AUXTEL)")
ax2.set_ylabel(r"$\Delta$ PWV2 (mm)")

# Both scatters are coloured by the same "ex_mjd" column, so one bar serves both.
cbar = plt.colorbar(sc2, ax=ax2)
plt.show()

## Fit a straight line to find a link between PWV Merra2 - PWV Auxtel

In [None]:
def linearmodel(x, a, b):
    """Straight-line model ``y = a * x + b``.

    Parameters
    ----------
    x : scalar or array
        Abscissa value(s).
    a : slope.
    b : intercept.

    Returns
    -------
    ``a * x + b``, with the same shape as ``x``.
    """
    slope_term = a * x
    return slope_term + b

In [None]:
#def error_propagation(x, popt, pcov):
#    """Calcule l'incertitude sur y(x) en tenant compte des erreurs sur a et b."""
#    g = np.array([x, 1])  # Gradient de f(x) = ax + b
#    sigma_y2 = g.T @ pcov @ g  # Variance de y
#    return np.sqrt(sigma_y2)  # Écart-type

def error_propagation(x, popt, pcov):
    """Return the standard deviation of y(x) = a*x + b for a scalar or array x.

    Parameters
    ----------
    x : scalar or numpy array
        Abscissa value(s) where the uncertainty is evaluated.
    popt : fitted parameters; not used by the computation.
    pcov : (2, 2) covariance matrix of (a, b).

    Returns
    -------
    numpy array with one standard deviation per abscissa.
    """
    abscissa = np.atleast_1d(x)
    # One row per abscissa holding the gradient of y with respect to (a, b): (x, 1).
    jacobian = np.vstack((abscissa, np.ones_like(abscissa))).T
    # Row-by-row quadratic form  g . pcov . g.
    variance = np.einsum('ni,ij,nj->n', jacobian, pcov, jacobian)
    return np.sqrt(variance)

def ndof(x,popt):
    """Number of degrees of freedom: number of points minus number of parameters."""
    return len(x) - len(popt)
    

def error_propagation_with_IC(x, popt, pcov, dof, confidence=0.95):
    """Half-width of the two-sided confidence interval on y(x) = a*x + b.

    Parameters
    ----------
    x : scalar or numpy array
        Abscissa value(s) where the interval is evaluated.
    popt : fitted parameters; not used by the computation.
    pcov : (2, 2) covariance matrix of (a, b).
    dof : number of degrees of freedom of the fit.
    confidence : confidence level of the interval (default 0.95).

    Returns
    -------
    numpy array: standard deviation of y(x) multiplied by the Student-t
    quantile at ``1 - (1 - confidence) / 2`` for ``dof`` degrees of freedom.
    """
    # Local import under an explicit alias: the function no longer depends on a
    # module-level name ``t``, which any notebook cell can overwrite.
    from scipy.stats import t as student_t

    x = np.atleast_1d(x)
    g = np.vstack([x, np.ones_like(x)])  # (2, N) matrix of gradients [x, 1]
    sigma_y2 = np.einsum('ij,jk,ik->i', g.T, pcov, g.T)  # variance of y(x)
    sigma_y = np.sqrt(sigma_y2)  # standard deviation

    # Student factor of the two-sided interval.
    alpha = 1 - confidence
    t_factor = student_t.ppf(1 - alpha / 2, dof)

    return sigma_y * t_factor

In [None]:
def param_label(popt):
    """Build the two-line text box content: slope then intercept, 3 decimals each."""
    lines = (
        f"slope = {popt[0]:.3f}",
        f"intercept = {popt[1]:.3f}",
    )
    return "\n".join(lines)

In [None]:
# Abscissa range and sampling grid on which the fitted lines are evaluated
# when they are drawn.
XMIN = 0.
XMAX = 15.
xfit = np.linspace(XMIN,XMAX,500)

# define a model for a line
# NOTE(review): line_orig is not referenced in the visible part of the notebook.
line_orig = models.Linear1D(slope=1.0, intercept=0.5)

# initialize a linear fitter (astropy)
fit = fitting.LinearLSQFitter()
# initialize one linear model per fitted sample
# (all selected, with polar, non polar bright, non polar faint)
line_init = models.Linear1D()
line_init1 = models.Linear1D()
line_init2 = models.Linear1D()
line_init3 = models.Linear1D()

#### Better fit with curve_fit

In [None]:
def _constrained_line_fit(frame, init_model):
    """Fit Auxtel PWV versus Merra2 PWV for one sample.

    Two fits are run on the same points: the astropy linear least-squares fit
    of ``init_model``, and a curve_fit of ``linearmodel`` with unit errors and
    the slope bounded to [0.99, 1.01].

    Returns (x, y, astropy fitted model, popt, pcov, perr, dof, label text).
    """
    xs = frame["pwv_m2"].values
    ys = frame["PWV [mm]_x"].values
    astropy_line = fit(init_model, xs, ys)
    unit_errors = np.ones_like(ys) * 1.0
    best, cov = curve_fit(
        linearmodel, xs, ys,
        sigma=unit_errors,
        p0=[1, 0],
        bounds=([0.99, -np.inf], [1.01, np.inf]),
        absolute_sigma=True,
    )
    return xs, ys, astropy_line, best, cov, np.sqrt(np.diag(cov)), ndof(xs, best), param_label(best)


# All selected data
x, y, fitted_line, popt, pcov, perr, dof, txtstr = _constrained_line_fit(df_spec_sel, line_init)

# Selected data, with polar
x1, y1, fitted_line1, popt1, pcov1, perr1, dof1, txtstr1 = _constrained_line_fit(df_spec_wp, line_init1)

# Selected data, non polar bright
x2, y2, fitted_line2, popt2, pcov2, perr2, dof2, txtstr2 = _constrained_line_fit(df_spec_np_b, line_init2)

# Selected data, non polar faint
x3, y3, fitted_line3, popt3, pcov3, perr3, dof3, txtstr3 = _constrained_line_fit(df_spec_np_f, line_init3)

### Plot correlation Auxtel Merra2

In [None]:
# One colour per star sample.
palette = sns.color_palette("hls", 3)

fig, axs = plt.subplots(1, 1, figsize=(6, 6), layout='constrained')
ax = axs
leg = ax.get_legend()

# Auxtel PWV versus Merra2 PWV, one marker colour per sample.
_samples = (
    (df_spec_wp, "with polar"),
    (df_spec_np_b, "non polar bright"),
    (df_spec_np_f, "non polar faint"),
)
for (_frame, _name), _colour in zip(_samples, palette):
    _frame.plot(x="pwv_m2", y="PWV [mm]_x", ax=ax, marker='+', ms=8, c=_colour,
                lw=0.0, grid=True, label=_name, legend=leg)

ax.set_aspect("equal")
ax.set_xlim(0., 15.)
ax.set_ylim(0., 15.)

# Line fitted on all selected data, with its 95 % confidence belt.
yfit = linearmodel(xfit, *popt)
yfit_err = error_propagation_with_IC(xfit, popt, pcov, dof, confidence=0.95)
ax.plot(xfit, yfit, "-k", lw=3)
ax.fill_between(xfit, yfit - yfit_err, yfit + yfit_err, color='grey', alpha=0.2, label="confidence belt 95% CL")

ax.set_xlabel("PWV - Merra2 (mm)")
ax.set_ylabel("PWV - Auxtel (mm)")
ax.set_title("PWV-Auxtel vs PWV-Merra2")

# Fitted slope and intercept in a text box.
ax.text(0.1, 0.95, txtstr, transform=ax.transAxes, fontsize=16, verticalalignment='top', bbox=props)

ax.legend(loc="upper right")
plt.show()

### Plot correlation Auxtel Merra2, separating polar, bright non polar and faint non polar

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Three side-by-side panels on the top row, plus a thin bottom row reserved
# for the colour bar (95 % / 5 % of the height).
fig = plt.figure(figsize=(18,6))
gs = fig.add_gridspec(2, 3, height_ratios=[0.95, 0.05])

ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1],sharex=ax1, sharey=ax1)
ax3 = fig.add_subplot(gs[0, 2],sharex=ax1, sharey=ax1)

# Common colour limits: a single colour bar is only valid for the three panels
# if every scatter maps MJD to colour over the same range.
_mjd_lo = np.nanmin([d["ex_mjd"].min() for d in (df_spec_wp, df_spec_np_b, df_spec_np_f)])
_mjd_hi = np.nanmax([d["ex_mjd"].max() for d in (df_spec_wp, df_spec_np_b, df_spec_np_f)])

# Auxtel PWV versus Merra2 PWV for each sample, colour-coded by MJD.
sc1 = ax1.scatter(data=df_spec_wp, x='pwv_m2', y='PWV [mm]_x', c='ex_mjd', marker="+",s=30,
                  cmap='rainbow', vmin=_mjd_lo, vmax=_mjd_hi, label="polar")
sc2 = ax2.scatter(data=df_spec_np_b, x='pwv_m2', y='PWV [mm]_x', c='ex_mjd', marker="+",s=30,
                  cmap='rainbow', vmin=_mjd_lo, vmax=_mjd_hi, label="non-polar bright")
sc3 = ax3.scatter(data=df_spec_np_f, x='pwv_m2', y='PWV [mm]_x', c='ex_mjd', marker="+",s=30,
                  cmap='rainbow', vmin=_mjd_lo, vmax=_mjd_hi, label="non-polar faint")

# Panel 1: fitted line, 95 % confidence belt and fitted parameters.
ax1.text(0.1, 0.95, txtstr1, transform=ax1.transAxes, fontsize=16,verticalalignment='top', bbox=props)
yfit1 = linearmodel(xfit,*popt1)
yfit_err1 =  error_propagation_with_IC(xfit, popt1, pcov1, dof1, confidence=0.95)
ax1.plot(xfit,yfit1,"-k",lw=3)
ax1.fill_between(xfit, yfit1- yfit_err1, yfit1+ yfit_err1, color='grey', alpha=0.2,label="confidence belt 95% CL")

# Panel 2
ax2.text(0.1, 0.95, txtstr2, transform=ax2.transAxes, fontsize=16,verticalalignment='top', bbox=props)
yfit2 = linearmodel(xfit,*popt2)
yfit_err2 =  error_propagation_with_IC(xfit, popt2, pcov2, dof2, confidence=0.95)
ax2.plot(xfit,yfit2,"-k",lw=3)
ax2.fill_between(xfit, yfit2 - yfit_err2, yfit2 + yfit_err2, color='grey', alpha=0.2,label="confidence belt 95% CL")

# Panel 3
ax3.text(0.1, 0.95, txtstr3, transform=ax3.transAxes, fontsize=16,verticalalignment='top', bbox=props)
yfit3 = linearmodel(xfit,*popt3)
yfit_err3 =  error_propagation_with_IC(xfit, popt3, pcov3, dof3, confidence=0.95)
ax3.plot(xfit,yfit3,"-k",lw=3)
ax3.fill_between(xfit, yfit3 - yfit_err3, yfit3 + yfit_err3, color='grey', alpha=0.2,label="confidence belt 95% CL")

ax1.legend()
ax2.legend()
ax3.legend()

ax1.set_ylabel("PWV - Auxtel (mm)")
ax1.set_xlabel("PWV - Merra2 (mm)")
ax2.set_xlabel("PWV - Merra2 (mm)")
ax3.set_xlabel("PWV - Merra2 (mm)")

# Identity line on each panel.
ax1.plot([0,15],[0,15],'-.k')
ax2.plot([0,15],[0,15],'-.k')
ax3.plot([0,15],[0,15],'-.k')

# Axes formatting; x and y limits are shared with ax2 and ax3.
ax1.set_aspect("equal")
ax2.set_aspect("equal")
ax3.set_aspect("equal")
ax1.set_xlim(0.,15.)
ax1.set_ylim(0.,15.)

# Colour bar spanning the whole width of the bottom grid row.
ax_cbar = fig.add_subplot(gs[1, :])

# Keep the handle returned by fig.colorbar: the label must be set on THIS
# colour bar, not on a ``cbar`` left over from an earlier figure.
cbar = fig.colorbar(sc1, cax=ax_cbar, orientation='horizontal')
cbar.set_label('date (MJD)', fontsize=12)

plt.suptitle("PWV Auxtel vs PWV Merra2")
plt.tight_layout()

plt.show()


In [None]:
# Distinct values of the "nightObs" column among the selected spectra.
all_selected_nights = df_spec_sel["nightObs"].unique()

In [None]:
#all_selected_nights 

## LombScargle

In [None]:
# Periodogram inputs: "ex_mjd" as time and the Auxtel "PWV [mm]_x" as signal.
x = df_spec_sel["ex_mjd"].values
y = df_spec_sel["PWV [mm]_x"].values

In [None]:
# Lomb-Scargle analysis of (x, y) drawn on `ax` and saved under pathfigs.
# LombScargle_analysis, FIGXSIZE_0 and FIGYSIZE_0 are defined elsewhere in the notebook.
fig,ax = plt.subplots(1,1,figsize = (FIGXSIZE_0,FIGYSIZE_0),layout="constrained")
LombScargle_analysis(x,y, ax=ax ,mode= "logxliny",
                 title = "LombScargle : PWV  (Auxtel)",
                 xlabel="frequency (days$^{-1}$)",
                 ylabel=" ",
                 label="Auxtel  PWV")
figname =f"{pathfigs}/pwv_LombScargle_auxtel"+figtype
fig.savefig(figname)
plt.show()

In [None]:
# Periodogram inputs: "ex_mjd" as time and the Auxtel residuals "PWV [mm]_x_res" as signal.
x = df_spec_sel["ex_mjd"].values
y = df_spec_sel["PWV [mm]_x_res"].values

In [None]:
# Lomb-Scargle analysis of the Auxtel PWV residuals.
# Title, label and file name now say "residuals": with the same file name as
# the PWV periodogram, this figure overwrote the one saved by the previous plot.
fig,ax = plt.subplots(1,1,figsize = (FIGXSIZE_0,FIGYSIZE_0),layout="constrained")
LombScargle_analysis(x,y, ax=ax ,mode= "logxliny",
                 title = "LombScargle : PWV residuals (Auxtel)",
                 xlabel="frequency (days$^{-1}$)",
                 ylabel=" ",
                 label="Auxtel  PWV residuals")
figname =f"{pathfigs}/pwv_res_LombScargle_auxtel"+figtype
fig.savefig(figname)
plt.show()

## Discrete Covariance curve

In [None]:
# Constant uncertainty assigned to every Auxtel PWV point (column "sig_pwv")
# before the curves are handed to pyzdcf. Presumably in mm, like the PWV
# values -- TODO confirm.
SIGMA_REPEATABILITY = 0.26

In [None]:
def ComputeZDCF(filename_in,df_pwv_curve,minpts=0):
    """
    Run pyzdcf in auto-correlation mode on a single time series.

    The series is written, without index or header, to
    ``dcf_path_input/filename_in``; pyzdcf is then pointed at that directory
    and at ``dcf_path_output`` for its own output files (prefix 'acf').

    parameters :
    - filename_in : name of the temporary csv file holding the input curve
    - df_pwv_curve : pandas dataframe with 3 columns : (time, pwv, sigma)
      The time unit (days, hours, minutes ...) is chosen by the caller
    - minpts : minimum number of points per bin, forwarded to pyzdcf
      (0 is a flag for the default value of 11)

    return :
    - the object returned by pyzdcf
    """
    # Dump the curve where pyzdcf will look for its input file.
    csv_path = os.path.join(dcf_path_input, filename_in)
    df_pwv_curve.to_csv(csv_path, index=False, header=False)

    # pyzdcf settings
    zdcf_settings = {
        "autocf": True,             # Autocorrelation (T) or cross-correlation (F)
        "prefix": 'acf',            # Output files prefix
        "uniform_sampling": False,  # Uniform sampling?
        "omit_zero_lags": False,    # Omit zero lag points?
        "minpts": minpts,           # Min. num. of points per bin
        "num_MC": 100,              # Num. of Monte Carlo simulations for error estimation
        "lc1_name": filename_in,    # Name of the first light curve file
        "lc2_name": filename_in,    # Second light curve file (required only for a CCF)
    }

    return pyzdcf(
        input_dir=dcf_path_input + "/",
        output_dir=dcf_path_output + "/",
        intr=False,
        parameters=zdcf_settings,
        sep=',',
        sparse='auto',
        verbose=False,
    )

In [None]:
def ComputeCrossZDCF(filename_in1,filename_in2,df_pwv_curve1,df_pwv_curve2,minpts=0):
    """
    Run pyzdcf in cross-correlation mode on two time series.

    Each series is written, without index or header, to a csv file inside
    ``dcf_path_input``; pyzdcf is then pointed at that directory and at
    ``dcf_path_output`` for its own output files (prefix 'crosscf').

    parameters :
    - filename_in1, filename_in2 : names of the temporary csv files holding
      the first and the second curve
    - df_pwv_curve1, df_pwv_curve2 : pandas dataframes with 3 columns :
      (time, pwv, sigma). The time unit is chosen by the caller
    - minpts : minimum number of points per bin, forwarded to pyzdcf
      (0 is a flag for the default value of 11)

    return :
    - the object returned by pyzdcf
    """
    # Dump both curves where pyzdcf will look for its input files.
    for csv_name, curve in ((filename_in1, df_pwv_curve1), (filename_in2, df_pwv_curve2)):
        curve.to_csv(os.path.join(dcf_path_input, csv_name), index=False, header=False)

    # pyzdcf settings
    zdcf_settings = {
        "autocf": False,            # Autocorrelation (T) or cross-correlation (F)
        "prefix": 'crosscf',        # Output files prefix
        "uniform_sampling": False,  # Uniform sampling?
        "omit_zero_lags": False,    # Omit zero lag points?
        "minpts": minpts,           # Min. num. of points per bin
        "num_MC": 100,              # Num. of Monte Carlo simulations for error estimation
        "lc1_name": filename_in1,   # Name of the first light curve file
        "lc2_name": filename_in2,   # Name of the second light curve file
    }

    return pyzdcf(
        input_dir=dcf_path_input + "/",
        output_dir=dcf_path_output + "/",
        intr=False,
        parameters=zdcf_settings,
        sep=',',
        sparse='auto',
        verbose=False,
    )

### a) Search for long timescale correlation in PWV Auxtel Data

#### Discrete covariance on the PWV values

##### Prepare the data with the three required columns

In [None]:
# Explicit copy: columns are added to this frame afterwards, and it must be an
# independent object rather than a selection taken from df_spec_sel.
df_dcf_in = df_spec_sel[["ex_mjd","PWV [mm]_x"]].copy()

In [None]:
# Time axis "t_day": MJD counted from the earliest "ex_mjd" of the frame.
tstart = df_dcf_in["ex_mjd"].min()
df_dcf_in["t_day"] = df_dcf_in["ex_mjd"] - tstart

In [None]:
# Same constant uncertainty SIGMA_REPEATABILITY on every point (column "sig_pwv").
df_dcf_in = df_dcf_in.assign(sig_pwv = lambda x: SIGMA_REPEATABILITY)

In [None]:
# Keep the three columns, in the order (time, value, sigma) described in the ComputeZDCF docstring.
df_dcf_in = df_dcf_in[["t_day","PWV [mm]_x","sig_pwv"]]

In [None]:
df_dcf_in.head() 

In [None]:
# Auto-correlation of the Auxtel PWV curve through ComputeZDCF, with minpts = 21.
df_dcf_out = ComputeZDCF("dcf_in_pwv_auxtel.csv",df_dcf_in, minpts = 21 )
#df_dcf_out = ComputeZDCF("dcf_in_pwv_auxtel.csv",df_dcf_in, minpts = 101 )

In [None]:
# Unpack the ZDCF output: lag "tau" and "dcf" value, each with its lower/upper
# error columns transposed into a 2-row array for errorbar.
xerr = df_dcf_out[["-sig(tau)","+sig(tau)"]].values.T
yerr = df_dcf_out[["-err(dcf)","+err(dcf)"]].values.T
x = df_dcf_out["tau"].values
y = df_dcf_out["dcf"].values

In [None]:
# DCF versus lag with asymmetric error bars on both axes.
fig,ax = plt.subplots(1,1,figsize=(12,6),layout="constrained")
ax.errorbar(x,y,xerr=xerr,yerr=yerr,marker='o', mfc='red',linewidth=0.5,
         mec='red', ms=2, mew=2,ecolor="k",elinewidth=2,capsize=2,uplims=True, lolims=True)
ax.grid()
ax.set_ylim(-1.2,1.2)
ax.set_title(f"Discrete covariance function on PWV measurements in Auxtel (holo)")
ax.set_xlabel("Time (days)")
ax.set_ylabel("DCF (no units)")

### b) Search for short timescale correlations in PWV Auxtel residuals data

#### Prepare the data with the three required columns

In [None]:
 df_spec_sel.head()

In [None]:
# Explicit copy: columns are added to this frame afterwards, and it must be an
# independent object rather than a selection taken from df_spec_sel.
df_dcf_in = df_spec_sel[["ex_mjd","PWV [mm]_x_res"]].copy()

In [None]:
# Time axis "t_day": MJD counted from the earliest "ex_mjd" of the frame.
tstart = df_dcf_in["ex_mjd"].min()
df_dcf_in["t_day"] = (df_dcf_in["ex_mjd"] - tstart)

In [None]:
# Same constant uncertainty SIGMA_REPEATABILITY on every point (column "sig_pwv").
df_dcf_in = df_dcf_in.assign(sig_pwv = lambda x: SIGMA_REPEATABILITY)

In [None]:
# Keep the three columns, in the order (time, value, sigma) described in the ComputeZDCF docstring.
df_dcf_in = df_dcf_in[["t_day","PWV [mm]_x_res","sig_pwv"]]

In [None]:
# Auto-correlation of the Auxtel PWV residuals through ComputeZDCF, with minpts = 21.
df_dcf_out = ComputeZDCF("dcf_in_pwvres_auxtel.csv",df_dcf_in, minpts = 21)

In [None]:
# Unpack the ZDCF output: lag "tau" and "dcf" value, each with its lower/upper
# error columns transposed into a 2-row array for errorbar.
xerr = df_dcf_out[["-sig(tau)","+sig(tau)"]].values.T
yerr = df_dcf_out[["-err(dcf)","+err(dcf)"]].values.T
x = df_dcf_out["tau"].values
y = df_dcf_out["dcf"].values

In [None]:
# DCF of the residuals versus lag, restricted to lags between 0 and 100.
fig,ax = plt.subplots(1,1,figsize=(12,6),layout="constrained")
ax.errorbar(x,y,xerr=xerr,yerr=yerr,marker='o', mfc='red',linewidth=0.5,
         mec='red', ms=2, mew=2,ecolor="k",elinewidth=2,capsize=2,uplims=True, lolims=True)
ax.grid()
ax.set_ylim(-1,1)
ax.set_title(f"Discrete covariance function on PWV residuals in Auxtel (holo)")
ax.set_xlabel("Time (days)")
ax.set_ylabel("DCF (no units)")
ax.set_xlim(0.,100.)
#ax.set_xscale("log")

## My calculation of the autocorrelation

In [None]:
# Explicit copy so that adding "t_day" acts on an independent frame rather
# than on a selection taken from df_spec_sel.
df_dcf_in = df_spec_sel[["ex_mjd","PWV [mm]_x"]].copy()
# Time axis "t_day": MJD counted from the earliest "ex_mjd" of the frame.
tstart = df_dcf_in["ex_mjd"].min()
df_dcf_in["t_day"] = (df_dcf_in["ex_mjd"] - tstart)

In [None]:
# Keep time and PWV only, renamed to the short column names 't' and 'pwv'
# that ComputePWVAndTimeDiffence reads.
df_dcf_in = df_dcf_in[["t_day","PWV [mm]_x"]]
df_dcf_in = df_dcf_in .rename(columns={'t_day': 't', 'PWV [mm]_x':'pwv' })

In [None]:
df_dcf_in.head()

In [None]:
# Number of points, mean and standard deviation of the PWV sample.
countPWV,meanPWV,stdPWV = df_dcf_in["pwv"].agg(["count","mean","std"])
# agg() returns the three statistics as floats; the count is an integer and
# must be displayed as one ("N= 1234", not "N= 1234.0").
countPWV = int(countPWV)
txt_str=f"N= {countPWV}"

In [None]:
# PWV versus time with the sample mean (red line) and the mean +/- one
# standard deviation band (yellow).
fig,ax = plt.subplots(1,1,figsize=(14,4),layout="constrained")
#ax = axs.flatten()
df_dcf_in.plot.scatter(x="t",y="pwv",ax=ax,c="b")
ax.axhline(meanPWV,color="r") 
ax.axhspan(meanPWV-stdPWV,meanPWV+stdPWV, color='yellow', alpha=0.4)
ax.text(0.01, 0.95, txt_str, transform=ax.transAxes, fontsize=16,verticalalignment='top', bbox=props)
ax.set_title("PWV : mean and sigma ")
plt.show()

In [None]:
def ComputePWVAndTimeDiffence(df,meanpwv):
    """
    Build every ordered pair of points (1, 2) of ``df`` with t2 >= t1 and
    return the pair-wise quantities used for the covariance function.

    A pair is kept when ``t2 - t1 >= -1e-6``; each point is therefore also
    paired with itself, and two points at the same time are paired in both
    orders. Pairs are listed by first point, then by second point, in the row
    order of ``df``.

    Parameters
    ----------
    df : pandas dataframe with columns "t" (time) and "pwv" (value).
    meanpwv : ignored, kept for backward compatibility. The mean subtracted
        from the values is always recomputed from ``df["pwv"]``.

    Returns
    -------
    all_DT : array of time differences t2 - t1
    all_DPWV : array of value differences PWV2 - PWV1
    all_pwvpwv : array of products (PWV1 - mean) * (PWV2 - mean), where mean
        is the mean of ``df["pwv"]``
    all_PWVpairs : array of shape (number of pairs, 2) holding (PWV1, PWV2)

    Notes
    -----
    Vectorised with numpy instead of two nested ``iterrows`` loops; one dense
    N x N matrix of time differences is allocated.
    """
    times = df["t"].to_numpy(dtype=float)
    values = df["pwv"].to_numpy(dtype=float)
    centred = values - df["pwv"].mean()

    # dt_matrix[i, j] = t_j - t_i : row index = first point, column = second point.
    dt_matrix = times[np.newaxis, :] - times[:, np.newaxis]

    # np.nonzero walks the matrix row by row, i.e. in the same order as an
    # outer loop on the first point and an inner loop on the second point.
    first, second = np.nonzero(dt_matrix >= -0.000001)

    all_DT = dt_matrix[first, second]
    all_DPWV = values[second] - values[first]
    all_pwvpwv = centred[first] * centred[second]
    all_PWVpairs = np.column_stack((values[first], values[second]))
    return all_DT, all_DPWV, all_pwvpwv, all_PWVpairs

### Compute statistics for pairs

In [None]:
## Compute statistics for pairs
# Pair-wise time differences, PWV differences, centred products and PWV pairs
# for all selected Auxtel points.
all_dt,all_dpwv,all_pwvpwv, all_pwv1pwv2 = ComputePWVAndTimeDiffence(df_dcf_in,meanPWV)

In [None]:
# 50 logarithmically spaced edges from 10**-3.5228787452803374 (about 0.0003)
# to 10**2.5, with an extra first edge at 0.
logbins  = np.logspace(-3.5228787452803374,2.5,50)
logbins  = np.insert(logbins , 0, 0.000000 , axis=0)

In [None]:
logbins[:3]

In [None]:
750/5/2

In [None]:
# 75 linearly spaced edges from 0.0003 to 750.0003, with an extra first edge
# at 0 so that the first bin is [0, 0.0003).
linbins = np.linspace(0.0003,750.0003,75)
linbins  = np.insert(linbins , 0, 0.000000 , axis=0)

In [None]:
linbins[:10]

In [None]:
# Number of pairs per time-separation bin (linear bins, log y scale).
# `counts` and `edges` are kept: `edges` is reused as the binning of the
# covariance function computed further down.
fig,ax = plt.subplots(1,1,figsize=(16,4),layout="constrained")
#counts, edges, _ = ax.hist(all_dt,bins=150,range=(0,750));
#counts, edges, _ = ax.hist(all_dt,bins=750,range=(0,750));
#counts, edges, _ = ax.hist(all_dt,bins=logbins);
counts, edges, _ = ax.hist(all_dt,bins=linbins)
ax.set_yscale("log")
# Raw string: "\D" is not a valid Python escape sequence.
ax.set_xlabel(r"$\Delta t$ (days)")
ax.set_title("Distribution of pairs")
plt.show()

In [None]:
counts[:3]

In [None]:
edges[:3]

### build the dataframe of pairs

In [None]:
# One row per pair: time separation, PWV difference and centred product.
df_pairs = pd.DataFrame(
    {
        "dt": all_dt,
        "dPWV": all_dpwv,
        "PwvixPwvj": all_pwvpwv,
    }
)

In [None]:
df_pairs[df_pairs.dt==0]

In [None]:
# Zoom on the smallest separations: 100 bins between 0 and 60./24./60. days
# (one hour); the red line marks 0.0003 days.
fig,ax = plt.subplots(1,1,figsize=(16,2),layout="constrained")
df_pairs["dt"].hist(bins=100,range=(0.,60./24./60.),ax=ax)
ax.axvline(0.0003,color="r")
ax.set_xlabel("days")
ax.set_yscale("log")
plt.show()

In [None]:
def ComputeMyDCF_PwvixPwvj(df,list_of_bins,averPWV,sigPWV):
    """
    Binned average of the normalised pair products (discrete covariance function).

    For every bin [edge_k, edge_k+1) of time separation, the products
    "PwvixPwvj" of the pairs falling in the bin are divided by sigPWV**2 and
    averaged.

    Parameters
     df : dataframe of pairs with columns "dt" and "PwvixPwvj"
     list_of_bins : numpy array of bin edges in time separation
     averPWV : not used by the computation
     sigPWV : standard deviation used for the normalisation
    Return
     xcenter : bin centers
     ydata : mean of the normalised products in each bin
     ydataerr : their standard deviation divided by sqrt(number of pairs in the bin)
    """
    lower_edges = list_of_bins[0:-1]
    upper_edges = list_of_bins[1:]
    xcenter = (lower_edges + upper_edges)/2.

    ydata = np.zeros(len(xcenter))
    ydataerr = np.zeros(len(xcenter))

    separations = df["dt"]
    normalised = df["PwvixPwvj"]/sigPWV**2

    for ibin, (low, high) in enumerate(zip(lower_edges, upper_edges)):
        # pairs whose separation falls in [low, high)
        in_bin = normalised[(separations >= low) & (separations < high)]
        npairs = len(in_bin)

        ydata[ibin] = np.mean(in_bin)
        spread = np.std(in_bin)
        # error on the mean; left as the plain spread when the bin is empty
        ydataerr[ibin] = spread / np.sqrt(npairs) if npairs > 0 else spread

    return xcenter,ydata,ydataerr

In [None]:
# Binned covariance function on the histogram edges, normalised by stdPWV**2,
# and the text box content (mean and sigma of the PWV sample).
xcenter,ydata,ydataerr = ComputeMyDCF_PwvixPwvj(df_pairs,edges,meanPWV,stdPWV )
txtstr = "\n".join([f" meanPWV = {meanPWV:.2f} mm",f" sigmaPWV = {stdPWV:.2f} mm"])

In [None]:
# Binned covariance function versus time separation, with the error on the
# mean of each bin as error bar.
fig,ax = plt.subplots(1,1,figsize=(12,6))

ax.plot(xcenter,ydata,'o-r',label="all nights")
ax.errorbar(xcenter,ydata,yerr=ydataerr ,c="grey",lw=0.5)

ax.legend(loc="upper right")
ax.text(0.01, 0.95, txtstr, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=1)

ax.set_title("Discrete Covariance Function ")
# Raw string: "\D" is not a valid Python escape sequence.
ax.set_xlabel(r"$\Delta t$ (days)")
ax.axhline(0,color="k")
ax.set_ylim(-1.5,1.5)
#ax.set_xscale("log")
plt.show()

## TODO Cross-Correlation

In [None]:
# 1mm after 1 hour
# Constant uncertainty assigned to every Merra2 point (column "sig_pwv")
# before the cross-correlation. Presumably in mm -- TODO confirm.
SIGMA_MERRA2 = 1.0

In [None]:
tstart

In [None]:
# Curve 1: Auxtel PWV; curve 2: Merra2 TQV.
# Explicit copies: a "t_day" column is added to both frames afterwards, and
# they must be independent objects rather than selections of the parent frames.
df_dcf_in1 = df_spec_sel[["ex_mjd","PWV [mm]_x"]].copy()
df_dcf_in2 = df_m2[["mjd","TQV"]].copy()

In [None]:
# Auxtel time axis counted from tstart (set in an earlier cell).
df_dcf_in1["t_day"] = (df_dcf_in1["ex_mjd"] - tstart)
df_dcf_in1 = df_dcf_in1[["t_day","PWV [mm]_x"]]

In [None]:
# Merra2 time axis counted from the same tstart, so both curves share one origin.
df_dcf_in2["t_day"]  = df_dcf_in2["mjd"] - tstart
df_dcf_in2 = df_dcf_in2[["t_day","TQV"]]

In [None]:
# Constant per-point uncertainty: SIGMA_REPEATABILITY for Auxtel, SIGMA_MERRA2 for Merra2.
df_dcf_in1 = df_dcf_in1.assign(sig_pwv = lambda x: SIGMA_REPEATABILITY)
df_dcf_in2 = df_dcf_in2.assign(sig_pwv = lambda x: SIGMA_MERRA2)

In [None]:
# Drop the points located before tstart (negative t_day) in both curves.
df_dcf_in1 = df_dcf_in1[ df_dcf_in1["t_day"]>=0]
df_dcf_in2 = df_dcf_in2[ df_dcf_in2["t_day"]>=0]

In [None]:
df_dcf_in1.head()

In [None]:
df_dcf_in2.head()

In [None]:
# Control plot of the two curves given to the cross-correlation, on the
# common "t_day" time axis.
fig,axs = plt.subplots(1,1,figsize=(14,6),layout='constrained')
ax  = axs

# legend=False: the legend is drawn once, by ax.legend() below.
df_dcf_in2.plot(x="t_day",y="TQV",ax=ax,marker=".",c="b",lw=0,ms=3,label="Merra2",legend=False)
df_dcf_in1.plot(x="t_day",y="PWV [mm]_x",ax=ax,marker='o',ms=3,c="r",lw=0.0,grid=True,label=tag,legend=False)

ax.set_ylabel("PWV [mm]_x")
# The abscissa is t_day (MJD minus tstart), not a calendar date.
ax.set_xlabel("time since tstart (days)")

ax.set_title("Precipitable water vapor measured by Auxtel (holo) \n and comparison with Merra2 data",fontweight="bold")
ax.legend()

ax.set_ylim(0.,20.)

plt.show()

In [None]:
# Cross-correlation of the Auxtel curve (first) with the Merra2 curve (second), with minpts = 21.
df_dcf_cross_out = ComputeCrossZDCF("dcf_in_pwv_auxtel_cross1.csv","dcf_in_pwv_merra2_cross2.csv", df_dcf_in1, df_dcf_in2 , minpts = 21 )

In [None]:
# Unpack the cross-ZDCF output: lag "tau" and "dcf" value, each with its
# lower/upper error columns transposed into a 2-row array for errorbar.
xerr = df_dcf_cross_out[["-sig(tau)","+sig(tau)"]].values.T
yerr = df_dcf_cross_out[["-err(dcf)","+err(dcf)"]].values.T
x = df_dcf_cross_out["tau"].values
y = df_dcf_cross_out["dcf"].values

In [None]:
# Cross DCF versus lag with asymmetric error bars on both axes.
fig,ax = plt.subplots(1,1,figsize=(12,5),layout="constrained")
ax.errorbar(x,y,xerr=xerr,yerr=yerr,marker='o', mfc='red',linewidth=0.5,
         mec='red', ms=2, mew=2,ecolor="k",elinewidth=2,capsize=2,uplims=True, lolims=True)
ax.grid()
ax.set_ylim(-1,1)
ax.set_title(f"Discrete cross covariance function on PWV Auxtel (holo) - PWV Merra2")
ax.set_xlabel("Time (days)")
ax.set_ylabel("DCF (no units)")
#ax.set_xlim(0.,100.)
#ax.set_xscale("log")