# Gaussian process and linear fit on Ozone time sequence

- author Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date 2024-11-07
- last update 2024-11-07 
- Office emac : mamba_py311
- Home emac : base (conda)
- laptop : conda_py310

**Goal** : Show the nightly variations of Ozone with respect to date and time. Fit a straight line.

In [None]:
import warnings
# Clear any existing warning filters, then silence every warning for the rest of the notebook.
# NOTE(review): this also hides deprecation and numerical warnings raised by the cells below.
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
# Print the version of the Python interpreter running the notebook.
print(python_version())

In [None]:
import os

In [None]:
# Directory where the figures are stored, and the file extension appended to
# every saved figure name.
pathfigs = "figsHoloLinearandGPFitOzone"
# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of os.path.exists() followed by os.makedirs().
os.makedirs(pathfigs, exist_ok=True)
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from astropy.coordinates.earth import EarthLocation
from datetime import datetime
from pytz import timezone

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

import scipy
from scipy.optimize import curve_fit,least_squares


# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove this if you want the notebook to run faster
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
def convertNumToDatestr(num):
    """Convert a date packed as the integer YYYYMMDD into a pandas Timestamp.

    Parameters
    ----------
    num : int
        Date encoded as ``year*10_000 + month*100 + day`` (e.g. ``20230930``).

    Returns
    -------
    pandas.Timestamp
        The date parsed from the string ``"YYYY-MM-DD"``.
    """
    # Peel off the year first, then the month; the remainder is the day.
    year, month_day = divmod(num, 10_000)
    month, day = divmod(month_day, 100)
    return pd.to_datetime(f"{year:04d}-{month:02d}-{day:02d}")

## Configuration

In [None]:
# Observatory site and local time zone, used below to convert UTC observation times to local time.
# NOTE(review): EarthLocation.of_site may need network access to fetch the astropy site registry — confirm.
observing_location = EarthLocation.of_site('Rubin Observatory')
tz = timezone('America/Santiago')

### Spectro Hologram data

In [None]:
# Collimator configuration: when FLAG_WITHCOLLIMATOR is True, only the nights
# strictly after DATE_WITHCOLLIMATOR (an integer YYYYMMDD) are kept further down.
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930
# Build the UTC-aware timestamp directly from DATE_WITHCOLLIMATOR so the date is
# written only once (previously a first, time-zone-naive value was computed and
# immediately overwritten by a hard-coded "2023-09-30 00:00:00.0+0000" string).
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

In [None]:
# Version of the spectroscopy results to analyse, and the legend label associated with each version.
version_results = "v5"
legendtag = {"v1" : "old v3.1.0",
            "v2" : "v3.1.0-PWV<10mm",
            "v3" : "v3.1.0-PWV<15mm",
            "v4" : "Auxtel holo v3.1.0",
            "v5" : "Auxtel holo v3.1.0 09/22 - 10/24"}

In [None]:
# Input .npy file for each results version (same keys as legendtag).
# NOTE(review): only the "v1" entry carries a "data/spectro/" directory prefix; the other
# entries are bare file names resolved against the current working directory — confirm.
atmfilenamesdict = {"v1" : "data/spectro/auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_testWithMaskedEdges_newBoundaries_newPolysRescaled_newFitBounds_adjustA1_lockedOrder2_removeThroughputTails_2.npy",
                    "v2" : "auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_PeekFinder.npy",
                    "v3" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z.npy",
                    "v4" : "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z_spectrfullextend.npy",
                    "v5" : "u_dagoret_auxtel_atmosphere_202209_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_No5SigmaClip_20241016T184601Z_spectrfullextended.npy"}

In [None]:
# Pick the input file and the legend label of the selected results version.
atmfilename = atmfilenamesdict[version_results]
tag = legendtag[version_results] 

## Initialisation

### Read the file

In [None]:
# Load the results table saved as a NumPy .npy file.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files from a trusted source.
specdata = np.load(atmfilename,allow_pickle=True)

In [None]:
# Wrap the loaded records in a DataFrame; every later cell works on df_spec.
df_spec = pd.DataFrame(specdata)

### Remove spectra with red filter

In [None]:
# Show the distinct FILTER values present in the data.
df_spec['FILTER'].unique()

In [None]:
# When enabled, keep only the spectra whose FILTER is 'empty'.
FLAG_REMOVE_FILTERS = True
if FLAG_REMOVE_FILTERS:
    # reset_index() without drop=True keeps the previous row labels in an "index" column.
    df_spec = df_spec.loc[df_spec["FILTER"] == 'empty'].reset_index()

### Define if a target is faint or bright

In [None]:
# Targets classified as faint. Defined once at module level so the collection is
# not rebuilt for every row when IsFaint is used with DataFrame.apply.
FAINT_TARGETS = (
    'Feige110', 'HD074000', 'HD115169', 'HD031128', 'HD200654',
    'HD167060', 'HD009051', 'HD142331', 'HD160617', 'HD111980',
)


def IsFaint(row):
    """Tell whether the target of a spectrum belongs to the faint-target list.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``"TARGET"`` (a DataFrame row or a dict).

    Returns
    -------
    bool
        True when ``row["TARGET"]`` is one of ``FAINT_TARGETS``.
    """
    return row["TARGET"] in FAINT_TARGETS

In [None]:
# Flag each spectrum whose TARGET is in the faint-target list.
df_spec["isFaint"] = df_spec.apply(IsFaint,axis=1)

### Compute NightObs

In [None]:
# The night identifier is the exposure id with its last five digits removed.
# Computed on the whole column at once rather than through a row-wise apply.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
# Optionally keep only the nights strictly after DATE_WITHCOLLIMATOR.
if FLAG_WITHCOLLIMATOR:
    df_spec = df_spec[df_spec["nightObs"]> DATE_WITHCOLLIMATOR]

## Apply Quality selection

In [None]:
# Histogram of CHI2_FIT between 0 and 200, drawn with a logarithmic y-axis.
fig,ax = plt.subplots(1,1)
df_spec["CHI2_FIT"].hist(bins=50,ax=ax,range=(0,200))
ax.set_yscale("log")

### Add the Time in pd.datetime

#### UTC

In [None]:
# Parse the DATE-OBS column into pandas datetimes (presumably UTC, per the section title — confirm).
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])

In [None]:
# One-week margin added before the first and after the last observation;
# TMIN/TMAX are used as x-axis limits of the time plots.
DT = pd.Timedelta(days=7)
TMIN, TMAX = df_spec["Time"].min() - DT, df_spec["Time"].max() + DT

### Compute relative time to Mid-night

In [None]:
def GetTimeToMidNight(row):
    """Return the local observation time in hours relative to local midnight.

    Parameters
    ----------
    row : mapping
        Row providing the ``'DATE-OBS'`` UTC time stamp.

    Returns
    -------
    float
        Hours in the interval (-12, 12]: negative before midnight, positive
        after. Sub-second precision is dropped.
    """
    utc_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # Wall-clock time in the observatory time zone
    local = utc_time.to_datetime(timezone=tz)

    # Whole seconds elapsed since 00:00 of the same local calendar day
    seconds_since_midnight = 3600*local.hour + 60*local.minute + local.second
    hours = seconds_since_midnight/3600.

    # Times later than noon belong to the evening before the coming midnight:
    # express them as negative hours.
    return hours - 24. if hours > 12. else hours

In [None]:
# Signed time relative to local midnight, in hours, for every spectrum.
df_spec["dt_midnight"] = df_spec.apply(GetTimeToMidNight,axis=1)

In [None]:
# Distribution of the observation times relative to midnight (48 bins over -12h..+12h, i.e. 30 min per bin).
fig,ax = plt.subplots(1,1,figsize=(6,4))
df_spec["dt_midnight"].hist(bins=48,range=(-12,12),ax=ax,facecolor="blue") 
ax.set_xlabel("time relative to midnight (hour)")
ax.set_title("Observation time")

### Compute Date relative to January

In [None]:
def GetDateToMidJanuary(row):
    """Return the local observation time with its year replaced by 2024.

    Folding every observation onto the same year lets different years be
    overlaid on a common January-to-December axis.

    Parameters
    ----------
    row : mapping
        Row providing the ``'DATE-OBS'`` UTC time stamp.

    Returns
    -------
    pandas.Timestamp
        Time-zone-naive local time (whole seconds) in the year 2024.
    """
    utc_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # Wall-clock time in the observatory time zone
    local = utc_time.to_datetime(timezone=tz)

    # Fields 1..5 of the time tuple are month, day, hour, minute, second:
    # rebuild a naive datetime from them with the year pinned to 2024.
    folded = datetime(2024, *local.timetuple()[1:6])
    return pd.to_datetime(folded)

In [None]:
#df_spec["Time_january"] = df_spec.apply(GetDateToMidJanuary,axis=1)

In [None]:
def GetDateToMidJanuaryAndYear(row):
    """Return the local observation time folded onto 2024, plus its real year.

    Parameters
    ----------
    row : mapping
        Row providing the ``'DATE-OBS'`` UTC time stamp.

    Returns
    -------
    tuple
        ``(timestamp, year)`` where ``timestamp`` is the time-zone-naive local
        time (whole seconds) with the year replaced by 2024, and ``year`` is
        the actual local year of the observation.
    """
    utc_time = Time(row['DATE-OBS'], scale='utc', location=observing_location)

    # Wall-clock time in the observatory time zone
    local = utc_time.to_datetime(timezone=tz)

    # Fields 1..5 of the time tuple are month, day, hour, minute, second:
    # rebuild a naive datetime from them with the year pinned to 2024.
    folded = datetime(2024, *local.timetuple()[1:6])
    return pd.to_datetime(folded), local.year

In [None]:
# result_type="expand" spreads the returned (timestamp, year) pair over the two new columns.
df_spec[["Time_january","Year"]] = df_spec.apply(GetDateToMidJanuaryAndYear,axis=1,result_type="expand")

In [None]:
# Display the two new columns as a quick check.
df_spec[["Time_january","Year"]]

## Compute night boundaries

In [None]:
def GetNightBoundariesDict(df_spec):
    """Compute the time window covered by the observations of each night.

    Parameters
    ----------
    df_spec : pandas.DataFrame
        Spectroscopy summary table with the columns ``"nightObs"`` and ``"Time"``.

    Returns
    -------
    dict
        Maps each ``nightObs`` value, in order of first appearance, to the
        tuple ``(tmin, tmax)``: first and last ``"Time"`` of that night,
        widened by 30 minutes on each side.
    """
    margin = pd.Timedelta(minutes=30)

    def _window(nightobs):
        # Times of the spectra taken during this night
        times = df_spec.loc[df_spec["nightObs"] == nightobs, "Time"]
        return times.min() - margin, times.max() + margin

    return {nightobs: _window(nightobs) for nightobs in df_spec["nightObs"].unique()}

In [None]:
# Night boundaries computed on all spectra (before the quality cuts); drawn as shaded bands in the time plots.
dn = GetNightBoundariesDict(df_spec)

## Plot all data

## Apply Quality selection cuts

In [None]:
def getSelectionCut_old(df_spec, chi2max=20., o3min=1.0, o3max = 599.):
    """Former quality selection based on the fit chi2 and ``"ozone [db]_x"``.

    Parameters
    ----------
    df_spec : pandas.DataFrame
        Table with the columns ``"CHI2_FIT"`` and ``"ozone [db]_x"``.
    chi2max : float
        Exclusive upper bound on ``"CHI2_FIT"``.
    o3min, o3max : float
        Exclusive bounds on ``"ozone [db]_x"``.

    Returns
    -------
    pandas.Series
        Boolean mask, True for the selected rows.
    """
    ozone = df_spec["ozone [db]_x"]
    good_fit = df_spec["CHI2_FIT"] < chi2max
    ozone_in_range = (ozone > o3min) & (ozone < o3max)
    return good_fit & ozone_in_range

In [None]:
def getSelectionCut(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.,
                    d2ccdmin=186.5, d2ccdmax=187.3, exptimemin=20.):
    """Quality selection applied to the spectra.

    A row is selected when all of the following hold (all bounds exclusive):
    ``CHI2_FIT < chi2max``; ``"PWV [mm]_x"`` and ``"PWV [mm]_y"`` inside
    ``(pwvmin, pwvmax)``; ``"D2CCD"`` inside ``(d2ccdmin, d2ccdmax)``;
    ``EXPTIME > exptimemin``; ``"ozone [db]_y"`` inside ``(ozmin, ozmax)``.

    Parameters
    ----------
    df_spec : pandas.DataFrame
        Spectroscopy summary table holding the columns listed above.
    chi2max, pwvmin, pwvmax, ozmin, ozmax : float
        Thresholds described above.
    d2ccdmin, d2ccdmax, exptimemin : float
        Bounds on ``"D2CCD"`` and ``"EXPTIME"``; the defaults are the values
        that used to be hard-coded in the function body.

    Returns
    -------
    pandas.Series
        Boolean mask, True for the selected rows.
    """
    def _strictly_inside(column, low, high):
        # low < value < high, evaluated on the whole column
        values = df_spec[column]
        return (values > low) & (values < high)

    return (
        (df_spec["CHI2_FIT"] < chi2max)
        & _strictly_inside("PWV [mm]_x", pwvmin, pwvmax)
        & _strictly_inside("D2CCD", d2ccdmin, d2ccdmax)
        & (df_spec['EXPTIME'] > exptimemin)
        & _strictly_inside("PWV [mm]_y", pwvmin, pwvmax)
        & _strictly_inside("ozone [db]_y", ozmin, ozmax)
    )

In [None]:
def getSelectionCutNoPolar(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.):
    """Quality selection restricted to every target except the polar star HD185975.

    The quality criteria are those of ``getSelectionCut`` (called with the same
    thresholds) instead of a second copy of the same conditions.

    Parameters
    ----------
    df_spec : pandas.DataFrame
        Spectroscopy summary table; must also hold the ``"TARGET"`` column.
    chi2max, pwvmin, pwvmax, ozmin, ozmax : float
        Thresholds forwarded to ``getSelectionCut``.

    Returns
    -------
    pandas.Series
        Boolean mask, True for selected rows whose TARGET is not "HD185975".
    """
    quality = getSelectionCut(df_spec, chi2max=chi2max, pwvmin=pwvmin, pwvmax=pwvmax,
                              ozmin=ozmin, ozmax=ozmax)
    return quality & (df_spec["TARGET"] != "HD185975")

In [None]:
def getSelectionCutWithPolar(df_spec, chi2max=20., pwvmin=0.1, pwvmax = 14.9,ozmin=100.,ozmax=600.):
    """Quality selection restricted to the polar star HD185975.

    The quality criteria are those of ``getSelectionCut`` (called with the same
    thresholds) instead of a second copy of the same conditions.

    Parameters
    ----------
    df_spec : pandas.DataFrame
        Spectroscopy summary table; must also hold the ``"TARGET"`` column.
    chi2max, pwvmin, pwvmax, ozmin, ozmax : float
        Thresholds forwarded to ``getSelectionCut``.

    Returns
    -------
    pandas.Series
        Boolean mask, True for selected rows whose TARGET is "HD185975".
    """
    quality = getSelectionCut(df_spec, chi2max=chi2max, pwvmin=pwvmin, pwvmax=pwvmax,
                              ozmin=ozmin, ozmax=ozmax)
    return quality & (df_spec["TARGET"] == "HD185975")

In [None]:
# Boolean masks for each sub-sample. The non-polar mask is evaluated once and
# then split into bright and faint targets with the isFaint flag.
cut = getSelectionCut(df_spec)
cut_nopolar = getSelectionCutNoPolar(df_spec)
cut_wthpolar = getSelectionCutWithPolar(df_spec)
cut_nopolar_bright = cut_nopolar & (~df_spec["isFaint"])
cut_nopolar_faint = cut_nopolar & (df_spec["isFaint"])

In [None]:
# Sub-samples: all selected spectra, non-polar bright, non-polar faint, polar star only, all non-polar.
df_spec_sel = df_spec[cut]
df_spec_np_b = df_spec[cut_nopolar_bright]
df_spec_np_f = df_spec[cut_nopolar_faint]
df_spec_wp = df_spec[cut_wthpolar]
df_spec_np = df_spec[cut_nopolar] 

In [None]:
# Report the size of the full sample and of each selected sub-sample.
for _label, _frame in (
    ("Total number of Spectra          : ", df_spec),
    ("Number of selected Spectra       : ", df_spec_sel),
    ("Number of selected Polars        : ", df_spec_wp),
    ("Number of selected Non-Polars    : ", df_spec_np),
    ("Number of selected Non-Polars Bright : ", df_spec_np_b),
    ("Number of selected Non-Polars Faint  : ", df_spec_np_f),
):
    print(_label, len(_frame))

In [None]:
# Renumber the rows of every sub-sample from 0, discarding the old labels.
for _frame in (df_spec_sel, df_spec_np_b, df_spec_np_f, df_spec_wp, df_spec_np):
    _frame.reset_index(drop=True, inplace=True)

In [None]:
#List_Of_Faint_targets = ['Feige110','HD074000','HD115169','HD031128','HD200654','HD167060','HD009051','HD142331','HD160617','HD111980']
# For each sub-sample, print how many distinct targets it holds and their names.
for _label, _frame in (
    ("Polar            :", df_spec_wp),
    ("Non Polar        :", df_spec_np),
    ("Non Polar Bright :", df_spec_np_b),
    ("Non Polar Faint  :", df_spec_np_f),
):
    print(_label, len(_frame["TARGET"].unique()), "\t", _frame["TARGET"].unique())

## Recompute night boundaries

In [None]:
#dn = GetNightBoundariesDict(df_spec_sel)

## Plot all data

In [None]:
# Ozone versus time for the whole observation period, one marker colour per sub-sample.
from matplotlib.dates import DateFormatter
#date_form = DateFormatter("%y-%m-%dT%H:%M")
date_form = DateFormatter("%y-%m-%d")
fig,axs = plt.subplots(1,1,figsize=(14,6),layout='constrained')
ax  = axs
# NOTE(review): on a freshly created Axes get_legend() presumably returns None, so the
# pandas plot calls below receive legend=None; the legend is drawn later by ax.legend().
leg=ax.get_legend()

# Fix the time range before plotting (observation period +/- one week).
ax.set_xlim(TMIN,TMAX) 

#df_spec_sel.plot(x="Time",y="ozone [db]_x",ax=ax,marker='o',c="r",lw=0.0,grid=True,label=tag,legend=leg,ms=5,alpha=0.2)
#df_spec_np_f.plot(x="Time",y="ozone [db]_x",ax=ax,marker='o',c="grey",lw=0.0,grid=True,label="faint, no polar",legend=leg,ms=5,alpha=0.2)

# lw=0.0 hides the connecting line so that only the markers are visible.
df_spec_np.plot(x="Time",y="ozone [db]_x",ax=ax,marker='+',c="orange",lw=0.0,grid=True,label="no polar",legend=leg,ms=5,alpha=1)
df_spec_np_b.plot(x="Time",y="ozone [db]_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,label="bright,no polar",legend=leg,ms=5,alpha=1)
df_spec_wp.plot(x="Time",y="ozone [db]_x",ax=ax,marker='+',c="g",lw=0.0,grid=True,label="with polar",legend=leg,ms=5,alpha=1)
ax.set_ylabel("ozone [db]_x")

ax.set_xlabel("time")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Ozone measured by holo selected vs time",fontweight="bold")
ax.legend(loc="upper right")
ax.set_ylim(100.,600.)

# Shade every observing night (blue bands).
for key, tt in dn.items():
    ax.axvspan(tt[0],tt[1], color='blue', alpha=0.1)


# Shade the period from TMIN up to datetime_WITHCOLLIMATOR (yellow band).
ax.axvspan(TMIN,datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)


figname =f"{pathfigs}/ozone_allpoints_allnights_wthcuts"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Two stacked panels sharing the time axis; the ozone measurements are drawn in the bottom one.
from matplotlib.dates import DateFormatter
#date_form = DateFormatter("%y-%m-%dT%H:%M")
date_form = DateFormatter("%y-%m-%d")

fig = plt.figure(figsize=(14,8),layout='constrained')
# The bottom panel is twice as tall as the top panel.
gs = GridSpec(2, 1,  height_ratios=[1,2],figure=fig)

# left figure
ax = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1],sharex=ax)

leg=ax.get_legend()
leg2=ax2.get_legend()

# top
# NOTE(review): nothing is plotted on the top panel in this cell, so ax.legend() has no
# entry to show — confirm whether a data series is missing here.
ax.set_xlim(TMIN,TMAX)   
ax.set_ylabel("ozone (DU)")
ax.set_ylim(200.,400.)
ax.legend()

#bottom
# lw=0.0 hides the connecting line so that only the markers are visible.
df_spec_np.plot(x="Time",y="ozone [db]_x",ax=ax2,marker='+',c="orange",lw=0.0,grid=True,label="no polar",legend=leg2,ms=5,alpha=1)
df_spec_np_b.plot(x="Time",y="ozone [db]_x",ax=ax2,marker='+',c="r",lw=0.0,grid=True,label="bright,no polar",legend=leg2,ms=5,alpha=1)
df_spec_wp.plot(x="Time",y="ozone [db]_x",ax=ax2,marker='+',c="g",lw=0.0,grid=True,label="with polar",legend=leg2,ms=5,alpha=1)
ax2.set_ylabel("ozone [db]_x")

ax2.set_xlabel("time")
ax2.xaxis.set_major_formatter(date_form)
ax.set_title("Ozone measured by holo selected vs time",fontweight="bold")
ax2.legend(loc="upper right")
ax2.set_ylim(100.,600.)
# Shade every observing night (blue bands).
for key, tt in dn.items():
    ax2.axvspan(tt[0],tt[1], color='blue', alpha=0.1)


# Shade the period from TMIN up to datetime_WITHCOLLIMATOR (yellow band).
ax2.axvspan(TMIN,datetime_WITHCOLLIMATOR, color='yellow', alpha=0.1)


figname =f"{pathfigs}/ozone_allpoints_allnights_wthcuts_separatemeera"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Ozone of the polar-star spectra versus time relative to midnight, all nights overlaid.
fig,axs = plt.subplots(1,1,figsize=(6,4),layout='constrained')
ax  = axs
leg=ax.get_legend()
df_spec_wp.plot(x="dt_midnight",y="ozone [db]_x",ax=ax,marker='+',c="g",lw=0.0,grid=True,label="with polar",legend=leg,ms=5,alpha=0.5)
ax.set_ylim(200.,500.)
ax.legend()
ax.set_xlabel("time relative to midnight (hour)")
ax.set_title("Ozone")
ax.set_ylabel("Ozone (DU)")

In [None]:
# Scatter of the two ozone estimates ("ozone [db]_y" versus "ozone [db]_x") for polar and non-polar targets.
fig,axs = plt.subplots(1,1,figsize=(6,6),layout='constrained')
ax  = axs
leg=ax.get_legend()
# y = x reference line
ax.plot([0.,600.],[0.,600.],"k-")
df_spec_wp.plot(x="ozone [db]_x",y="ozone [db]_y",ax=ax,marker='+',c="g",lw=0.0,grid=True,label="polar",legend=leg,ms=5,alpha=0.5)
df_spec_np.plot(x="ozone [db]_x",y="ozone [db]_y",ax=ax,marker='+',c="r",lw=0.0,grid=True,label="no polar",legend=leg,ms=5,alpha=0.5)
ax.set_ylim(0.,600.)
ax.set_xlim(0.,600.)

ax.legend()
ax.set_xlabel("Ozone_x (DU)")
ax.set_title("Correlation Ozone 1D-2D")
ax.set_ylabel("Ozone_y (DU)")

In [None]:
#list(df_spec_wp.columns)

In [None]:
# Scatter of VAOD_x versus "ozone [db]_x" for polar and non-polar targets.
fig,axs = plt.subplots(1,1,figsize=(6,6),layout='constrained')
ax  = axs
leg=ax.get_legend()
df_spec_wp.plot(x="ozone [db]_x",y="VAOD_x",ax=ax,marker='+',c="g",lw=0.0,grid=True,label="polar",legend=leg,ms=5,alpha=0.5)
df_spec_np.plot(x="ozone [db]_x",y="VAOD_x",ax=ax,marker='+',c="r",lw=0.0,grid=True,label="no polar",legend=leg,ms=5,alpha=0.5)

ax.legend()
ax.set_xlabel("Ozone_x (DU)")
ax.set_title("Correlation Ozone - VAOD ")
ax.set_ylabel("VAOD")
# Restrict the displayed window.
ax.set_ylim(0.,0.2)
ax.set_xlim(200.,500.)

In [None]:
# Ozone versus date folded onto a single year ("Time_january"), one colour per
# observation year: circles for bright non-polar targets, crosses for the polar star.
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%d")

fig,axs = plt.subplots(1,1,figsize=(14,6),layout='constrained')
ax  = axs
leg=ax.get_legend()

# Same drawing order as before: for each year, non-polar first, then polar.
for year, color in ((2022, "g"), (2023, "b"), (2024, "r")):
    df_spec_np_b[df_spec_np_b.Year == year].plot(x="Time_january",y="ozone [db]_x",ax=ax,marker='o',c=color,lw=0.0,grid=True,label=f"{year} (no polar)",legend=leg,ms=5,alpha=0.5)
    df_spec_wp[df_spec_wp.Year == year].plot(x="Time_january",y="ozone [db]_x",ax=ax,marker='+',c=color,lw=0.0,grid=True,label=f"{year} (polar)",legend=leg,ms=8,alpha=1)

ax.set_ylabel("ozone [db]_x")
ax.set_xlabel("date (since January)")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Ozone measured by holo selected vs time (modulo 1 year)",fontweight="bold")
ax.legend(loc="upper right")
ax.set_ylim(100.,600.)

# plt.tight_layout() is deliberately not called: the figure is created with
# layout='constrained', and tight_layout() would replace that layout engine.

figname =f"{pathfigs}/ozoneholoM2_allpoints_allnights_modulo1year"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Same folded-year plot restricted to the polar star, one colour per observation year.
from matplotlib.dates import DateFormatter
date_form = DateFormatter("%y-%m-%d")

fig,axs = plt.subplots(1,1,figsize=(14,6),layout='constrained')
ax  = axs
leg=ax.get_legend()

for year, color in ((2022, "g"), (2023, "b"), (2024, "r")):
    df_spec_wp[df_spec_wp.Year == year].plot(x="Time_january",y="ozone [db]_x",ax=ax,marker='+',c=color,lw=0.0,grid=True,label=f"{year} (polar)",legend=leg,ms=8,alpha=1)

ax.set_ylabel("ozone [db]_x")
ax.set_xlabel("date (since January)")
ax.xaxis.set_major_formatter(date_form)
ax.set_title("Ozone measured by holo selected vs time (modulo 1 year)",fontweight="bold")
ax.legend(loc="upper right")
ax.set_ylim(100.,600.)

# plt.tight_layout() is deliberately not called: the figure is created with
# layout='constrained', and tight_layout() would replace that layout engine.

figname =f"{pathfigs}/ozoneholoM2_allpoints_allnights_wp_modulo1year"+figtype
plt.savefig(figname)
plt.show()

## Show time variation each night

In [None]:
# Night identifiers having at least one spectrum that passes the quality selection.
all_selected_nights = df_spec_sel["nightObs"].unique()

In [None]:
def funclineres(params, x, y, yerr):
    """Normalised residuals of the straight line: (observed - model) / error.

    ``params[0]`` is the intercept and ``params[1]`` the slope.
    """
    intercept, slope = params[0], params[1]
    return (y - intercept - slope*x)/yerr


def funcline(params,x):
    """Straight line ``params[0] + params[1]*x``."""
    intercept, slope = params[0], params[1]
    return intercept + slope*x

In [None]:
def MakeLineFit(df_night_pwv_curve):
    """Fit a straight line to the ozone measurements as a function of time.

    The fit minimises the residuals of ``funclineres`` (weighted by the
    measurement errors) with ``scipy.optimize.least_squares``.

    Parameters
    ----------
    df_night_pwv_curve : pandas.DataFrame
        Table with the columns ``"dt_midnight"`` (abscissa), ``"ozone [db]_x"``
        (ordinate) and ``"ozone [db]_err_x"`` (error on the ordinate).

    Returns
    -------
    tuple
        ``(x, y, yerr, n, chi2dof, xfit, yfit, slope, slope_err)`` where
        ``x, y, yerr`` are the fitted arrays, ``n`` the number of points,
        ``chi2dof`` the reduced chi2 (NaN when there are no degrees of
        freedom), ``xfit, yfit`` the fitted line sampled for plotting, and
        ``slope, slope_err`` the slope with its uncertainty.
    """

    x = df_night_pwv_curve["dt_midnight"].values
    y = df_night_pwv_curve["ozone [db]_x"].values
    yerr = df_night_pwv_curve["ozone [db]_err_x"].values
    n = len(y)

    fit_res = least_squares(funclineres,[5.,0],args = (x,y,yerr))
    popt = fit_res.x
    npar = len(popt)

    # The residuals are already divided by yerr, so (J^T J)^-1 is the parameter
    # covariance; it is then rescaled by the reduced chi2.
    J = fit_res.jac
    cov = np.linalg.inv(J.T.dot(J))
    dof = n - npar
    if dof > 0:
        chi2dof = ((funclineres(popt,x,y,yerr))**2).sum()/dof
    else:
        # No degree of freedom left: the reduced chi2 is undefined.
        chi2dof = np.nan
    cov = cov * chi2dof
    perr = np.sqrt(np.diagonal(cov))

    # Sampling range of the fitted line: 1% beyond the first point and 5% beyond
    # the last one. The margins use |x| so that the range always grows outward;
    # scaling x.min()/x.max() directly moved the bounds inward for negative
    # abscissae (dt_midnight is negative before midnight).
    xmin, xmax = x.min(), x.max()
    xfit = np.linspace(xmin - 0.01*abs(xmin), xmax + 0.05*abs(xmax))
    yfit = funcline(popt,xfit)

    slope = popt[1]
    slope_err = perr[1]

    return x,y,yerr,n,chi2dof,xfit,yfit,slope,slope_err

### Plot night by night

In [None]:
def ComputeRepeatability(df):
    """Compute the point-to-point repeatability of the two ozone estimates.

    Each row is compared with the row just before it, in the order of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``"nightObs"``, ``"dt"``, ``"TARGET"``,
        ``"ozone [db]_x"`` and ``"ozone [db]_y"``.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df`` with the columns:

        - ``nightObs``, ``dt``: copied from ``df``;
        - ``dt_rep``: ``dt`` difference with the previous row, times 3600
          (seconds, presumably from a ``dt`` in hours — confirm);
        - ``dOZx_rep``, ``dOZy_rep``: ozone differences with the previous row;
        - ``targflag_rep``: True when the previous row has the same TARGET;
        - ``Npoints``: number of rows of ``df``.

        The first row has no predecessor: its differences are 0 and its flag
        is False. Columns carry real dtypes (bool flag, numeric differences)
        rather than generic objects.
    """
    n_points = len(df)

    def _step(column):
        # Difference with the previous row; forced to 0 on the first row.
        delta = df[column].diff()
        delta.iloc[:1] = 0.
        return delta.to_numpy()

    target = df["TARGET"]
    # The shifted column has no value on the first row, which compares as False.
    same_target = target.eq(target.shift()).fillna(False).astype(bool).to_numpy()

    return pd.DataFrame(
        {
            "nightObs": df["nightObs"].to_numpy(),
            "dt": df["dt"].to_numpy(),
            "dt_rep": _step("dt") * 3600.,  # in seconds
            "dOZx_rep": _step("ozone [db]_x"),
            "dOZy_rep": _step("ozone [db]_y"),
            "targflag_rep": same_target,
            "Npoints": [n_points] * n_points,
        },
        index=df.index,
        columns=["nightObs", "dt", "dt_rep", "dOZx_rep", "dOZy_rep", "targflag_rep", "Npoints"],
    )

In [None]:
# Night identifiers having at least one spectrum that passes the quality selection.
all_selected_nights = df_spec_sel["nightObs"].unique()

## Gaussian-process and linear fits

In [None]:
def funclineres(params, x, y, yerr):
    """Normalised residuals of the straight line: (observed - model) / error.

    ``params[0]`` is the intercept and ``params[1]`` the slope.
    """
    intercept, slope = params[0], params[1]
    return (y - intercept - slope*x)/yerr


def funcline(params,x):
    """Straight line ``params[0] + params[1]*x``."""
    intercept, slope = params[0], params[1]
    return intercept + slope*x

In [None]:
def MakeLineFit(df_night_o3_curve):
    """Fit a straight line to the ozone measurements as a function of time.

    The fit minimises the residuals of ``funclineres`` (weighted by the
    measurement errors) with ``scipy.optimize.least_squares``.

    Parameters
    ----------
    df_night_o3_curve : pandas.DataFrame
        Table with the columns ``"dt_midnight"`` (abscissa), ``"ozone [db]_x"``
        (ordinate) and ``"ozone [db]_err_x"`` (error on the ordinate).

    Returns
    -------
    tuple
        ``(x, y, yerr, n, chi2dof, xfit, yfit, slope, slope_err)`` where
        ``x, y, yerr`` are the fitted arrays, ``n`` the number of points,
        ``chi2dof`` the reduced chi2 (NaN when there are no degrees of
        freedom), ``xfit, yfit`` the fitted line sampled for plotting, and
        ``slope, slope_err`` the slope with its uncertainty.
    """

    x = df_night_o3_curve["dt_midnight"].values
    y = df_night_o3_curve["ozone [db]_x"].values
    yerr = df_night_o3_curve["ozone [db]_err_x"].values
    n = len(y)

    fit_res = least_squares(funclineres,[5.,0],args = (x,y,yerr))
    popt = fit_res.x
    npar = len(popt)

    # The residuals are already divided by yerr, so (J^T J)^-1 is the parameter
    # covariance; it is then rescaled by the reduced chi2.
    J = fit_res.jac
    cov = np.linalg.inv(J.T.dot(J))
    dof = n - npar
    if dof > 0:
        chi2dof = ((funclineres(popt,x,y,yerr))**2).sum()/dof
    else:
        # No degree of freedom left: the reduced chi2 is undefined.
        chi2dof = np.nan
    cov = cov * chi2dof
    perr = np.sqrt(np.diagonal(cov))

    # Sampling range of the fitted line: 1% beyond the first point and 5% beyond
    # the last one. The margins use |x| so that the range always grows outward;
    # scaling x.min()/x.max() directly moved the bounds inward for negative
    # abscissae (dt_midnight is negative before midnight).
    xmin, xmax = x.min(), x.max()
    xfit = np.linspace(xmin - 0.01*abs(xmin), xmax + 0.05*abs(xmax))
    yfit = funcline(popt,xfit)

    slope = popt[1]
    slope_err = perr[1]

    return x,y,yerr,n,chi2dof,xfit,yfit,slope,slope_err

In [None]:
#
# Gaussian Process regression
# https://scikit-learn.org/1.5/auto_examples/gaussian_process/plot_gpr_noisy_targets.html
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

In [None]:
def MakeGaussianProcessFit(df_night_o3_curve, noise_std=0.5, random_state=None):
    """Fit a Gaussian process to the ozone measurements as a function of time.

    The kernel is a constant amplitude times an RBF whose length scale starts
    at 5 and is bounded to [0.5, 12] (in units of ``"dt_midnight"``).

    Parameters
    ----------
    df_night_o3_curve : pandas.DataFrame
        Table with the columns ``"dt_midnight"`` and ``"ozone [db]_x"``.
    noise_std : float, optional
        Standard deviation of the measurement noise; its square is passed as
        ``alpha`` to the regressor. Defaults to 0.5, the value previously
        hard-coded.
    random_state : int, RandomState instance or None, optional
        Forwarded to the regressor to make the optimizer restarts
        reproducible. None (default) keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(xfit, mean_prediction, std_prediction, gaussian_process)``: 50
        abscissae spanning the data range, the predicted mean and standard
        deviation at those abscissae, and the fitted regressor.

    Notes
    -----
    NOTE(review): the per-point errors ``"ozone [db]_err_x"`` are not used;
    every point gets the same ``noise_std``.
    """
    x_train = df_night_o3_curve["dt_midnight"].values
    y_train = df_night_o3_curve["ozone [db]_x"].values

    # scikit-learn expects 2-D inputs of shape (n_samples, n_features)
    X_train = x_train.reshape(-1, 1)
    xfit = np.linspace(x_train.min(), x_train.max(), 50)
    X_fit = xfit.reshape(-1, 1)

    kernel = 1. * RBF(length_scale=5.0, length_scale_bounds=(0.5, 12.))
    gaussian_process = GaussianProcessRegressor(
        kernel=kernel,
        alpha=noise_std**2,
        n_restarts_optimizer=9,
        random_state=random_state,
    )
    gaussian_process.fit(X_train, y_train)

    mean_prediction, std_prediction = gaussian_process.predict(X_fit, return_std=True)
    return xfit, mean_prediction, std_prediction, gaussian_process

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# https://scikit-learn.org/1.5/auto_examples/linear_model/plot_ard.html
from sklearn.linear_model import ARDRegression,BayesianRidge

In [None]:
# Polynomial degree used as the default degree_max of MakeLinearModelFit.
POLY_DEGREE_MAX = 10

In [None]:
def MakeLinearModelFit(df_night_o3_curve,degree_max = POLY_DEGREE_MAX ):
    """
    Fit polynomial ARDRegression and BayesianRidge models to the ozone curve of one night.

    https://scikit-learn.org/1.5/auto_examples/linear_model/plot_ard.html

    The column "ozone [db]_x" is regressed against the column "dt_midnight".
    Each model is a pipeline: polynomial features up to ``degree_max``
    (without bias term), standard scaling, then the Bayesian linear regressor.
    The measured errors in "ozone [db]_err_x" are not used by the fit.

    Parameters
    ----------
    df_night_o3_curve : pandas.DataFrame
        Observations of a single night with the columns "dt_midnight"
        and "ozone [db]_x".
    degree_max : int, optional
        Maximum degree of the polynomial features. Default POLY_DEGREE_MAX.

    Returns
    -------
    xfit : numpy.ndarray
        50 regularly spaced "dt_midnight" values spanning the observed range.
    y_ard, y_ard_std : numpy.ndarray
        Prediction of the ARDRegression model on ``xfit`` and its standard deviation.
    y_brr, y_brr_std : numpy.ndarray
        Prediction of the BayesianRidge model on ``xfit`` and its standard deviation.
    resy : numpy.ndarray
        Residuals (data - ARDRegression prediction) at the observed points.
    y_mean : float
        Mean of the observed ozone values.
    """

    x_train = df_night_o3_curve["dt_midnight"].values
    y_train = df_night_o3_curve["ozone [db]_x"].values

    # scikit-learn expects 2D inputs of shape (n_samples, n_features)
    X_train = x_train.reshape(-1, 1)
    xfit = np.linspace(x_train.min(), x_train.max(), 50)
    X_fit = xfit.reshape(-1, 1)

    def _fit_poly_model(regressor):
        # same preprocessing for both regressors, only the last step differs
        model = make_pipeline(
            PolynomialFeatures(degree=degree_max, include_bias=False),
            StandardScaler(),
            regressor,
        )
        return model.fit(X_train, y_train)

    ard_poly = _fit_poly_model(ARDRegression())
    brr_poly = _fit_poly_model(BayesianRidge())

    y_ard, y_ard_std = ard_poly.predict(X_fit, return_std=True)
    y_brr, y_brr_std = brr_poly.predict(X_fit, return_std=True)

    # residuals of the observed points with respect to the ARD model
    resy = y_train - ard_poly.predict(X_train)
    y_mean = y_train.mean()

    return xfit,y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean

In [None]:
# Container of the figures that all go into a single PDF file
# (figures are appended by the plotting cells below)
all_figs_to_pdf = []

In [None]:
# Ozone statistics (output of DataFrame.describe) of each fitted night, keyed by night
all_dateObs_sel = {}

# columns kept for the per-night ozone curves
o3_curve_columns = ["Time","ozone [db]_x","ozone [db]_err_x","ozone [db]_y","ozone [db]_err_y","dt_midnight","AIRMASS","TARGET"]

# loop on nights
for night in all_selected_nights:

    # Select the night for each kind of observation and keep the useful variables.
    # .copy() gives independent dataframes, so that adding the "dt" column below
    # does not assign into a slice of the parent dataframe.
    df_night_o3_curve = df_spec_sel.loc[df_spec_sel["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_wp = df_spec_wp.loc[df_spec_wp["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np = df_spec_np.loc[df_spec_np["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np_b = df_spec_np_b.loc[df_spec_np_b["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np_f = df_spec_np_f.loc[df_spec_np_f["nightObs"] == night, o3_curve_columns].copy()

    # time elapsed since the first observation of the night, converted in hours
    tmin = df_night_o3_curve["Time"].min()
    for df_curve in (df_night_o3_curve, df_night_o3_curve_wp, df_night_o3_curve_np,
                     df_night_o3_curve_np_b, df_night_o3_curve_np_f):
        df_curve["dt"] = (df_curve["Time"] - tmin).dt.total_seconds()/3600.

    # statistics on the ozone of the night
    stat = df_night_o3_curve[["ozone [db]_x"]].describe()

    # fit and plot only the nights with more than 10 observations
    if len(df_night_o3_curve) <= 10:
        continue

    # does the fits (the straight line fit MakeLineFit is not used in this cell)
    xfit, mean_prediction, std_prediction, gp = MakeGaussianProcessFit(df_night_o3_curve)
    xfit2, y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean = MakeLinearModelFit(df_night_o3_curve)

    # (dataframe, colour, legend label) of the categories drawn with large markers
    categories = (
        (df_night_o3_curve_wp, "cyan", "polar"),
        (df_night_o3_curve_np_b, "r", "bright, no polar"),
        (df_night_o3_curve_np_f, "g", "faint, no polar"),
    )

    # plot : two panels sharing the same y axis
    fig = plt.figure(figsize=(16,4),constrained_layout=True)
    gs = GridSpec(1, 2,  width_ratios=[1,1],figure=fig)
    ax = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1],sharey=ax)

    ################################
    # left figure : Gaussian Process
    ################################

    ax.errorbar(x=df_night_o3_curve.dt_midnight, y=df_night_o3_curve["ozone [db]_x"],yerr=df_night_o3_curve["ozone [db]_err_x"],fmt=".",color="k",ecolor="k",ms=1,label='all')
    for df_cat, color, label in categories:
        # legend=False : the legend is built once by ax.legend() below
        df_cat.plot(x="dt_midnight",y="ozone [db]_x",c=color,ax=ax,marker='o',lw=0.0,grid=True,legend=False,label=label,ms=6,alpha=1)

    # fit result with its 95% band (+/- 1.96 sigma)
    ax.plot(xfit, mean_prediction,"b-",lw=3,label="Gaussian Process (RBF)")
    ax.fill_between(xfit,mean_prediction - 1.96 * std_prediction,mean_prediction + 1.96 * std_prediction,alpha=0.3,label=r"95% CL",facecolor='grey')

    ax.set_xlabel("hours")
    ax.legend(loc="upper right")
    ax.set_ylabel("ozone [db]_x")
    ax.set_title(f"GP fit for night {night}")
    ax.axvline(x=0,color="k",lw=2)

    ############################
    # right figure : linear fits
    ############################

    ax2.errorbar(x=df_night_o3_curve.dt_midnight, y=df_night_o3_curve["ozone [db]_x"],yerr=df_night_o3_curve["ozone [db]_err_x"],fmt=".",color="k",ecolor="k",ms=1)
    for df_cat, color, label in categories:
        df_cat.plot(x="dt_midnight",y="ozone [db]_x",c=color,ax=ax2,marker='o',lw=0.0,grid=True,legend=False,label=label,ms=6,alpha=1)

    # fit result with its 95% band (+/- 1.96 sigma)
    ax2.plot(xfit2, y_ard,"g-",lw=3,label="ARDRegression")
    ax2.fill_between(xfit2, y_ard - 1.96 * y_ard_std,y_ard + 1.96 * y_ard_std,alpha=0.3,label=r"95% CL",facecolor='green')

    ax2.set_title(f"Linear fit for night {night}")
    ax2.set_xlabel("hours")
    # the y axis is shared, so this range also applies to the left panel
    ax2.set_ylim(0.,600.)
    ax2.legend()
    ax2.axvline(x=0,color="k",lw=2)

    # keep the figure for the PDF written at the end of the notebook
    all_figs_to_pdf.append(fig)

    figname =f"{pathfigs}/gp_ard_o3_per_night_{night}"+figtype
    fig.savefig(figname)
    plt.show()

    # add statistics
    all_dateObs_sel[night] = stat

## Plot night by night on a single plot

In [None]:
# For each fitted night : mean ozone and standard deviation of the ARD fit residuals
all_o3 = []
all_sigmao3 = []

# columns kept for the per-night ozone curves
o3_curve_columns = ["Time","ozone [db]_x","ozone [db]_err_x","ozone [db]_y","ozone [db]_err_y","dt_midnight","AIRMASS","TARGET"]

# loop on nights
for night in all_selected_nights:

    # Select the night for each kind of observation and keep the useful variables.
    # .copy() gives independent dataframes, so that adding the "dt" column below
    # does not assign into a slice of the parent dataframe.
    df_night_o3_curve = df_spec_sel.loc[df_spec_sel["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_wp = df_spec_wp.loc[df_spec_wp["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np = df_spec_np.loc[df_spec_np["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np_b = df_spec_np_b.loc[df_spec_np_b["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np_f = df_spec_np_f.loc[df_spec_np_f["nightObs"] == night, o3_curve_columns].copy()

    # targets observed during the night, one per line (shown in a text box on the plot)
    list_of_targets = df_night_o3_curve["TARGET"].unique()
    str_list_of_targets = "\n".join(list_of_targets)

    # time elapsed since the first observation of the night, converted in hours
    tmin = df_night_o3_curve["Time"].min()
    for df_curve in (df_night_o3_curve, df_night_o3_curve_wp, df_night_o3_curve_np,
                     df_night_o3_curve_np_b, df_night_o3_curve_np_f):
        df_curve["dt"] = (df_curve["Time"] - tmin).dt.total_seconds()/3600.

    # fit and plot only the nights with more than 10 observations
    if len(df_night_o3_curve) <= 10:
        continue

    # does the fit
    xfit, y_ard, y_ard_std, y_brr, y_brr_std, resy, y_mean = MakeLinearModelFit(df_night_o3_curve)

    # night summary : mean ozone and dispersion of the residuals around the ARD fit
    sigma = np.std(resy)
    all_o3.append(y_mean)
    all_sigmao3.append(sigma)

    ##########
    # plot
    ############

    fig,ax  = plt.subplots(1,1,figsize=(10,4))

    ax.errorbar(x=df_night_o3_curve.dt_midnight, y=df_night_o3_curve["ozone [db]_x"],yerr=df_night_o3_curve["ozone [db]_err_x"],fmt="+",color="r",ecolor="k",label="stat err",ms=1)
    for df_cat, color, label in ((df_night_o3_curve_wp, "cyan", "polar"),
                                 (df_night_o3_curve_np_b, "r", "bright, no polar"),
                                 (df_night_o3_curve_np_f, "g", "faint, no polar")):
        # legend=False : the legend is built once by ax.legend() below
        df_cat.plot(x="dt_midnight",y="ozone [db]_x",c=color,ax=ax,marker='o',lw=0.0,grid=True,legend=False,label=label,ms=6,alpha=1)
    ax.set_xlabel("hours since midnight")
    ax.set_ylim(0.,600.)
    ax.set_ylabel("ozone [db]_x")
    # NOTE(review): the legend is drawn before the ARD curve and its band are added,
    # so they do not appear in it -- confirm this is intended
    ax.legend(loc="upper right")

    # fit result with its 95% band (+/- 1.96 sigma)
    ax.plot(xfit, y_ard,"g-",lw=3,label="ARDRegression")
    ax.fill_between(xfit, y_ard - 1.96 * y_ard_std,y_ard + 1.96 * y_ard_std,alpha=0.3,label=r"95% CL",facecolor='green')

    ax.text(0.8, 0.5, str_list_of_targets, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=0.5)
    ax.set_title(f"night {night}")
    ax.axvline(x=0,color="k",lw=2)

    # inset with the histogram of the residuals
    inset_ax = inset_axes(ax,width="20%", # width = 20% of parent_bbox
                                      height="40%", # height = 40% of parent_bbox
                                      loc="upper left")

    # raw string for the LaTeX part ("\s" is not a valid escape sequence);
    # the residuals are in DU like the x axis of the inset, not in mm
    textstr_sigma = r"$\sigma = $" + f"{sigma:.2f} DU"
    inset_ax.hist(resy,bins=30,facecolor="b")
    inset_ax.set_xlabel("residuals (DU)")
    inset_ax.text(0.1, 0.9, textstr_sigma , transform=inset_ax.transAxes, fontsize=8,verticalalignment='top', bbox=props,alpha=1)

    # keep the figure for the PDF written at the end of the notebook
    all_figs_to_pdf.append(fig)

    plt.tight_layout()
    figname =f"{pathfigs}/ardfit_o3_per_night_{night}"+figtype
    fig.savefig(figname)
    plt.show()


### Version v2 for Blois Conference

In [None]:


# columns kept for the per-night ozone curves
o3_curve_columns = ["Time","ozone [db]_x","ozone [db]_err_x","ozone [db]_y","ozone [db]_err_y","dt_midnight","AIRMASS","TARGET"]

# loop on nights
for night in all_selected_nights:

    # Select the night for each kind of observation and keep the useful variables.
    # .copy() gives independent dataframes, so that adding the "dt" column below
    # does not assign into a slice of the parent dataframe.
    df_night_o3_curve = df_spec_sel.loc[df_spec_sel["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_wp = df_spec_wp.loc[df_spec_wp["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np = df_spec_np.loc[df_spec_np["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np_b = df_spec_np_b.loc[df_spec_np_b["nightObs"] == night, o3_curve_columns].copy()
    df_night_o3_curve_np_f = df_spec_np_f.loc[df_spec_np_f["nightObs"] == night, o3_curve_columns].copy()

    # targets observed during the night, one per line under a "targets:" header
    list_of_targets = df_night_o3_curve["TARGET"].unique()
    str_list_of_targets = "\n".join(["targets:", "\n".join(list_of_targets)])

    # time elapsed since the first observation of the night, converted in hours
    tmin = df_night_o3_curve["Time"].min()
    for df_curve in (df_night_o3_curve, df_night_o3_curve_wp, df_night_o3_curve_np,
                     df_night_o3_curve_np_b, df_night_o3_curve_np_f):
        df_curve["dt"] = (df_curve["Time"] - tmin).dt.total_seconds()/3600.

    # statistics on the ozone of the night
    stat = df_night_o3_curve[["ozone [db]_x"]].describe()

    # fit and plot only the nights with more than 10 observations
    if len(df_night_o3_curve) <= 10:
        continue

    # does the straight line fit
    x,y,yerr,n,chi2dof,xfit,yfit,slope,slope_err = MakeLineFit(df_night_o3_curve)

    # fit summary, only used by the optional annotation commented out below
    textstr2 = "\n".join((f"d(Ozone/dt) : ",
                 f"slope : {slope:.3f} DU/h ",
                 f"slope err : {slope_err:.3f} DU/h",
                 f"chi2/ndeg : {chi2dof:.2f}",
                ))

    # add the fit results to the statistics of the night
    # NOTE(review): slope_err is written in the column "ozone [db]_err_x", which
    # describe() did not create, while slope and chi2 go in "ozone [db]_x" -- confirm
    stat.loc["slope","ozone [db]_x"] = slope
    stat.loc["slope_err","ozone [db]_err_x"] = slope_err
    stat.loc["chi2","ozone [db]_x"] = chi2dof

    # plot
    fig,ax  = plt.subplots(1,1,figsize=(16,4))

    ax.errorbar(x=df_night_o3_curve.dt_midnight, y=df_night_o3_curve["ozone [db]_x"],yerr=df_night_o3_curve["ozone [db]_err_x"],fmt=".",color="k",ecolor="k",ms=1,label="stat err")
    for df_cat, color, label in ((df_night_o3_curve_wp, "cyan", "polar"),
                                 (df_night_o3_curve_np_b, "r", "bright, no polar"),
                                 (df_night_o3_curve_np_f, "g", "faint, no polar")):
        # legend=False : the legend is built once by ax.legend() below
        df_cat.plot(x="dt_midnight",y="ozone [db]_x",c=color,ax=ax,marker='o',lw=0.0,grid=True,legend=False,label=label,ms=6,alpha=1)

    ax.set_xlabel("hours since midnight")
    ax.set_ylim(0.,600.)
    ax.set_ylabel("Ozone (DU)")
    ax.legend(loc="upper right")
    # fitted line, drawn very thin (lw=0.1) and after the legend call, so it is not listed in the legend
    ax.plot(xfit,yfit,"k:",lw=0.1)
    #ax.text(0.05, 0.95, textstr2, transform=ax.transAxes, fontsize=14,verticalalignment='top', bbox=props)
    ax.text(0.01, 0.95, str_list_of_targets, transform=ax.transAxes, fontsize=12,verticalalignment='top', bbox=props,alpha=0.5)
    ax.set_title(f"night {night}")
    ax.axvline(x=0,color="k",lw=2)

    plt.tight_layout()
    figname =f"{pathfigs}/o3_per_night_{night}_confblois24"+figtype
    fig.savefig(figname)
    plt.show()



In [None]:
# Variability of the ozone : dispersion of the fit residuals versus the mean ozone, one point per fitted night
fig,ax  = plt.subplots(1,1,figsize=(5,4))
ax.scatter(all_o3,all_sigmao3,marker='o',c='b')
# raw strings : "\o" and "\s" are not valid escape sequences in ordinary string literals
ax.set_xlabel(r"$\overline{O_3}$ (DU)")
ax.set_ylabel(r"$\sigma(O_3)$ (DU)")
ax.set_title("Variability of Ozone")
# keep the figure for the PDF export
all_figs_to_pdf.append(fig)
plt.show()

## Save all figs in a pdf file

In [None]:
from matplotlib.backends.backend_pdf import PdfPages
# Write all the collected figures in one PDF file, one figure per page,
# each page cropped to the tight bounding box of its figure
pdf_filename = f"holo_O3_night_fitvariation_polydeg_{POLY_DEGREE_MAX}_tight.pdf"
pdf_fullfilename = os.path.join(pathfigs,pdf_filename)
with PdfPages(pdf_fullfilename) as pdf_out:
    for page_fig in all_figs_to_pdf:
        pdf_out.savefig(page_fig, bbox_inches='tight')

In [None]:
# Same PDF export as above, but each page keeps the full figure size (no tight bounding box)
pdf_filename = f"holo_O3_night_fitvariation_polydeg_{POLY_DEGREE_MAX}_notight.pdf"
pdf_fullfilename = os.path.join(pathfigs,pdf_filename)
with PdfPages(pdf_fullfilename) as pdf_out:
    for page_fig in all_figs_to_pdf:
        pdf_out.savefig(page_fig)

In [None]:
# number of figures collected in all_figs_to_pdf
len(all_figs_to_pdf) 