# Convert H5 file to pandas

Convert the HDF5 file produced by the CheckForSpectraction notebook into a pandas DataFrame.
Note that only the attributes of each group are exported, not the datasets.

Works with Weekly_2023_11
- use jupyter kernel LSST
- author : Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date : 2023/04/01
- last update : 2023/04/07


In [1]:
import h5py

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
import pandas as pd

In [3]:
import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd

# Notebook-wide plotting defaults: wide figures and extra-large fonts for
# axis labels, titles and tick labels.
plt.rcParams.update({
    "figure.figsize": (14, 6),
    "axes.labelsize": 'xx-large',
    'axes.titlesize': 'xx-large',
    'xtick.labelsize': 'xx-large',
    'ytick.labelsize': 'xx-large',
})

In [4]:
from astropy.time import Time
import astropy.units as u

In [5]:
def GetColumnHfData(hf,nameval):
    """Collect one attribute across every top-level entry of an HDF5 file.

    Parameters
    ----------
    hf : mapping-like object (e.g. an open ``h5py.File``)
        Must provide ``keys()`` and ``get(key)``; every object returned by
        ``get`` must expose an ``attrs`` mapping.
    nameval : str
        Name of the attribute to read from each entry.

    Returns
    -------
    list
        The attribute values, one per entry, in the order of ``hf.keys()``.

    Notes
    -----
    The attribute lookup is not guarded: if an entry does not carry
    ``nameval`` the lookup error (``KeyError`` for dict-like ``attrs``)
    propagates to the caller.
    """
    return [hf.get(group_name).attrs[nameval] for group_name in list(hf.keys())]

In [1]:
# Observation night (integer YYYYMMDD) -> name of the run collection
# associated with that night. The trailing comments are the author's
# per-night quality notes.
DateToRuncollection = {
    # September 2022
    20220912: "u/dagoret/BPS_manyspectro_v60",  # missing collection
    20220913: "u/dagoret/BPS_manyspectro_v61",  # missing collection
    20220914: "u/dagoret/BPS_manyspectro_v62",  # missing collection
    20220927: "u/dagoret/BPS_manyspectro_v63",  # mount errors available, mixed quality
    20220928: "u/dagoret/BPS_manyspectro_v64",  # mount errors available, very nice spectra
    20220929: "u/dagoret/BPS_manyspectro_v65",  # mount errors available, very nice spectra

    # October 2022
    20221012: "u/dagoret/BPS_manyspectro_v56",  # mount errors, nice, all good
    20221025: "u/dagoret/BPS_manyspectro_v57",  # mount errors, very very bad spectra, run not selected
    20221026: "u/dagoret/BPS_manyspectro_v58",  # mount errors, very nice spectra, all good
    20221027: "u/dagoret/BPS_manyspectro_v59",  # mount errors, nice spectra, all good

    # November 2022
    20221109: "u/dagoret/BPS_manyspectro_v53",  # mount errors, nice spectra, few bad labelled as good
    20221110: "u/dagoret/BPS_manyspectro_v54",  # mount errors available, many bad rec spectra, few bad labelled as good, very strange
    20221123: "u/dagoret/BPS_manyspectro_v55",  # mount errors, nice

    # December 2022
    20221207: "u/dagoret/BPS_manyspectro_v48",  # no mount error, nice spectra
    20221208: "u/dagoret/BPS_manyspectro_v49",  # no mount error, nice spectra, one bad labelled as good
    20221209: "u/dagoret/BPS_manyspectro_v50",  # no mount error, spectra at distance from optical center, very interesting
    20221210: "u/dagoret/BPS_manyspectro_v51",  # no mount error, mediocre
    20221212: "u/dagoret/BPS_manyspectro_v52",  # no mount error, very bad gains and instability

    # January 2023
    20230117: "u/dagoret/BPS_manyspectro_v79",  # no mount errors, some mucol spectra that look good are rejected on DCCD
    20230118: "u/dagoret/BPS_manyspectro_v80",  # no mount error, some mucol looking good are rejected, many bad centroid calculations
    20230119: "u/dagoret/BPS_manyspectro_v77",  # no mount errors, many blurred spectra probably due to strong tracking error
    20230131: "u/dagoret/BPS_manyspectro_v78",  # missing for the moment

    # February 2023
    20230214: "u/dagoret/BPS_manyspectro_v42",  # no mount errors, nothing special
    20230215: "u/dagoret/BPS_manyspectro_v43",  # mount errors, OK
    20230216: "u/dagoret/BPS_manyspectro_v44",  # mount errors, OK, nice spectra
    20230228: "u/dagoret/BPS_manyspectro_v70",  # no mount error, OK, nice spectra

    # March 2023
    20230301: "u/dagoret/BPS_manyspectro_v71",  # no mount errors, OK, many spectra
    20230302: "u/dagoret/BPS_manyspectro_v72",  # no mount error, sometimes bad subtraction of order 2 on HD38666 (mucol)
    20230315: "u/dagoret/BPS_manyspectro_v73",  # mount errors
    20230316: "u/dagoret/BPS_manyspectro_v74",  # mount errors
}

In [7]:
# Night to process, given as an integer YYYYMMDD key of DateToRuncollection.
DATE = 20220927
# Collection name registered for that night (raises KeyError if DATE is not in the table).
my_collection = DateToRuncollection[DATE]
# Filter~disperser tag; it is embedded in the input and output file names.
filterdispersername = "empty~holo4_003"

In [8]:
# Two more tags embedded in the input and output file names.
# NOTE(review): presumably the extraction configuration name and the
# Spectractor version used to produce the spectra — confirm.
configmode = "PSF2DFFM_REBIN2"
specver="specV2.4"

In [9]:
# Both file names share the same "<date>-<filter~disperser>-<config>-<version>-oga"
# tag; only the prefix and the extension differ.
file_tag = f"{DATE}-{filterdispersername}-{configmode}-{specver}-oga"
output_summary_file = f"fullsummaryspectra_{file_tag}.csv"   # CSV summary written at the end
input_file_h5 = f"allspectra_{file_tag}.hdf5"                # HDF5 file to convert

In [10]:
# Echo the HDF5 input file name as a sanity check.
input_file_h5

'allspectra_20220927-empty~holo4_003-PSF2DFFM_REBIN2-specV2.4-oga.hdf5'

In [11]:
# Directory that is joined with the input file name to build its full path.
path_in="/sdf/home/d/dagoret/rubin-user/ExtractedSpectra/2023-04"

In [12]:
# Full path of the HDF5 input file inside path_in; echoed for checking.
input_fullfilename_h5 = os.path.join(path_in,input_file_h5)
input_fullfilename_h5

'/sdf/home/d/dagoret/rubin-user/ExtractedSpectra/2023-04/allspectra_20220927-empty~holo4_003-PSF2DFFM_REBIN2-specV2.4-oga.hdf5'

In [13]:
# Open the HDF5 file read-only.
# The file is looked up by its bare name in the current working directory
# first (the historical behaviour of this notebook). If it is not there, fall
# back to the full path under path_in, which was built above but previously
# never used, so the notebook no longer depends on where it is launched from.
# The handle is kept open for the following cells and closed in the last one.
h5_path = input_file_h5 if os.path.exists(input_file_h5) else input_fullfilename_h5
hf = h5py.File(h5_path, 'r')
hf.keys()

<KeysViewHDF5 ['2022092700534', '2022092700535', '2022092700546', '2022092700547', '2022092700557', '2022092700558', '2022092700568', '2022092700569', '2022092700579', '2022092700580', '2022092700591', '2022092700632', '2022092700633', '2022092700643', '2022092700644', '2022092700657', '2022092700658', '2022092700668', '2022092700669', '2022092700679', '2022092700680', '2022092700695', '2022092700696', '2022092700706', '2022092700707', '2022092700723', '2022092700724', '2022092700734', '2022092700735', '2022092700745', '2022092700746', '2022092700756', '2022092700757', '2022092700767', '2022092700768', '2022092700778', '2022092700779']>

In [14]:
# Take the first top-level key of the file as a sample entry to inspect
# (raises IndexError if the file has no keys).
key_sel = list(hf.keys())[0]
print(key_sel)

2022092700534


In [15]:
# Fetch the HDF5 object stored under the sample key; its attributes are inspected next.
group = hf.get(key_sel)

In [16]:
# Dump every attribute of the sample entry as "name => value".
for attr_name, attr_value in group.attrs.items():
    print(f'{attr_name} => {attr_value}')

exposure => 2022092700534
index => 0
me_az_rms => 0.0116227237687181
me_el_rms => 0.0748375611447573
me_image_az_rms => 0.0097448822294612
me_image_el_rms => 0.0748375611447573
me_image_rot_rms => 9.01926937878896e-05
me_rot_rms => 0.0664413518180812
spec_airmass => 1.83073179532443
spec_camera_angle => -87.540586589839
spec_date_obs => 2022-09-28T05:17:00.004501045Z
spec_dec => -65.9990136019746
spec_filter_label => empty
spec_header_a2_fit => 1.0
spec_header_airmass => 1.83073179532443
spec_header_am_fit => 1.83073179532443
spec_header_cam_rot => -87.540586589839
spec_header_chi2_fit => 4.934931659174201
spec_header_d2ccd => 187.6714457704309
spec_header_date-obs => 2022-09-28T05:17:00.004501045Z
spec_header_dec => -65.9990136019746
spec_header_exptime => 30.0
spec_header_filter => empty
spec_header_grating => holo4_003
spec_header_ha => -76.3653713882986
spec_header_lbda_ref => 513.3021144256002
spec_header_lshift => 0.0
spec_header_meanfwhm => 9.5284760954123
spec_header_outhum => 

In [17]:
# Attribute names of the sample entry; each one becomes a DataFrame column.
all_subgroup_keys = list(group.attrs.keys())

In [18]:
# Start from an empty DataFrame; its columns are filled one attribute at a time.
df = pd.DataFrame()

In [19]:
# One column per attribute name: gather that attribute from every entry of
# the file, printing the name as a progress trace.
for column_name in all_subgroup_keys:
    print(column_name)
    df[column_name] = GetColumnHfData(hf, column_name)
    

exposure
index
me_az_rms
me_el_rms
me_image_az_rms
me_image_el_rms
me_image_rot_rms
me_rot_rms
spec_airmass
spec_camera_angle
spec_date_obs
spec_dec
spec_filter_label
spec_header_a2_fit
spec_header_airmass
spec_header_am_fit
spec_header_cam_rot
spec_header_chi2_fit
spec_header_d2ccd
spec_header_date-obs
spec_header_dec
spec_header_exptime
spec_header_filter
spec_header_grating
spec_header_ha
spec_header_lbda_ref
spec_header_lshift
spec_header_meanfwhm
spec_header_outhum
spec_header_outpress
spec_header_outtemp
spec_header_parangle
spec_header_pixshift
spec_header_psf_reg
spec_header_rebin
spec_header_redshift
spec_header_rotangle
spec_header_s_dec
spec_header_s_nx
spec_header_s_ny
spec_header_s_sat
spec_header_s_x0
spec_header_s_xmax
spec_header_s_xmin
spec_header_s_y0
spec_header_s_ymax
spec_header_s_ymin
spec_header_target
spec_header_targetx
spec_header_targety
spec_header_trace_r
spec_header_version
spec_hour_angle
spec_humidity
spec_order
spec_parallactic_angle
spec_pressure
spec_

In [20]:
#list(group.items())

In [21]:
# List the columns collected from the HDF5 attributes.
df.columns

Index(['exposure', 'index', 'me_az_rms', 'me_el_rms', 'me_image_az_rms',
       'me_image_el_rms', 'me_image_rot_rms', 'me_rot_rms', 'spec_airmass',
       'spec_camera_angle', 'spec_date_obs', 'spec_dec', 'spec_filter_label',
       'spec_header_a2_fit', 'spec_header_airmass', 'spec_header_am_fit',
       'spec_header_cam_rot', 'spec_header_chi2_fit', 'spec_header_d2ccd',
       'spec_header_date-obs', 'spec_header_dec', 'spec_header_exptime',
       'spec_header_filter', 'spec_header_grating', 'spec_header_ha',
       'spec_header_lbda_ref', 'spec_header_lshift', 'spec_header_meanfwhm',
       'spec_header_outhum', 'spec_header_outpress', 'spec_header_outtemp',
       'spec_header_parangle', 'spec_header_pixshift', 'spec_header_psf_reg',
       'spec_header_rebin', 'spec_header_redshift', 'spec_header_rotangle',
       'spec_header_s_dec', 'spec_header_s_nx', 'spec_header_s_ny',
       'spec_header_s_sat', 'spec_header_s_x0', 'spec_header_s_xmax',
       'spec_header_s_xmin', 'spec_h

In [22]:
# Column order of the summary table.
# DataFrame.reindex(columns=...) keeps only the listed columns, in this order;
# a listed name that is absent from df yields an all-NaN column, and any df
# column not listed here is dropped.
ordered_columns = [
    # identification and observing conditions
    'index',
    'exposure',
    'spec_date_obs',
    'spec_target_label',
    'spec_filter_label',
    'spec_airmass',
    'spec_pressure',
    'spec_temperature',
    'spec_humidity',
    'spec_hour_angle',
    'spec_parallactic_angle',
    'spec_camera_angle',
    'spec_order',
    # spec_header_* attributes
    "spec_header_target",
    "spec_header_redshift",
    "spec_header_grating",
    "spec_header_rotangle",
    "spec_header_d2ccd",
    "spec_header_lshift",
    "spec_header_parangle",
    "spec_header_targetx",
    "spec_header_targety",
    "spec_header_lbda_ref",
    "spec_header_pixshift",
    "spec_header_psf_reg",
    "spec_header_trace_r",
    "spec_header_chi2_fit",
    "spec_header_a2_fit",
    "spec_header_am_fit",
    "spec_header_meanfwhm",
    "spec_header_version",
    "spec_header_rebin",
    "spec_header_date-obs",
    "spec_header_exptime",
    "spec_header_airmass",
    "spec_header_dec",
    "spec_header_ha",
    "spec_header_outtemp",
    "spec_header_outpress",
    "spec_header_outhum",
    "spec_header_filter",
    "spec_header_cam_rot",
    "spec_header_s_x0",
    "spec_header_s_y0",
    "spec_header_s_xmin",
    "spec_header_s_xmax",
    "spec_header_s_ymin",
    "spec_header_s_ymax",
    "spec_header_s_nx",
    "spec_header_s_ny",
    "spec_header_s_dec",
    "spec_header_s_sat",
    # spec_spectrogram_* attributes
    "spec_spectrogram_x0",
    "spec_spectrogram_y0",
    "spec_spectrogram_xmin",
    "spec_spectrogram_xmax",
    "spec_spectrogram_ymin",
    "spec_spectrogram_ymax",
    "spec_spectrogram_deg",
    "spec_spectrogram_saturation",
    "spec_spectrogram_Nx",
    "spec_spectrogram_Ny",
]

# The me_* RMS columns (presumably the mount errors) are not present in every
# file; append them at the end only when the file carries them, so that files
# without them do not get six empty columns.
if 'me_az_rms' in all_subgroup_keys:
    ordered_columns += [
        "me_az_rms",
        "me_el_rms",
        "me_rot_rms",
        "me_image_az_rms",
        "me_image_el_rms",
        "me_image_rot_rms",
    ]

df = df.reindex(columns=ordered_columns)
        

In [23]:
# Remove the limit on the number of columns shown when a DataFrame is displayed.
pd.options.display.max_columns = None

In [24]:
# Display the reordered summary table.
df

Unnamed: 0,index,exposure,spec_date_obs,spec_target_label,spec_filter_label,spec_airmass,spec_pressure,spec_temperature,spec_humidity,spec_hour_angle,spec_parallactic_angle,spec_camera_angle,spec_order,spec_header_target,spec_header_redshift,spec_header_grating,spec_header_rotangle,spec_header_d2ccd,spec_header_lshift,spec_header_parangle,spec_header_targetx,spec_header_targety,spec_header_lbda_ref,spec_header_pixshift,spec_header_psf_reg,spec_header_trace_r,spec_header_chi2_fit,spec_header_a2_fit,spec_header_am_fit,spec_header_meanfwhm,spec_header_version,spec_header_rebin,spec_header_date-obs,spec_header_exptime,spec_header_airmass,spec_header_dec,spec_header_ha,spec_header_outtemp,spec_header_outpress,spec_header_outhum,spec_header_filter,spec_header_cam_rot,spec_header_s_x0,spec_header_s_y0,spec_header_s_xmin,spec_header_s_xmax,spec_header_s_ymin,spec_header_s_ymax,spec_header_s_nx,spec_header_s_ny,spec_header_s_dec,spec_header_s_sat,spec_spectrogram_x0,spec_spectrogram_y0,spec_spectrogram_xmin,spec_spectrogram_xmax,spec_spectrogram_ymin,spec_spectrogram_ymax,spec_spectrogram_deg,spec_spectrogram_saturation,spec_spectrogram_Nx,spec_spectrogram_Ny,me_az_rms,me_el_rms,me_rot_rms,me_image_az_rms,me_image_el_rms,me_image_rot_rms
0,0,2022092700534,2022-09-28T05:17:00.004501045Z,HD42525,empty,1.830732,0,0,0,-76.365371,-91.28576,-87.540587,1,HD42525,5.9e-05,holo4_003,0.415308,187.671446,0.0,-91.28576,148.614527,860.499625,513.302114,-0.842913,0.002904023,286.579992,4.934932,1.0,1.830732,9.528476,2.4,2,2022-09-28T05:17:00.004501045Z,30.0,1.830732,-65.999014,-76.365371,0,0,0,empty,-87.540587,-418.892308,84.499625,569,1705,776,961,1136,185,2,22666.666667,-418.892308,84.499625,569,1705,776,961,2,22666.666667,1136,185,0.011623,0.074838,0.066441,0.009745,0.074838,9e-05
1,1,2022092700535,2022-09-28T05:17:42.818994862Z,HD42525,empty,1.827189,0,0,0,-76.186504,-91.123347,-87.540459,1,HD42525,5.9e-05,holo4_003,0.407001,185.958512,0.0,-91.123347,150.480563,860.278187,513.302114,-1.2e-05,7.120328e-11,678.805452,7.155884,1.0,1.827189,10.671866,2.4,2,2022-09-28T05:17:42.818994862Z,30.0,1.827189,-65.999015,-76.186504,0,0,0,empty,-87.540459,-417.788241,84.278187,570,1707,776,961,1137,185,2,22666.666667,-417.788241,84.278187,570,1707,776,961,2,22666.666667,1137,185,0.011416,0.069099,0.058589,0.009564,0.069099,8e-05
2,2,2022092700546,2022-09-28T05:24:46.622992156Z,HD185975,empty,1.922195,0,0,0,-290.190975,71.199845,109.941048,1,HD185975,-6.5e-05,holo4_003,0.470658,187.397811,0.0,71.199845,162.275634,866.168447,597.257159,0.054723,0.3732502,182.929672,3.569781,1.0,1.922195,10.159259,2.4,2,2022-09-28T05:24:46.622992156Z,30.0,1.922195,-87.511618,-290.190975,0,0,0,empty,109.941048,-417.95365,84.168447,580,1716,782,967,1136,185,2,22666.666667,-417.95365,84.168447,580,1716,782,967,2,22666.666667,1136,185,0.011382,0.010147,0.084177,0.009729,0.010147,0.000114
3,3,2022092700547,2022-09-28T05:25:29.532001588Z,HD185975,empty,1.922606,0,0,0,-290.011746,71.380416,109.941229,1,HD185975,-6.5e-05,holo4_003,0.471939,187.293808,0.0,71.380416,164.222208,867.720241,597.257159,0.103227,0.210287,186.149275,2.544129,1.0,1.922606,10.480197,2.4,2,2022-09-28T05:25:29.532001588Z,30.0,1.922606,-87.511608,-290.011746,0,0,0,empty,109.941229,-418.036611,83.720241,582,1718,784,969,1136,185,2,22666.666667,-418.036611,83.720241,582,1718,784,969,2,22666.666667,1136,185,0.008143,0.012577,0.141199,0.006961,0.012577,0.000192
4,4,2022092700557,2022-09-28T05:32:08.277495328Z,HD42525,empty,1.759066,0,0,0,-72.576395,-87.815261,268.965305,1,HD42525,5.9e-05,holo4_003,0.39225,192.322265,0.0,-87.815261,161.961157,859.476573,513.302114,-0.000883,4.976655e-11,740.076076,17.053464,1.0,1.759066,10.992531,2.4,2,2022-09-28T05:32:08.277495328Z,30.0,1.759066,-66.000288,-72.576395,0,0,0,empty,268.965305,-417.924744,84.476573,578,1715,775,960,1137,185,2,22666.666667,-417.924744,84.476573,578,1715,775,960,2,22666.666667,1137,185,0.024802,0.102973,0.049059,0.020424,0.102973,6.7e-05
5,5,2022092700558,2022-09-28T05:32:51.092503150Z,HD42525,empty,1.755855,0,0,0,-72.3974,-87.64976,268.965287,1,HD42525,5.9e-05,holo4_003,0.390367,186.55075,0.0,-87.64976,158.753465,860.217397,513.302114,-0.006924,4.143573e-11,791.449252,8.270089,1.0,1.755855,10.818394,2.4,2,2022-09-28T05:32:51.092503150Z,30.0,1.755855,-66.000294,-72.3974,0,0,0,empty,268.965287,-417.585346,84.217397,578,1715,776,961,1137,185,2,22666.666667,-417.585346,84.217397,578,1715,776,961,2,22666.666667,1137,185,0.018448,0.134302,0.066974,0.015178,0.134302,9.1e-05
6,6,2022092700568,2022-09-28T05:39:39.890492431Z,HD031128,empty,1.424795,0,0,0,-52.172476,-106.949346,-72.815335,1,HD031128,0.000373,holo4_003,0.350195,187.442959,0.0,-106.949346,151.966592,867.430052,572.93915,-0.046691,0.1427866,223.423301,1.700667,1.0,1.424795,7.26794,2.4,2,2022-09-28T05:39:39.890492431Z,30.0,1.424795,-27.025458,-52.172476,0,0,0,empty,-72.815335,-417.851848,86.430052,570,1707,781,966,1137,185,2,22666.666667,-417.851848,86.430052,570,1707,781,966,2,22666.666667,1137,185,0.099051,0.108836,0.114324,0.070631,0.108836,0.000155
7,7,2022092700569,2022-09-28T05:40:22.798504495Z,HD031128,empty,1.420954,0,0,0,-51.99322,-106.913753,-72.81535,1,HD031128,0.000373,holo4_003,0.352368,187.358956,0.0,-106.913753,152.504953,866.880471,572.93915,-0.155003,0.1948442,199.931906,1.864317,1.0,1.420954,8.30979,2.4,2,2022-09-28T05:40:22.798504495Z,30.0,1.420954,-27.025407,-51.99322,0,0,0,empty,-72.81535,-417.174254,85.880471,570,1707,781,966,1137,185,2,22666.666667,-417.174254,85.880471,570,1707,781,966,2,22666.666667,1137,185,0.115297,0.119144,0.122171,0.081999,0.119144,0.000166
8,8,2022092700579,2022-09-28T05:47:11.873491883Z,HD42525,empty,1.694493,0,0,0,-68.807893,-84.298218,265.503651,1,HD42525,5.9e-05,holo4_003,0.376279,187.667225,0.0,-84.298218,154.044808,863.76256,513.302114,-0.655147,0.008484476,309.179372,8.338628,1.0,1.694493,7.76317,2.4,2,2022-09-28T05:47:11.873491883Z,30.0,1.694493,-66.000018,-68.807893,0,0,0,empty,265.503651,-419.532698,84.76256,577,1713,779,964,1136,185,2,22666.666667,-419.532698,84.76256,577,1713,779,964,2,22666.666667,1136,185,0.022407,0.063109,0.063887,0.018106,0.063109,8.7e-05
9,9,2022092700580,2022-09-28T05:47:55.068504865Z,HD42525,empty,1.691526,0,0,0,-68.627626,-84.128162,265.503809,1,HD42525,5.9e-05,holo4_003,0.38201,187.6459,0.0,-84.128162,154.536727,862.44448,513.302114,-0.669229,0.00107044,337.495804,8.531937,1.0,1.691526,8.085193,2.4,2,2022-09-28T05:47:55.068504865Z,30.0,1.691526,-66.000023,-68.627626,0,0,0,empty,265.503809,-417.687503,84.44448,575,1712,778,963,1137,185,2,22666.666667,-417.687503,84.44448,575,1712,778,963,2,22666.666667,1137,185,0.027174,0.058432,0.058566,0.021938,0.058432,8e-05


In [25]:
# Write the summary table to CSV. With the default to_csv settings the
# DataFrame row index is written as well, as an unnamed first column.
df.to_csv(output_summary_file)

In [26]:
# Release the HDF5 file handle that was opened earlier in the notebook.
hf.close() 