# Convert H5 file to pandas

Convert the HDF5 file produced by the CheckForSpectraction notebook into a pandas DataFrame and save it as a CSV file.
Note that only the group attributes are written; the datasets are not.

work with Weekly_2023_11
- use jupyter kernel LSST
- author : Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date : 2023/04/01
- last update : 2023/04/01


In [1]:
import h5py

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
import pandas as pd

In [3]:
import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd

# Notebook-wide matplotlib defaults: wide figures, extra-large titles, axis labels and tick labels.
plt.rcParams.update({
    "figure.figsize": (14, 6),
    "axes.labelsize": 'xx-large',
    "axes.titlesize": 'xx-large',
    "xtick.labelsize": 'xx-large',
    "ytick.labelsize": 'xx-large',
})

In [4]:
from astropy.time import Time
import astropy.units as u

In [5]:
def GetColumnHfData(hf,nameval):
    """Collect one attribute from every top-level entry of an open HDF5 file.

    Parameters
    ----------
    hf : mapping-like object exposing ``keys()`` and ``get(key)``
        (in this notebook, the opened ``h5py.File``).
    nameval : name of the attribute looked up in each entry's ``attrs``.

    Returns
    -------
    list
        One value per key, in the order given by ``hf.keys()``.

    Notes
    -----
    No default is substituted: if an entry lacks ``nameval``, the error
    raised by ``attrs[nameval]`` propagates to the caller.
    """
    return [hf.get(group_name).attrs[nameval] for group_name in list(hf.keys())]

In [6]:
# Lookup table: observation date (integer YYYYMMDD) -> run collection name.
# Trailing comments are the author's per-night quality notes.
DateToRuncollection = {
    # September 2022
    20220912: "u/dagoret/BPS_manyspectro_v60",  # missing collection
    20220913: "u/dagoret/BPS_manyspectro_v61",  # missing collection
    20220914: "u/dagoret/BPS_manyspectro_v62",  # missing collection
    20220927: "u/dagoret/BPS_manyspectro_v63",  # mount errors available, mixed quality
    20220928: "u/dagoret/BPS_manyspectro_v64",  # mount errors available, very nice spectra
    20220929: "u/dagoret/BPS_manyspectro_v65",  # mount errors available, very nice spectra

    # October 2022
    20221012: "u/dagoret/BPS_manyspectro_v56",  # mount errors, nice
    20221025: "u/dagoret/BPS_manyspectro_v57",  # mount errors, very bad spectra
    20221026: "u/dagoret/BPS_manyspectro_v58",  # mount errors, nice spectra
    20221027: "u/dagoret/BPS_manyspectro_v59",  # mount errors, nice spectra

    # November 2022
    20221109: "u/dagoret/BPS_manyspectro_v53",  # nice
    20221110: "u/dagoret/BPS_manyspectro_v54",  # mount errors available, many bad reconstructed spectra
    20221123: "u/dagoret/BPS_manyspectro_v55",  # mount errors, nice

    # December 2022
    20221207: "u/dagoret/BPS_manyspectro_v48",
    20221208: "u/dagoret/BPS_manyspectro_v49",
    20221209: "u/dagoret/BPS_manyspectro_v50",
    20221210: "u/dagoret/BPS_manyspectro_v51",
    20221212: "u/dagoret/BPS_manyspectro_v52",

    # January 2023
    20230117: "u/dagoret/BPS_manyspectro_v79",  # no mount errors
    20230118: "u/dagoret/BPS_manyspectro_v80",  # no mount errors
    20230119: "u/dagoret/BPS_manyspectro_v77",  # missing for the moment
    20230131: "u/dagoret/BPS_manyspectro_v78",  # missing for the moment

    # February 2023
    20230214: "u/dagoret/BPS_manyspectro_v42",
    20230215: "u/dagoret/BPS_manyspectro_v43",  # mount errors
    20230216: "u/dagoret/BPS_manyspectro_v44",  # mount errors
    20230228: "u/dagoret/BPS_manyspectro_v70",

    # March 2023
    20230301: "u/dagoret/BPS_manyspectro_v71",
    20230302: "u/dagoret/BPS_manyspectro_v72",
    20230315: "u/dagoret/BPS_manyspectro_v73",
    20230316: "u/dagoret/BPS_manyspectro_v74",
}

In [7]:
# Observation date to process, as an integer in the same YYYYMMDD form as the keys of DateToRuncollection.
DATE = 20230118
# Collection registered for that date; a date absent from the table raises KeyError here.
my_collection = DateToRuncollection[DATE]
# Filter~disperser tag; it is embedded in the input and output file names built further down.
filterdispersername = "empty~holo4_003"

In [8]:
# Configuration and version tags; in this notebook they are only used to build the file names.
# NOTE(review): presumably they identify the extraction configuration and spectrum format version — confirm.
configmode = "PSF2DFFM_REBIN2"
specver="specV2.4"

In [9]:
# Output CSV and input HDF5 names share the same tag sequence: date, filter~disperser, config mode, version.
output_summary_file = f"fullsummaryspectra_{DATE}-{filterdispersername}-{configmode}-{specver}-oga.csv"
input_file_h5 = f"allspectra_{DATE}-{filterdispersername}-{configmode}-{specver}-oga.hdf5"

In [10]:
# Display the input file name (no directory part).
input_file_h5

'allspectra_20230118-empty~holo4_003-PSF2DFFM_REBIN2-specV2.4-oga.hdf5'

In [11]:
# Directory holding the extracted-spectra HDF5 files.
# The default is the author's original location; set the EXTRACTED_SPECTRA_DIR
# environment variable to run the notebook against another directory without editing this cell.
path_in = os.environ.get(
    "EXTRACTED_SPECTRA_DIR",
    "/sdf/home/d/dagoret/rubin-user/ExtractedSpectra/2023-04",
)

In [12]:
# Full path of the input file: directory path_in joined with the bare file name.
input_fullfilename_h5 = os.path.join(path_in,input_file_h5)
# Display the resulting path.
input_fullfilename_h5

'/sdf/home/d/dagoret/rubin-user/ExtractedSpectra/2023-04/allspectra_20230118-empty~holo4_003-PSF2DFFM_REBIN2-specV2.4-oga.hdf5'

In [13]:
# Open the input file read-only.
# Prefer the full path built from path_in so the read does not depend on the
# current working directory; fall back to the bare file name (the previous
# behaviour) when the file is not found under path_in.
h5_path_to_open = input_fullfilename_h5 if os.path.exists(input_fullfilename_h5) else input_file_h5
hf = h5py.File(h5_path_to_open, 'r')
# Display the top-level keys of the file.
hf.keys()

<KeysViewHDF5 ['2023011800207', '2023011800208', '2023011800217', '2023011800226', '2023011800227', '2023011800248', '2023011800249', '2023011800263', '2023011800264', '2023011800273', '2023011800274', '2023011800287', '2023011800288', '2023011800299', '2023011800300', '2023011800312', '2023011800313', '2023011800321', '2023011800322', '2023011800337', '2023011800338', '2023011800346', '2023011800347', '2023011800355', '2023011800356', '2023011800374', '2023011800375', '2023011800395', '2023011800396', '2023011800407', '2023011800408', '2023011800419', '2023011800420', '2023011800431', '2023011800432', '2023011800444', '2023011800445', '2023011800454', '2023011800455', '2023011800463', '2023011800464', '2023011800472', '2023011800473', '2023011800482', '2023011800501', '2023011800539', '2023011800540', '2023011800554', '2023011800555', '2023011800563', '2023011800564', '2023011800572', '2023011800573', '2023011800581', '2023011800582', '2023011800590', '2023011800591', '2023011800599',

In [14]:
# Pick the first top-level key as a sample entry; its attributes are inspected in the following cells.
key_sel = list(hf.keys())[0]
print(key_sel)

2023011800207


In [15]:
# Fetch the entry stored under the sample key.
group = hf.get(key_sel)

In [16]:
# Dump every attribute of the sample entry as "name => value".
for k, attr_value in group.attrs.items():
    print(f"{k} => {attr_value}")

exposure => 2023011800207
index => 0
spec_airmass => 1.08330093396988
spec_camera_angle => -87.941141624142
spec_date_obs => 2023-01-19T00:51:33.404506808Z
spec_dec => -32.2615890266184
spec_filter_label => empty
spec_header_a2_fit => 1.0
spec_header_airmass => 1.08330093396988
spec_header_am_fit => 1.08330093396988
spec_header_cam_rot => -87.941141624142
spec_header_chi2_fit => 45.96642258629928
spec_header_d2ccd => 185.0387893701667
spec_header_date-obs => 2023-01-19T00:51:33.404506808Z
spec_header_dec => -32.2615890266184
spec_header_exptime => 30.0
spec_header_filter => empty
spec_header_grating => holo4_003
spec_header_ha => -26.49311361882009
spec_header_lbda_ref => 550
spec_header_lshift => 0.0
spec_header_meanfwhm => 4.382310394585121
spec_header_outhum => 0
spec_header_outpress => 0
spec_header_outtemp => 0
spec_header_parangle => -91.9660585382704
spec_header_pixshift => -1.999999999999746
spec_header_psf_reg => 0.06153539603306835
spec_header_rebin => 2
spec_header_redshift 

In [17]:
# Attribute names of the sample entry; they become the candidate column names of the table.
all_subgroup_keys = list(group.attrs.keys())

In [18]:
# Start from an empty table; columns are added one attribute at a time in the next cell.
df = pd.DataFrame()

In [19]:
# One column per attribute name, with one value per top-level entry of the file.
# The name is printed first as a simple progress trace.
for key in all_subgroup_keys:
    print(key)
    df[key] = GetColumnHfData(hf, key)
    

exposure
index
spec_airmass
spec_camera_angle
spec_date_obs
spec_dec
spec_filter_label
spec_header_a2_fit
spec_header_airmass
spec_header_am_fit
spec_header_cam_rot
spec_header_chi2_fit
spec_header_d2ccd
spec_header_date-obs
spec_header_dec
spec_header_exptime
spec_header_filter
spec_header_grating
spec_header_ha
spec_header_lbda_ref
spec_header_lshift
spec_header_meanfwhm
spec_header_outhum
spec_header_outpress
spec_header_outtemp
spec_header_parangle
spec_header_pixshift
spec_header_psf_reg
spec_header_rebin
spec_header_redshift
spec_header_rotangle
spec_header_s_dec
spec_header_s_nx
spec_header_s_ny
spec_header_s_sat
spec_header_s_x0
spec_header_s_xmax
spec_header_s_xmin
spec_header_s_y0
spec_header_s_ymax
spec_header_s_ymin
spec_header_target
spec_header_targetx
spec_header_targety
spec_header_trace_r
spec_header_version
spec_hour_angle
spec_humidity
spec_order
spec_parallactic_angle
spec_pressure
spec_spectrogram_Nx
spec_spectrogram_Ny
spec_spectrogram_deg
spec_spectrogram_saturat

In [20]:
#list(group.items())

In [21]:
# Display the column names collected so far.
df.columns

Index(['exposure', 'index', 'spec_airmass', 'spec_camera_angle',
       'spec_date_obs', 'spec_dec', 'spec_filter_label', 'spec_header_a2_fit',
       'spec_header_airmass', 'spec_header_am_fit', 'spec_header_cam_rot',
       'spec_header_chi2_fit', 'spec_header_d2ccd', 'spec_header_date-obs',
       'spec_header_dec', 'spec_header_exptime', 'spec_header_filter',
       'spec_header_grating', 'spec_header_ha', 'spec_header_lbda_ref',
       'spec_header_lshift', 'spec_header_meanfwhm', 'spec_header_outhum',
       'spec_header_outpress', 'spec_header_outtemp', 'spec_header_parangle',
       'spec_header_pixshift', 'spec_header_psf_reg', 'spec_header_rebin',
       'spec_header_redshift', 'spec_header_rotangle', 'spec_header_s_dec',
       'spec_header_s_nx', 'spec_header_s_ny', 'spec_header_s_sat',
       'spec_header_s_x0', 'spec_header_s_xmax', 'spec_header_s_xmin',
       'spec_header_s_y0', 'spec_header_s_ymax', 'spec_header_s_ymin',
       'spec_header_target', 'spec_header_target

In [22]:
# Column selection and order for the summary table.
# DataFrame.reindex(columns=...) keeps only the listed columns, in this order;
# a listed name that is absent from df becomes an all-NaN column.
summary_columns = [
    # identification
    'index',
    'exposure',
    'spec_date_obs',
    'spec_target_label',
    'spec_filter_label',
    # spec_* attributes
    'spec_airmass',
    'spec_pressure',
    'spec_temperature',
    'spec_humidity',
    'spec_hour_angle',
    'spec_parallactic_angle',
    'spec_camera_angle',
    'spec_order',
    # spec_header_* attributes
    "spec_header_target",
    "spec_header_redshift",
    "spec_header_grating",
    "spec_header_rotangle",
    "spec_header_d2ccd",
    "spec_header_lshift",
    "spec_header_parangle",
    "spec_header_targetx",
    "spec_header_targety",
    "spec_header_lbda_ref",
    "spec_header_pixshift",
    "spec_header_psf_reg",
    "spec_header_trace_r",
    "spec_header_chi2_fit",
    "spec_header_a2_fit",
    "spec_header_am_fit",
    "spec_header_meanfwhm",
    "spec_header_version",
    "spec_header_rebin",
    "spec_header_date-obs",
    "spec_header_exptime",
    "spec_header_airmass",
    "spec_header_dec",
    "spec_header_ha",
    "spec_header_outtemp",
    "spec_header_outpress",
    "spec_header_outhum",
    "spec_header_filter",
    "spec_header_cam_rot",
    "spec_header_s_x0",
    "spec_header_s_y0",
    "spec_header_s_xmin",
    "spec_header_s_xmax",
    "spec_header_s_ymin",
    "spec_header_s_ymax",
    "spec_header_s_nx",
    "spec_header_s_ny",
    "spec_header_s_dec",
    "spec_header_s_sat",
    # spec_spectrogram_* attributes
    "spec_spectrogram_x0",
    "spec_spectrogram_y0",
    "spec_spectrogram_xmin",
    "spec_spectrogram_xmax",
    "spec_spectrogram_ymin",
    "spec_spectrogram_ymax",
    "spec_spectrogram_deg",
    "spec_spectrogram_saturation",
    "spec_spectrogram_Nx",
    "spec_spectrogram_Ny",
]

# me_* columns (presumably the mount-error RMS values — confirm) are appended
# only when the sample entry carries them, detected through 'me_az_rms'.
mount_error_columns = [
    "me_az_rms",
    "me_el_rms",
    "me_rot_rms",
    "me_image_az_rms",
    "me_image_el_rms",
    "me_image_rot_rms",
]

if 'me_az_rms' in all_subgroup_keys:
    summary_columns = summary_columns + mount_error_columns

df = df.reindex(columns=summary_columns)
        

In [23]:
# Remove the limit on displayed columns so the wide summary table is shown in full.
pd.set_option("display.max_columns", None)

In [24]:
# Display the reordered summary table.
df

Unnamed: 0,index,exposure,spec_date_obs,spec_target_label,spec_filter_label,spec_airmass,spec_pressure,spec_temperature,spec_humidity,spec_hour_angle,spec_parallactic_angle,spec_camera_angle,spec_order,spec_header_target,spec_header_redshift,spec_header_grating,spec_header_rotangle,spec_header_d2ccd,spec_header_lshift,spec_header_parangle,spec_header_targetx,spec_header_targety,spec_header_lbda_ref,spec_header_pixshift,spec_header_psf_reg,spec_header_trace_r,spec_header_chi2_fit,spec_header_a2_fit,spec_header_am_fit,spec_header_meanfwhm,spec_header_version,spec_header_rebin,spec_header_date-obs,spec_header_exptime,spec_header_airmass,spec_header_dec,spec_header_ha,spec_header_outtemp,spec_header_outpress,spec_header_outhum,spec_header_filter,spec_header_cam_rot,spec_header_s_x0,spec_header_s_y0,spec_header_s_xmin,spec_header_s_xmax,spec_header_s_ymin,spec_header_s_ymax,spec_header_s_nx,spec_header_s_ny,spec_header_s_dec,spec_header_s_sat,spec_spectrogram_x0,spec_spectrogram_y0,spec_spectrogram_xmin,spec_spectrogram_xmax,spec_spectrogram_ymin,spec_spectrogram_ymax,spec_spectrogram_deg,spec_spectrogram_saturation,spec_spectrogram_Nx,spec_spectrogram_Ny
0,0,2023011800207,2023-01-19T00:51:33.404506808Z,HD38666,empty,1.083301,0,0,0,-26.493114,-91.966059,-87.941142,1,HD38666,0.000364,holo4_003,0.253071,185.038789,0.0,-91.966059,133.306841,861.688896,550.000000,-2.000000,6.153540e-02,314.155882,45.966423,1.0,1.083301,4.382310,2.4,2,2023-01-19T00:51:33.404506808Z,30.0,1.083301,-32.261589,-26.493114,0,0,0,empty,-87.941142,-420.034376,88.688896,559,1695,773,958,1136,185,2,22666.666667,-420.034376,88.688896,559,1695,773,958,2,22666.666667,1136,185
1,1,2023011800208,2023-01-19T00:52:16.221000908Z,HD38666,empty,1.082111,0,0,0,-26.314179,-91.882744,-87.941063,1,HD38666,0.000364,holo4_003,-0.022287,185.524713,0.0,-91.882744,1293.773081,869.692619,550.000000,-1.999997,7.954722e-13,277.335116,28.348347,1.0,1.082111,6.728203,2.4,2,2023-01-19T00:52:16.221000908Z,30.0,1.082111,-32.261591,-26.314179,0,0,0,empty,-87.941063,-420.151819,93.692619,1715,1999,776,958,284,182,2,22666.666667,-420.151819,93.692619,1715,1999,776,958,2,22666.666667,284,182
2,2,2023011800217,2023-01-19T00:57:43.847004258Z,HD38666,empty,1.073385,0,0,0,-24.946060,-91.230300,-88.764305,1,HD38666,0.000364,holo4_003,0.234850,187.172642,0.0,-91.230300,122.152008,862.724104,550.000000,-0.457371,3.202580e+00,197.040203,143.146765,1.0,1.073385,4.079133,2.4,2,2023-01-19T00:57:43.847004258Z,30.0,1.073385,-32.261635,-24.946060,0,0,0,empty,-88.764305,-418.275175,88.724104,540,1677,774,959,1137,185,2,22666.666667,-418.275175,88.724104,540,1677,774,959,2,22666.666667,1137,185
3,3,2023011800226,2023-01-19T01:02:55.506003157Z,HD38666,empty,1.066001,0,0,0,-23.712034,-90.720990,-89.447969,1,HD38666,0.000364,holo4_003,0.202267,187.576208,0.0,-90.720990,164.140118,877.176338,550.000000,-0.508894,1.713957e+01,144.131740,2.034451,1.0,1.066001,4.186696,2.4,2,2023-01-19T01:02:55.506003157Z,30.0,1.066001,-32.222626,-23.712034,0,0,0,empty,-89.447969,-418.255843,89.176338,584,1721,788,973,1137,185,2,22666.666667,-418.255843,89.176338,584,1721,788,973,2,22666.666667,1137,185
4,4,2023011800227,2023-01-19T01:03:38.322003085Z,HD38666,empty,1.064966,0,0,0,-23.533156,-90.630486,-89.447939,1,HD38666,0.000364,holo4_003,0.206403,187.378093,0.0,-90.630486,165.543030,876.840489,550.000000,-0.224120,2.726622e+00,193.733399,2.155984,1.0,1.064966,4.497436,2.4,2,2023-01-19T01:03:38.322003085Z,30.0,1.064966,-32.222611,-23.533156,0,0,0,empty,-89.447939,-417.572604,89.840489,584,1721,787,972,1137,185,2,22666.666667,-417.572604,89.840489,584,1721,787,972,2,22666.666667,1137,185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,82,2023011800708,2023-01-19T07:48:46.743007097Z,HD38666,empty,2.365924,0,0,0,78.130153,111.351311,68.809980,1,HD38666,0.000364,holo4_003,0.476654,185.248983,0.0,111.351311,156.020162,857.802504,550.000000,0.452118,9.776592e-03,395.113188,86.572771,1.0,2.365924,5.388991,2.4,2,2023-01-19T07:48:46.743007097Z,30.0,2.365924,-32.343507,78.130153,0,0,0,empty,68.809980,-417.292895,81.802504,573,1710,776,961,1137,185,2,22666.666667,-417.292895,81.802504,573,1710,776,961,2,22666.666667,1137,185
83,83,2023011800730,2023-01-19T08:06:41.270009476Z,HD111980,empty,1.101852,0,0,0,-24.198409,-122.768115,-57.440631,1,HD111980,0.000517,holo4_003,0.230055,187.029341,0.0,-122.768115,153.914866,862.403423,579.806538,0.573332,4.182515e+00,212.472566,4.489257,1.0,1.101852,4.056033,2.4,2,2023-01-19T08:06:41.270009476Z,30.0,1.101852,-18.481731,-24.198409,0,0,0,empty,-57.440631,-416.461505,88.403423,569,1706,774,959,1137,185,2,22666.666667,-416.461505,88.403423,569,1706,774,959,2,22666.666667,1137,185
84,84,2023011800731,2023-01-19T08:07:24.086003293Z,HD111980,empty,1.100586,0,0,0,-24.019544,-122.910159,-57.440539,1,HD111980,0.000517,holo4_003,0.232569,186.956087,0.0,-122.910159,152.324523,864.276195,579.806538,-0.085123,4.756089e+00,187.327429,6.669644,1.0,1.100586,4.563882,2.4,2,2023-01-19T08:07:24.086003293Z,30.0,1.100586,-18.481752,-24.019544,0,0,0,empty,-57.440539,-417.558916,89.276195,568,1705,775,960,1137,185,2,22666.666667,-417.558916,89.276195,568,1705,775,960,2,22666.666667,1137,185
85,85,2023011800753,2023-01-19T08:24:50.606000576Z,HD185975,empty,2.094363,0,0,0,-132.924615,-133.972113,-45.235533,1,HD185975,-0.000065,holo4_003,0.491115,187.381233,0.0,-133.972113,147.477869,879.930224,597.257159,-0.022181,9.874156e-02,288.648481,6.594958,1.0,2.094363,5.644697,2.4,2,2023-01-19T08:24:50.606000576Z,30.0,2.094363,-87.435940,-132.924615,0,0,0,empty,-45.235533,-417.442567,82.930224,565,1702,797,982,1137,185,2,22666.666667,-417.442567,82.930224,565,1702,797,982,2,22666.666667,1137,185


In [25]:
# Write the summary table to CSV. output_summary_file has no directory part, so the
# file is created in the current working directory. With the pandas default settings
# the row index is written as an extra first column.
df.to_csv(output_summary_file)

In [26]:
# Release the HDF5 file handle opened earlier in the notebook.
hf.close() 