# Convert H5 file to pandas

Convert the HDF5 file produced by the CheckForSpectraction notebook into a pandas DataFrame.
Note that only the group attributes are written to the output; the datasets are not.

Works with Weekly_2023_11
- use jupyter kernel LSST
- author : Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date : 2023/04/01
- last update : 2023/04/01


In [1]:
import h5py

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
import pandas as pd

In [3]:
import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd

# Notebook-wide matplotlib defaults: figure size and extra-large label,
# title and tick fonts.
plt.rcParams.update({
    "figure.figsize": (14, 6),
    "axes.labelsize": 'xx-large',
    "axes.titlesize": 'xx-large',
    "xtick.labelsize": 'xx-large',
    "ytick.labelsize": 'xx-large',
})

In [4]:
from astropy.time import Time
import astropy.units as u

In [5]:
# Sentinel that distinguishes "no default supplied" from an explicit default=None.
_NO_DEFAULT = object()


def GetColumnHfData(hf, nameval, default=_NO_DEFAULT):
    """Collect one attribute from every top-level entry of an HDF5 container.

    Parameters
    ----------
    hf : mapping-like (e.g. an open ``h5py.File``)
        Container whose ``keys()`` name the entries and whose ``get(key)``
        returns an object exposing an ``attrs`` mapping.
    nameval : str
        Name of the attribute to read from each entry.
    default : object, optional
        Value stored for entries that do not carry ``nameval``. When omitted,
        a missing attribute raises ``KeyError``.

    Returns
    -------
    list
        One value per entry, in the iteration order of ``hf.keys()``.

    Raises
    ------
    KeyError
        If an entry lacks ``nameval`` and no ``default`` was supplied.
    """
    all_data = []
    for key in hf.keys():
        attrs = hf.get(key).attrs
        if default is not _NO_DEFAULT and nameval not in attrs:
            # Entry without this attribute: keep the column aligned with the
            # other columns by inserting the caller-provided placeholder.
            all_data.append(default)
        else:
            all_data.append(attrs[nameval])
    return all_data

In [6]:
# Maps an observation date (integer, apparently YYYYMMDD) to the name of the
# run collection used for that date. Only the version suffix differs between
# collections, so the table below lists just date -> version number.
_RUN_VERSION_BY_DATE = {
    20230228: 70,
    20230301: 71,
    20230302: 72,
    20230315: 73,
    20230316: 74,
}
DateToRuncollection = {
    obs_date: f"u/dagoret/BPS_manyspectro_v{version}"
    for obs_date, version in _RUN_VERSION_BY_DATE.items()
}

In [7]:
# Observation date to convert; it must be one of the keys of DateToRuncollection.
DATE = 20230315
# Run collection associated with that date.
my_collection = DateToRuncollection[DATE]
# Filter~disperser tag; it is embedded in the input and output file names.
filterdispersername = "empty~holo4_003"

In [8]:
# Configuration and version tags; both are embedded in the input and output
# file names (presumably the extraction configuration and the spectrum
# version -- confirm against the notebook that produced the HDF5 file).
configmode = "PSF2DFFM_REBIN2"
specver="specV2.4"

In [9]:
# Input (HDF5) and output (CSV) file names share the same
# "<date>-<filter~disperser>-<config>-<version>-oga" stem.
file_tag = "-".join([str(DATE), filterdispersername, configmode, specver, "oga"])
output_summary_file = "fullsummaryspectra_" + file_tag + ".csv"
input_file_h5 = "allspectra_" + file_tag + ".hdf5"

In [10]:
# Open the input file read-only; the handle stays open until the final
# hf.close() cell because later cells read from it.
hf = h5py.File(input_file_h5, mode="r")
# Show the names of the top-level entries in the file.
hf.keys()

<KeysViewHDF5 ['2023031500617', '2023031500618', '2023031500625', '2023031500626', '2023031500633', '2023031500634', '2023031500648', '2023031500649', '2023031500662', '2023031500663', '2023031500670', '2023031500671', '2023031500678', '2023031500679', '2023031500686', '2023031500687', '2023031500694', '2023031500695', '2023031500702', '2023031500703', '2023031500710', '2023031500711', '2023031500718', '2023031500719', '2023031500733', '2023031500734', '2023031500741', '2023031500742', '2023031500749', '2023031500750', '2023031500757', '2023031500758', '2023031500772', '2023031500773', '2023031500780', '2023031500781', '2023031500788', '2023031500789', '2023031500796', '2023031500797', '2023031500804', '2023031500805', '2023031500815', '2023031500816', '2023031500826', '2023031500827', '2023031500835', '2023031500836', '2023031500851', '2023031500852', '2023031500859', '2023031500860', '2023031500868', '2023031500869', '2023031500876', '2023031500877', '2023031500891', '2023031500892',

In [11]:
# Pick the first top-level entry as a sample to inspect its attributes.
key_sel = list(hf.keys())[0]
print(key_sel)

2023031500617


In [12]:
# Sample entry whose attribute names define the columns of the output table.
group = hf.get(key_sel)

In [13]:
# Print every attribute of the sample entry as "name => value".
for attr_name, attr_value in group.attrs.items():
    print(f'{attr_name} => {attr_value}')

exposure => 2023031500617
index => 0
me_az_rms => 0.0175212923514513
me_el_rms => 0.0144468395661677
me_image_az_rms => 0.0153778557479842
me_image_el_rms => 0.0144468395661677
me_image_rot_rms => 0.0001612010711057
me_rot_rms => 0.1187503846373602
spec_airmass => 2.08009019139918
spec_camera_angle => -51.77500369824298
spec_date_obs => 2023-03-16T05:11:21.499995014Z
spec_dec => -87.4294695819789
spec_filter_label => empty
spec_header_a2_fit => 1.0
spec_header_airmass => 2.08009019139918
spec_header_am_fit => 2.08009019139918
spec_header_cam_rot => -51.77500369824298
spec_header_chi2_fit => 10.32260792927881
spec_header_d2ccd => 187.40810873051
spec_header_date-obs => 2023-03-16T05:11:21.499995014Z
spec_header_dec => -87.4294695819789
spec_header_exptime => 30.0
spec_header_filter => empty
spec_header_grating => holo4_003
spec_header_ha => -126.1948434157015
spec_header_lbda_ref => 597.2571590735706
spec_header_lshift => 0.0
spec_header_meanfwhm => 4.30669333352735
spec_header_outhum =

In [14]:
# Attribute names of the sample entry; each one becomes a column of the table.
all_subgroup_keys = list(group.attrs.keys())

In [15]:
# Summary table with one row per HDF5 entry; it is populated in the next cell.
df = pd.DataFrame()

In [16]:
# Gather every attribute column first and build the frame in a single call.
# This avoids inserting columns one at a time (which fragments wide frames)
# and makes the cell give the same result when it is re-run.
df = pd.DataFrame({key: GetColumnHfData(hf, key) for key in all_subgroup_keys})
    

In [17]:
#list(group.items())

In [18]:
# List the attribute names that became columns of the table.
df.columns

Index(['exposure', 'index', 'me_az_rms', 'me_el_rms', 'me_image_az_rms',
       'me_image_el_rms', 'me_image_rot_rms', 'me_rot_rms', 'spec_airmass',
       'spec_camera_angle', 'spec_date_obs', 'spec_dec', 'spec_filter_label',
       'spec_header_a2_fit', 'spec_header_airmass', 'spec_header_am_fit',
       'spec_header_cam_rot', 'spec_header_chi2_fit', 'spec_header_d2ccd',
       'spec_header_date-obs', 'spec_header_dec', 'spec_header_exptime',
       'spec_header_filter', 'spec_header_grating', 'spec_header_ha',
       'spec_header_lbda_ref', 'spec_header_lshift', 'spec_header_meanfwhm',
       'spec_header_outhum', 'spec_header_outpress', 'spec_header_outtemp',
       'spec_header_parangle', 'spec_header_pixshift', 'spec_header_psf_reg',
       'spec_header_rebin', 'spec_header_redshift', 'spec_header_rotangle',
       'spec_header_s_dec', 'spec_header_s_nx', 'spec_header_s_ny',
       'spec_header_s_sat', 'spec_header_s_x0', 'spec_header_s_xmax',
       'spec_header_s_xmin', 'spec_h

In [19]:
# Output column order. ``reindex`` keeps only the columns listed here (any
# other column of ``df`` is dropped) and creates an all-NaN column for every
# listed name that is absent from ``df``.
ordered_columns = [
    # identification and observing conditions
    'index',
    'exposure',
    'spec_date_obs',
    'spec_target_label',
    'spec_filter_label',
    'spec_airmass',
    'spec_pressure',
    'spec_temperature',
    'spec_humidity',
    'spec_hour_angle',
    'spec_parallactic_angle',
    'spec_camera_angle',
    'spec_order',
    # spec_header_* values
    "spec_header_target",
    "spec_header_redshift",
    "spec_header_grating",
    "spec_header_rotangle",
    "spec_header_d2ccd",
    "spec_header_lshift",
    "spec_header_parangle",
    "spec_header_targetx",
    "spec_header_targety",
    "spec_header_lbda_ref",
    "spec_header_pixshift",
    "spec_header_psf_reg",
    "spec_header_trace_r",
    "spec_header_chi2_fit",
    "spec_header_a2_fit",
    "spec_header_am_fit",
    "spec_header_meanfwhm",
    "spec_header_version",
    "spec_header_rebin",
    "spec_header_date-obs",
    "spec_header_exptime",
    "spec_header_airmass",
    "spec_header_dec",
    "spec_header_ha",
    "spec_header_outtemp",
    "spec_header_outpress",
    "spec_header_outhum",
    "spec_header_filter",
    "spec_header_cam_rot",
    "spec_header_s_x0",
    "spec_header_s_y0",
    "spec_header_s_xmin",
    "spec_header_s_xmax",
    "spec_header_s_ymin",
    "spec_header_s_ymax",
    "spec_header_s_nx",
    "spec_header_s_ny",
    "spec_header_s_dec",
    "spec_header_s_sat",
    # spec_spectrogram_* values
    "spec_spectrogram_x0",
    "spec_spectrogram_y0",
    "spec_spectrogram_xmin",
    "spec_spectrogram_xmax",
    "spec_spectrogram_ymin",
    "spec_spectrogram_ymax",
    "spec_spectrogram_deg",
    "spec_spectrogram_saturation",
    "spec_spectrogram_Nx",
    "spec_spectrogram_Ny",
]

# The me_* columns are appended (last) only when the sample entry carries them,
# so files without them do not get six empty columns.
if 'me_az_rms' in all_subgroup_keys:
    ordered_columns += [
        "me_az_rms",
        "me_el_rms",
        "me_rot_rms",
        "me_image_az_rms",
        "me_image_el_rms",
        "me_image_rot_rms",
    ]

df = df.reindex(columns=ordered_columns)
        

In [20]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns", None)

In [21]:
# Display the reordered summary table.
df

Unnamed: 0,index,exposure,spec_date_obs,spec_target_label,spec_filter_label,spec_airmass,spec_pressure,spec_temperature,spec_humidity,spec_hour_angle,spec_parallactic_angle,spec_camera_angle,spec_order,spec_header_target,spec_header_redshift,spec_header_grating,spec_header_rotangle,spec_header_d2ccd,spec_header_lshift,spec_header_parangle,spec_header_targetx,spec_header_targety,spec_header_lbda_ref,spec_header_pixshift,spec_header_psf_reg,spec_header_trace_r,spec_header_chi2_fit,spec_header_a2_fit,spec_header_am_fit,spec_header_meanfwhm,spec_header_version,spec_header_rebin,spec_header_date-obs,spec_header_exptime,spec_header_airmass,spec_header_dec,spec_header_ha,spec_header_outtemp,spec_header_outpress,spec_header_outhum,spec_header_filter,spec_header_cam_rot,spec_header_s_x0,spec_header_s_y0,spec_header_s_xmin,spec_header_s_xmax,spec_header_s_ymin,spec_header_s_ymax,spec_header_s_nx,spec_header_s_ny,spec_header_s_dec,spec_header_s_sat,spec_spectrogram_x0,spec_spectrogram_y0,spec_spectrogram_xmin,spec_spectrogram_xmax,spec_spectrogram_ymin,spec_spectrogram_ymax,spec_spectrogram_deg,spec_spectrogram_saturation,spec_spectrogram_Nx,spec_spectrogram_Ny,me_az_rms,me_el_rms,me_rot_rms,me_image_az_rms,me_image_el_rms,me_image_rot_rms
0,0,2023031500617,2023-03-16T05:11:21.499995014Z,HD185975,empty,2.080090,0,0,0,-126.194843,-127.358760,-51.775004,1,HD185975,-0.000065,holo4_003,0.524906,187.408109,0.0,-127.358760,147.302473,884.252706,597.257159,0.029404,0.347336,298.547682,10.322608,1.0,2.080090,4.306693,2.4,2,2023-03-16T05:11:21.499995014Z,30.0,2.080090,-87.429470,-126.194843,0,0,0,empty,-51.775004,-417.753982,83.252706,565,1702,801,986,1137,185,2,22666.666667,-417.753982,83.252706,565,1702,801,986,2,22666.666667,1137,185,0.017521,0.014447,0.118750,0.015378,0.014447,0.000161
1,1,2023031500618,2023-03-16T05:12:04.498004925Z,HD185975,empty,2.079669,0,0,0,-126.014674,-127.181454,-51.775288,1,HD185975,-0.000065,holo4_003,0.498560,187.306036,0.0,-127.181454,148.371488,884.997932,597.257159,0.124271,0.619798,234.722805,13.834057,1.0,2.079669,5.228513,2.4,2,2023-03-16T05:12:04.498004925Z,30.0,2.079669,-87.429464,-126.014674,0,0,0,empty,-51.775288,-416.996039,82.997932,565,1702,802,987,1137,185,2,22666.666667,-416.996039,82.997932,565,1702,802,987,2,22666.666667,1137,185,0.015482,0.028492,0.105376,0.013587,0.028492,0.000143
2,2,2023031500625,2023-03-16T05:19:03.555003211Z,HD142331,empty,1.817454,0,0,0,-56.298958,-120.715337,-59.433487,1,HD142331,-0.000236,holo4_003,0.428282,187.316312,0.0,-120.715337,152.005239,859.687341,596.970132,0.081979,1.748588,209.148635,5.101009,1.0,1.817454,5.039207,2.4,2,2023-03-16T05:19:03.555003211Z,30.0,1.817454,-8.533026,-56.298958,0,0,0,empty,-59.433487,-418.328229,84.687341,570,1706,775,960,1136,185,2,22666.666667,-418.328229,84.687341,570,1706,775,960,2,22666.666667,1136,185,0.032113,0.063058,0.134142,0.026841,0.063058,0.000182
3,3,2023031500626,2023-03-16T05:19:46.505502400Z,HD142331,empty,1.810158,0,0,0,-56.119452,-120.748417,-59.433438,1,HD142331,-0.000236,holo4_003,0.433932,187.316568,0.0,-120.748417,151.927771,858.918048,596.970132,0.126478,1.152126,218.131442,3.111382,1.0,1.810158,5.222475,2.4,2,2023-03-16T05:19:46.505502400Z,30.0,1.810158,-8.533037,-56.119452,0,0,0,empty,-59.433438,-417.412176,84.918048,569,1706,774,959,1137,185,2,22666.666667,-417.412176,84.918048,569,1706,774,959,2,22666.666667,1137,185,0.037792,0.102919,0.133464,0.031532,0.102919,0.000181
4,4,2023031500633,2023-03-16T05:23:17.260500736Z,HD146233,empty,2.021655,0,0,0,-60.564428,-120.144927,-59.885846,1,HD146233,0.000039,holo4_003,0.469873,185.885943,0.0,-120.144927,156.720068,856.295174,591.927760,-0.237063,0.252348,266.065949,115.957429,1.0,2.021655,4.851941,2.4,2,2023-03-16T05:23:17.260500736Z,30.0,2.021655,-8.324728,-60.564428,0,0,0,empty,-59.885846,-418.526325,83.295174,576,1712,773,958,1136,185,2,22666.666667,-418.526325,83.295174,576,1712,773,958,2,22666.666667,1136,185,0.033304,0.083883,0.108508,0.028971,0.083883,0.000147
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,69,2023031500945,2023-03-16T09:34:55.443493527Z,HD185975,empty,1.911657,0,0,0,-61.381162,-62.738007,242.462234,1,HD185975,-0.000065,holo4_003,0.476843,187.368487,0.0,-62.738007,153.850060,868.023009,597.257159,0.098002,2.016044,218.282985,15.316184,1.0,1.911657,4.570507,2.4,2,2023-03-16T09:34:55.443493527Z,30.0,1.911657,-87.428851,-61.381162,0,0,0,empty,242.462234,-418.567344,84.023009,572,1708,784,969,1136,185,2,22666.666667,-418.567344,84.023009,572,1708,784,969,2,22666.666667,1136,185,0.013690,0.011627,0.104797,0.011678,0.011627,0.000142
70,70,2023031500952,2023-03-16T09:40:03.110001207Z,HD200654,empty,1.708892,0,0,0,-68.957317,-96.161929,-83.731263,1,HD200654,-0.000149,holo4_003,0.419857,187.369872,0.0,-96.161929,149.316967,861.500835,596.126479,0.053127,42.030301,134.918484,4.502988,1.0,1.708892,4.243629,2.4,2,2023-03-16T09:40:03.110001207Z,30.0,1.708892,-49.912095,-68.957317,0,0,0,empty,-83.731263,-418.899340,84.500835,568,1704,777,962,1136,185,2,22666.666667,-418.899340,84.500835,568,1704,777,962,2,22666.666667,1136,185,0.337301,0.057303,0.122293,0.273778,0.057303,0.000166
71,71,2023031500953,2023-03-16T09:40:46.011498417Z,HD200654,empty,1.704161,0,0,0,-68.777963,-96.033504,-83.731295,1,HD200654,-0.000149,holo4_003,0.435952,187.340120,0.0,-96.033504,149.323857,861.305977,596.126479,0.000000,36.712377,138.451640,4.732465,1.0,1.704161,3.800717,2.4,2,2023-03-16T09:40:46.011498417Z,30.0,1.704161,-49.912206,-68.777963,0,0,0,empty,-83.731295,-417.783189,84.305977,567,1704,777,962,1137,185,2,22666.666667,-417.783189,84.305977,567,1704,777,962,2,22666.666667,1137,185,0.276610,0.063007,0.132950,0.224191,0.063007,0.000180
72,72,2023031500960,2023-03-16T09:45:43.375503574Z,HD111980,empty,1.602301,0,0,0,55.827630,114.313139,65.653154,1,HD111980,0.000517,holo4_003,0.420157,187.501838,0.0,114.313139,149.910196,860.294120,579.806538,-0.025761,0.400751,332.672422,13.213855,1.0,1.602301,3.011770,2.4,2,2023-03-16T09:45:43.375503574Z,30.0,1.602301,-18.572631,55.827630,0,0,0,empty,65.653154,-418.017685,84.294120,568,1705,776,961,1137,185,2,22666.666667,-418.017685,84.294120,568,1705,776,961,2,22666.666667,1137,185,0.033789,0.062951,0.081076,0.026424,0.062951,0.000110


In [22]:
# Write the summary table to CSV. With the pandas defaults the DataFrame index
# is written as an unnamed first column, in addition to the 'index' column.
df.to_csv(output_summary_file)

In [23]:
# Release the HDF5 file handle opened earlier in the notebook.
hf.close() 