# Scan all SDAQ-MODULE.hdf5 files and extract Lucifer settings

In [None]:
import os
import strawb
import pandas
import tqdm.notebook
import h5py

In [None]:
def scan_lucifer_options(db):
    """Collect the Lucifer settings stored in the module hdf5 files listed in `db`.

    Parameters
    ----------
    db : object
        Database handler exposing a pandas DataFrame as `db.dataframe` with at
        least the columns `dataProductCode`, `file_version` and `fullPath`.

    Returns
    -------
    pandas.DataFrame
        One row per stored Lucifer entry with the columns listed in `keys`
        below plus `deviceCode`, sorted by `time` and with a fresh 0..n-1
        index. If no file contributes any data, an empty DataFrame with those
        columns is returned (its `time` column is then not converted).
    """
    # attributes read from each lucifer object; they become the columns
    keys = ['id', 'current', 'current_mA', 'duration', 'duration_seconds', 'mode', 'time', 'version']

    # select by dataProductCode
    mask = db.dataframe.dataProductCode == 'SMRD'
    # Lucifer data exist from file_version 2 onwards
    mask &= db.dataframe.file_version > 1

    # Collect the frames in lists and concatenate once: `DataFrame.append` was
    # removed in pandas 2.0 and copied all previous rows on every call.
    frames = []
    for file_i in tqdm.notebook.tqdm(db.dataframe.fullPath[mask]):
        # NOTE(review): the module is not closed explicitly here - confirm
        # whether strawb.Module holds an open file handle that needs closing.
        module = strawb.Module(file_i)

        # loop over individual lucifer
        frames_i = []
        for lucifer_j in module.file_handler.lucifer_list:
            data = {}

            # get data from hdf5 files
            for key_h in keys:
                a = getattr(lucifer_j, key_h)
                if isinstance(a, h5py.Dataset):
                    a = a[:]  # read the dataset content into memory
                data[key_h] = a

            frames_i.append(pandas.DataFrame(data))

        if not frames_i:
            # file without any lucifer -> nothing to add
            continue

        df_i = pandas.concat(frames_i)
        df_i['deviceCode'] = module.file_handler.module
        frames.append(df_i)

    if not frames:
        # nothing found: return an empty frame instead of failing on `df.time`
        return pandas.DataFrame(columns=keys + ['deviceCode'])

    df = pandas.concat(frames)
    df['time'] = pandas.to_datetime(strawb.tools.asdatetime(df['time']))

    return df.sort_values('time').reset_index(drop=True)

def remove_no_change(df):
    """Drop rows whose settings are unchanged compared to the previous row.

    The rows are grouped by (`deviceCode`, `id`). Within each group, in the
    row order of `df`, a row is kept if it is the first row of the group or if
    `current`, `duration` or `mode` differs from the row before it.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns `deviceCode`, `id`, `current`, `duration` and `mode`.
        The three setting columns must support `DataFrame.diff`.

    Returns
    -------
    pandas.DataFrame
        The kept rows with their original index labels, ordered group by group
        (not re-sorted). An empty DataFrame if `df` has no groups.
    """
    kept = []
    for _, df_i in df.groupby(['deviceCode', 'id']):
        # a non-zero difference in any settings column marks a change
        m = df_i[['current', 'duration', 'mode']].diff().any(axis=1)
        # the first row has no predecessor (diff is NaN) -> always keep it
        m.iloc[0] = True
        kept.append(df_i[m])

    if not kept:
        # `pandas.concat` raises on an empty list
        return pandas.DataFrame()

    # `DataFrame.append` was removed in pandas 2.0; concatenate once instead
    return pandas.concat(kept)

In [None]:
## Load ONC DB

In [None]:
# If required, execute db.load_entire_db_from_ONC() to load the entire db.
# The handler loads the db from disc only when the db file exists.
db = (
    strawb.SyncDBHandler()  # loads the db from disc
    if os.path.exists(strawb.Config.pandas_file_sync_db)
    else strawb.SyncDBHandler(load_db=False)  # doesn't load from disc
)
# get updates
db.load_onc_db_update(output=True, save_db=True)

In [None]:
# Extract the Lucifer settings and keep only the rows where a setting changed.
df = remove_no_change(scan_lucifer_options(db))

# The file name carries the first and last timestamp found in the data.
str_formater = 'lucifer_settings_{t_start:%Y%m%dT%H%M%S}_{t_end:%Y%m%dT%H%M%S}.gz'
formater_dict = dict(t_start=df.time.min(), t_end=df.time.max())
file_name = os.path.join(strawb.Config.proc_data_dir,
                         str_formater.format(**formater_dict))

# Attach the dataframe to a new device db (nothing loaded from disc) and save it.
db_lucifer = strawb.ONCDeviceDB(file_name=file_name, load_db=False)
db_lucifer.dataframe = df
db_lucifer.save_db()