In [1]:
import h5py
import zarr
import tqdm
import os
import glob
import time

In [2]:
# Gather every raw HDF5 file in the Raw-Data directory, sorted by path so the
# processing order is deterministic.
raw_pattern = "/data/wsd04/OOI-DAS/Raw-Data/*.h5"
flist = glob.glob(raw_pattern)
flist.sort()

In [3]:
# Open a previously converted store purely for inspection.
# mode="r" keeps this cell from modifying the store and makes a mistyped path
# raise instead of silently creating a new, empty group on disk.
z = zarr.open("/data/wsd04/OOI-DAS/Zarr-1/2021-11-02T000014Z.zarr/", mode="r")

In [7]:
# Display the array summary (dtype, shape, chunk shape, compressor, size)
# of the converted raw-data array in the store opened as `z`.
raw_array = z["/Acquisition/Raw0/RawData"]
raw_array.info

0,1
Name,/Acquisition/Raw0/RawData
Type,zarr.core.Array
Data type,int32
Shape,"(47500, 12000)"
Chunk shape,"(1, 12000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,2280000000 (2.1G)


In [20]:
# Convert every HDF5 file in `flist` into its own Zarr store under
# /data/wsd04/OOI-DAS/Zarr-<chunksize>/, then print the mean wall-clock
# seconds spent per file.
#
# Layout written to each store:
#   Acquisition/Custom                    (empty group)
#   Acquisition/Raw0/RawData              (chunked `chunksize` rows at a time)
#   Acquisition/Raw0/RawDataTime
#   Acquisition/Raw0/Custom/{GpBits, GpsStatus, PpsOffset, SampleCount}
#
# The cell is safe to re-run: groups are reused and arrays are overwritten.

chunksize = 10000  # rows per RawData chunk -- the parameter being varied
out_dir = f"/data/wsd04/OOI-DAS/Zarr-{chunksize}"


def _copy_1d(h5file, src_path, zgroup, name, dtype):
    """Copy the HDF5 dataset at `src_path` into `zgroup` as array `name`.

    The Zarr array takes its shape from the source dataset and is stored
    with the given `dtype`. An existing array of the same name is replaced.
    """
    src = h5file[src_path]
    dst = zgroup.create_dataset(name, shape=src.shape, dtype=dtype, overwrite=True)
    dst[:] = src[:]


t0 = time.time()
os.makedirs(out_dir, exist_ok=True)

for path in flist:
    print(f"working on {path}")
    # The store is named after the last "_"-separated field of the file name
    # with the extension removed (a timestamp for the files listed in the
    # recorded output of this cell).
    stem = os.path.splitext(os.path.basename(path))[0]
    zname = stem.split("_")[-1]

    # The context manager closes the HDF5 file even if a copy step fails.
    with h5py.File(path, "r") as h5:
        z = zarr.open(f"{out_dir}/{zname}.zarr")

        zacq = z.require_group("Acquisition")
        zacq.require_group("Custom")

        # "Raw[0]" does not seem to work with fsspec (the brackets would need
        # URL-encoding as Raw%5B0%5D), so the group is stored as "Raw0".
        zraw = zacq.require_group("Raw0")
        zcustom = zraw.require_group("Custom")

        # Main data array: shape comes from the source dataset, and each chunk
        # holds `chunksize` rows spanning the full second axis.
        raw = h5["/Acquisition/Raw[0]/RawData"]
        zrawdata = zraw.create_dataset(
            "RawData",
            shape=raw.shape,
            chunks=(chunksize, raw.shape[1]),
            dtype="i4",
            overwrite=True,
        )
        zrawdata[:, :] = raw[:, :]

        # Remaining 1-D arrays, stored with the same dtypes as before.
        _copy_1d(h5, "/Acquisition/Raw[0]/RawDataTime", zraw, "RawDataTime", "i8")
        _copy_1d(h5, "/Acquisition/Raw[0]/Custom/GpBits", zcustom, "GpBits", "u1")
        _copy_1d(h5, "/Acquisition/Raw[0]/Custom/GpsStatus", zcustom, "GpsStatus", "u1")
        _copy_1d(h5, "/Acquisition/Raw[0]/Custom/PpsOffset", zcustom, "PpsOffset", "u4")
        _copy_1d(h5, "/Acquisition/Raw[0]/Custom/SampleCount", zcustom, "SampleCount", "i8")

# Mean seconds per converted file. The divisor was a hard-coded 10, presumably
# the number of files at the time; len(flist) keeps it right for any input.
print((time.time() - t0) / max(len(flist), 1))

working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000014Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000114Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000214Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000314Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000414Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000514Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000614Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000714Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T000814Z.h5
working on /data/wsd04/OOI-DAS/Raw-Data/South-C1-LR-95km-P1kHz-G