In [1]:
import tiledb
import glob
import h5py
import time
import numpy as np
import os

In [None]:
# Gather every file in the acquisition directory whose name contains
# "2021-11-02", in lexicographic order of the full path.
flist = sorted(
    glob.glob(
        "/data/data2/south-data-ejm/hdd/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T16_09_15-0700/*2021-11-02*"
    )
)

In [None]:
# Build the TileDB configuration used by every array operation in this notebook.
config = tiledb.Config()

# S3-compatible object store: plain HTTP, custom endpoint, path-style
# (non-virtual) addressing, no region.
config["vfs.s3.scheme"] = "http"
config["vfs.s3.region"] = ""
config["vfs.s3.endpoint_override"] = "pnwstore1.ess.washington.edu:9000"
config["vfs.s3.use_virtual_addressing"] = "false"

# Credentials are read from the environment so that real keys never have to be
# typed into (and saved with) the notebook. Dedicated variable names are used
# instead of the generic AWS_* ones so that unrelated AWS credentials are not
# sent to this endpoint by accident. When the variables are unset, the
# placeholder strings are used.
config["vfs.s3.aws_access_key_id"] = os.environ.get(
    "PNWSTORE_ACCESS_KEY_ID", "this-is-key"
)
config["vfs.s3.aws_secret_access_key"] = os.environ.get(
    "PNWSTORE_SECRET_ACCESS_KEY", "this-is-secret"
)

# Consolidation and vacuum both operate in "fragment_meta" mode.
config["sm.consolidation.mode"] = "fragment_meta"
config["sm.vacuum.mode"] = "fragment_meta"

# Create the context that carries this configuration.
ctx = tiledb.Ctx(config)

In [None]:
# Channel-axis tile extent. It is also embedded in the bucket name, so every
# tile size gets its own bucket.
# NOTE(review): `tile_size` was never assigned anywhere in the notebook, so a
# fresh "Restart Kernel -> Run All" failed here with NameError. 100 is only a
# placeholder -- set it to the value used for the run being reproduced.
tile_size = 100

SAMPLE_RATE_HZ = 200        # same value as acquisition.acquisition_sample_rate
SAMPLES_PER_TILE = 12000    # time-axis tile extent; the conversion loop writes 12000 samples per file
N_CHANNELS = 47500
N_TIME_SAMPLES = SAMPLE_RATE_HZ * 60 * 60 * 24 * 7   # samples in 7 days at SAMPLE_RATE_HZ

# NOTE(review): `bucket` ends with "/" and "/RawData" is appended below and in
# later cells, so the array URI contains "//". Left unchanged so the array
# location stays the same -- confirm whether that is intended.
bucket = f"s3://TileDB-OOI-DAS-{tile_size}/"

# Coordinate filters shared by both dimensions.
dim_filters = tiledb.FilterList([tiledb.DoubleDeltaFilter(), tiledb.GzipFilter(level=-1)])

# Dimension domains are inclusive on both ends, so the upper bound is
# (count - 1). The time axis previously used the count itself, which made the
# domain one cell longer than intended and inconsistent with the channel axis.
dim1 = tiledb.Dim(name="time", domain=(0, N_TIME_SAMPLES - 1), tile=SAMPLES_PER_TILE,
                  dtype=np.uint32, filters=dim_filters)
dim2 = tiledb.Dim(name="channel", domain=(0, N_CHANNELS - 1), tile=tile_size,
                  dtype=np.uint32, filters=dim_filters)

# Dimension order is (channel, time): arrays are indexed as A[channel, time].
dom = tiledb.Domain(dim2, dim1)
rawdata = tiledb.Attr(name="RawData", dtype=np.int32,
                      filters=tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(level=5)]))
schema = tiledb.ArraySchema(domain=dom, sparse=False, attrs=[rawdata])

# Create the (empty) dense array on the object store.
tiledb.Array.create(f"{bucket}/RawData/", schema=schema, ctx=ctx)

In [None]:
# Sequentially convert the first 10 files: file number `idf` is written to the
# time slice [idf * 12000, (idf + 1) * 12000) across all channels.
samples_per_file = 12000

for idf, path in enumerate(flist[:10]):
    # Text after the last "_" in the path, minus its final three characters
    # (presumably the ".h5" extension -- verify against the file names).
    zname = path.split("_")[-1][:-3]
    print(f"{idf+1}: working on {zname}")

    # The context manager closes the HDF5 file even if the read or the write
    # below raises; the previous explicit close() was skipped on error.
    # The path and the file handle also no longer share one variable name.
    with h5py.File(path, 'r') as h5_file:
        raw_block = h5_file['/Acquisition/Raw[0]/RawData'][:, :]

    with tiledb.open(f"{bucket}/RawData", 'w', ctx = ctx) as A:
        A[:, idf * samples_per_file : (idf+1) * samples_per_file] = raw_block

In [None]:
# Metadata attached to the RawData array. Keys are "<section>.<field>"; the
# entries are written in the order listed here.
array_metadata = {
    # --- overview ---
    'overview.location': "offshore oregon",
    'overview.deployment_type': 'temporary',
    'overview.network': "OO",
    'overview.site_name': "regional cabled array",
    'overview.number_of_interrogators': 1,
    'overview.principle_investigators': "University of Washington",
    'overview.start_datetime': "2021-11-01",
    'overview.end_datetime': "2021-11-07",
    'overview.purpose_of_collection': "experiment",
    'overview.collection_mode': "continuous",
    'overview.comment': "N/A",

    # --- cable and fiber ---
    'cable_and_fiber.cable_fiber_id': "N/A",
    'cable_and_fiber.cable_start_time': "N/A",
    'cable_and_fiber.cable_end_time': "N/A",
    'cable_and_fiber.cable_characteristics': "N/A",
    'cable_and_fiber.cable_environment': "N/A",
    'cable_and_fiber.cable_model': "N/A",
    'cable_and_fiber.cable_diameter': "N/A",
    'cable_and_fiber.cable_coordinates': "N/A",
    'cable_and_fiber.cable_connector_coordinates': "N/A",
    'cable_and_fiber.fiber_mode': "continuous",
    'cable_and_fiber.fiber_refraction_index': "N/A",
    'cable_and_fiber.attenuation': "N/A",
    'cable_and_fiber.fiber_geometry': "N/A",
    'cable_and_fiber.winding_angle': "N/A",
    'cable_and_fiber.fiber_start_location': "N/A",
    'cable_and_fiber.fiber_end_location': "N/A",
    'cable_and_fiber.fiber_length': "N/A",
    'cable_and_fiber.comment': "N/A",

    # --- interrogator ---
    'interrogator.interrogator_id': "N/A",
    'interrogator.manufacturer': "N/A",
    'interrogator.model': "N/A",
    'interrogator.unit_of_measure': "N/A",
    'interrogator.comment': "N/A",

    # --- acquisition ---
    'acquisition.acquisition_id': "N/A",
    'acquisition.acquisition_start_time': "2021-11-02T00:00:14.000000",
    'acquisition.acquisition_end_time': "2021-11-02T00:09:14.000000",
    'acquisition.acquisition_sample_rate': 200,
    'acquisition.pulse_repetition_rate': "N/A",
    'acquisition.interrogator_rate': "N/A",
    'acquisition.pulse_width': "N/A",
    'acquisition.gauge_length': "N/A",
    'acquisition.number_of_channels': "N/A",
    'acquisition.channel_spacing': "N/A",
    'acquisition.archived_sample_rate': "N/A",
    'acquisition.unit_of_measure': "N/A",
    'acquisition.decimation': "N/A",
    'acquisition.filtering': "N/A",
    'acquisition.comment': "N/A",
}

# Open the array in write mode and store each entry individually.
with tiledb.open(f"{bucket}/RawData", 'w', ctx = ctx) as A:
    for key, value in array_metadata.items():
        A.meta[key] = value



In [None]:
# Consolidate the RawData array; the consolidation mode is whatever the
# configuration attached to `ctx` specifies.
tiledb.consolidate(
    uri=f"{bucket}/RawData",
    ctx=ctx,
)