In [1]:
import h5py
import tiledb
import numpy as np
import time
import os
import glob
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm

In [2]:
rootpath = '/data/data3/seadasn_2022-10-07_2023-01-13/'
flist = sorted(glob.glob(rootpath + "/seadasn_2022-12-*"))

In [3]:
# Create a configuration object
config = tiledb.Config()

# Set configuration parameters
config["vfs.s3.scheme"] = "http"
config["vfs.s3.region"] = ""
config["vfs.s3.endpoint_override"] = "dasway.ess.washington.edu:9000"
config["vfs.s3.use_virtual_addressing"] = "false"
config["vfs.s3.aws_access_key_id"] = "this-is-key"
config["vfs.s3.aws_secret_access_key"] = "this-is-secret"
config["sm.consolidation.mode"] = "fragment_meta"
config["sm.vacuum.mode"] = "fragment_meta"

# config["sm.consolidation.timestamp_start"] = "0"
# config["sm.consolidation.timestamp_end"] = "1"

# Create contex
ctx = tiledb.Ctx(config)
bucket = f"s3://seadas-december-2022-tiledb/"

In [None]:
tiledb.consolidate(f"{bucket}/RawData", ctx = ctx)

In [4]:
dim1 = tiledb.Dim(name="time", domain=(0, 100*60*60*24*31-1), tile=6000, dtype=np.uint32, 
                  filters = tiledb.FilterList([tiledb.DoubleDeltaFilter(),tiledb.GzipFilter(level=-1)]))
dim2 = tiledb.Dim(name="channel", domain=(0, 2089-1), tile=5, dtype=np.uint32,
                  filters = tiledb.FilterList([tiledb.DoubleDeltaFilter(),tiledb.GzipFilter(level=-1)]))

dom = tiledb.Domain(dim2, dim1)
rawdata = tiledb.Attr(name="RawData", dtype=np.float32, 
                       filters=tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(level = 5)]))
schema = tiledb.ArraySchema(domain=dom, sparse=False, attrs=[rawdata])
tiledb.Array.create(f"{bucket}/RawData/", schema = schema, ctx=ctx)

In [5]:
with tiledb.open(f"{bucket}/RawData", 'w', ctx = ctx) as A:
    A.meta['overview.location'] = "Seattle, WA"
    A.meta['overview.deployment_type'] = 'temporary'
    A.meta['overview.network'] = "N/A"
    A.meta['overview.site_name'] = "SeaDAS-N"
    A.meta['overview.number_of_interrogators'] = 1
    A.meta['overview.principle_investigators'] = "University of Washington"
    A.meta['overview.start_datetime'] = "2022-12-01"
    A.meta['overview.end_datetime'] = "2022-12-31"
    A.meta['overview.purpose_of_collection'] = "experiment"
    A.meta['overview.collection_mode'] = "continuous"
    A.meta['overview.comment'] = "N/A"

    A.meta['cable_and_fiber.cable_fiber_id'] = "N/A"
    A.meta['cable_and_fiber.cable_start_time'] = "N/A"
    A.meta['cable_and_fiber.cable_end_time'] = "N/A"
    A.meta['cable_and_fiber.cable_characteristics'] = "N/A"
    A.meta['cable_and_fiber.cable_environment'] = "N/A"
    A.meta['cable_and_fiber.cable_model'] = "N/A"
    A.meta['cable_and_fiber.cable_diameter'] = "N/A"
    A.meta['cable_and_fiber.cable_coordinates'] = "N/A"
    A.meta['cable_and_fiber.cable_connector_coordinates'] = "N/A"
    A.meta['cable_and_fiber.fiber_mode'] = "continuous"
    A.meta['cable_and_fiber.fiber_refraction_index'] = "N/A"
    A.meta['cable_and_fiber.attenuation'] = "N/A"
    A.meta['cable_and_fiber.fiber_geometry'] = "N/A"
    A.meta['cable_and_fiber.winding_angle'] = "N/A"
    A.meta['cable_and_fiber.fiber_start_location'] = "N/A"
    A.meta['cable_and_fiber.fiber_end_location'] = "N/A"
    A.meta['cable_and_fiber.fiber_length'] = "N/A"
    A.meta['cable_and_fiber.comment'] = "N/A"

    A.meta['interrogator.interrogator_id'] = "N/A"
    A.meta['interrogator.manufacturer'] = "N/A"
    A.meta['interrogator.model'] = "N/A"
    A.meta['interrogator.unit_of_measure'] = "N/A"
    A.meta['interrogator.comment'] = "N/A"

    A.meta['acquisition.acquisition_id'] = "N/A"
    A.meta['acquisition.acquisition_start_time'] = "2022-12-01T00:00:00.000000"
    A.meta['acquisition.acquisition_end_time'] = "2022-12-31T23:59:59.990000"
    A.meta['acquisition.acquisition_sample_rate'] = 100
    A.meta['acquisition.pulse_repetition_rate'] = "N/A"
    A.meta['acquisition.interrogator_rate'] = "N/A"
    A.meta['acquisition.pulse_width'] = "N/A"
    A.meta['acquisition.gauge_length'] = "N/A"
    A.meta['acquisition.number_of_channels'] = "N/A"
    A.meta['acquisition.channel_spacing'] = "N/A"
    A.meta['acquisition.archived_sample_rate'] = "N/A"
    A.meta['acquisition.unit_of_measure'] = "N/A"
    A.meta['acquisition.decimation'] = "N/A"
    A.meta['acquisition.filtering'] = "N/A"
    A.meta['acquisition.comment'] = "N/A"



In [8]:
# sequentially convert 10 files
for idf, f in enumerate(flist[:10]):
    zname = f.split("_")[-1][:-3]
    print(f"{idf+1}: working on {zname}")
    f = h5py.File(f,'r')

    with tiledb.open(f"{bucket}/RawData", 'w', ctx = ctx) as A:
        A[:, idf * 6000 : (idf+1) * 6000] = f['/Acquisition/Raw[0]/RawData'][:, :]

    f.close()

1: working on GMT
2: working on GMT
3: working on GMT
4: working on GMT
5: working on GMT
6: working on GMT
7: working on GMT
8: working on GMT
9: working on GMT
10: working on GMT


In [None]:
# Create a configuration object
config = tiledb.Config()

# Set configuration parameters
config["vfs.s3.scheme"] = "http"
config["vfs.s3.region"] = ""
config["vfs.s3.endpoint_override"] = "dasway.ess.washington.edu:9000"
config["vfs.s3.use_virtual_addressing"] = "false"
config["vfs.s3.aws_access_key_id"] = "niyiyu"
config["vfs.s3.aws_secret_access_key"] = "huayuAN553088447"
config["sm.consolidation.mode"] = "fragment_meta"
config["sm.vacuum.mode"] = "fragment_meta"

config["sm.consolidation.timestamp_start"] = 5
config["sm.consolidation.timestamp_end"] = 9

# Create contex
ctx = tiledb.Ctx(config)
bucket = f"s3://seadas-december-2022-tiledb/"

In [None]:
A = tiledb.open(f"{bucket}/RawData", 'r', ctx = ctx)
dict(A.meta)

In [14]:
d

OrderedDict([('RawData',
              array([15.90601   , -0.08085087, 55.418667  , ..., -0.2357815 ,
                     -0.19828375, -0.5492738 ], dtype=float32))])