Execute in a shell:

```
# -p keeps the command idempotent when the directory already exists.
mkdir -p supercdms-data
# `alias` needs a name on the left of `=`; `s3` is the shorthand used on the next line.
alias s3='aws s3 --profile slac_public --endpoint-url https://maritime.sealstorage.io/api/v0/s3 --no-verify-ssl'
# `aws s3 ls` prints "date time size key"; keep only the key column.
s3 ls --recursive s3://utah/supercdms-data/CDMS/UMN/R68/Raw/  | awk '{print $4}' > supercdms-data/list.txt
```

In [1]:
# Read the bucket listing produced by the shell step and keep only the
# gzipped MIDAS files (one object key per line).
files=[]
with open("supercdms-data/list.txt","r") as listing:
  for line in listing:
    name=line.strip()
    if name.endswith(".mid.gz"):
      files.append(name)
print("found",len(files),".mid.gz files")

found 39522 .mid.gz files


In [2]:
import os,sys,boto3
from botocore.client import Config

# Sign requests with AWS Signature Version 4.
config = Config(signature_version = 's3v4')

# NOTE(review): the key/secret are the literal string 'any' — presumably the
# endpoint is public and ignores credentials; confirm before reusing elsewhere.
s3 = boto3.resource('s3',endpoint_url='https://maritime.sealstorage.io/api/v0/s3', aws_access_key_id='any', aws_secret_access_key='any', config=config,)
bucket = s3.Bucket("utah")

# Maximum number of files to fetch; a falsy value (None/0) means "all files".
max_files=30

# Apply the limit BEFORE downloading: checking the index after the download
# fetched max_files+1 objects, one more than the later cells
# (`files[0:max_files]`) ever use.
selected=files[:max_files] if max_files else files

for key in selected:
  # The object key doubles as the local relative path.
  local=key

  if os.path.isfile(local):
    print(f"File {key} already exists {os.path.getsize(local)}")
    continue

  # A key without any directory component has an empty dirname, and
  # os.makedirs("") raises, so only create the parent when there is one.
  parent=os.path.dirname(local)
  if parent:
    os.makedirs(parent,exist_ok=True)
  bucket.download_file(key,local)
  print(f"Downloaded file {key} {os.path.getsize(local)}")


File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0001.mid.gz already exists 48189875
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0002.mid.gz already exists 185526004
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0003.mid.gz already exists 180300511
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0004.mid.gz already exists 179814119
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0005.mid.gz already exists 182222931
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0006.mid.gz already exists 179705278
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0007.mid.gz already exists 183954816
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0008.mid.gz already exists 181079571
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0009.mid.gz already exists 181132576
File supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0010.mid

In [3]:
import midas.file_reader

import json,os,sys, xmltodict
from pprint import pprint

# ///////////////////////////////////////////////////////////////////
def LoadXML(filename):
    """Read an XML file and return its content parsed by ``xmltodict``.

    Args:
        filename: path of the XML file, opened in text mode.

    Returns:
        The dictionary produced by ``xmltodict.parse`` for the file content.

    Note:
        The previous version read the file and then hit a bare ``return``,
        so it always returned ``None`` and the content was lost.
    """
    with open(filename, 'rt') as file:
        body = file.read()
    # Same parser that ParseEvent uses for the ODB dump.
    return xmltodict.parse(body)

# ///////////////////////////////////////////////////////////////////
def Merge(d1, d2):
    """Combine two dictionaries, with entries of ``d2`` overriding ``d1``.

    When either argument is falsy (``None`` or empty) no new dictionary is
    built: the other argument is returned as-is (``d2`` if ``d1`` is falsy,
    otherwise ``d1``).
    """
    if not d1:
        return d2
    if not d2:
        return d1
    merged = {**d1, **d2}
    return merged

# /////////////////////////////////////////
def ParseEvent(evt, parse_xml=False):
    """Convert one MIDAS event into a plain, JSON-serializable dictionary.

    Args:
        evt: event object exposing ``header``, ``all_bank_size_bytes``,
            ``flags``, ``non_bank_data`` and ``banks`` (dict of bank objects).
        parse_xml: when True and the event carries non-bank data, that data
            is parsed as an XML ODB dump and stored under the ``"odb"`` key.

    Returns:
        dict with optional ``"odb"``, then ``"header"``,
        ``"all_bank_size_bytes"``, ``"flags"`` and ``"banks"`` (bank metadata
        only; the bank payload is intentionally not included).

    Side effects:
        After a successful XML parse ``evt.non_bank_data`` is reset to ``""``.
    """

    ret={}

    # non_bank_data may be None (events that use banks), in which case there
    # is nothing to parse; previously this crashed on `None.decode(...)`.
    if parse_xml and evt.non_bank_data:
        body=evt.non_bank_data.decode("latin-1")
        # Lossy on purpose: every non-printable character (this includes
        # newlines and tabs) is dropped before handing the text to the parser.
        d=xmltodict.parse(''.join(c for c in body if c.isprintable()))
        # The dump is expected to have a single <odb> root element.
        assert(isinstance(d,dict))
        assert(list(d.keys())==["odb"])
        ret["odb"]=d['odb']
        # Drop the raw dump from the event once it has been parsed.
        evt.non_bank_data=""

    ret.update({
        "header" : {
            "event_id": evt.header.event_id,  # int
            "trigger_mask": evt.header.trigger_mask,  # int
            "serial_number":evt.header.serial_number,  # int
            "timestamp": evt.header.timestamp,  # int UNIX timestamp of event
            "event_data_size_bytes": evt.header.event_data_size_bytes,  # int Size of all banks
        },
        "all_bank_size_bytes": evt.all_bank_size_bytes,  # (int)
        "flags":evt.flags,  # (int)
        # "non_bank_data" is not exported: (bytes or None) content of special
        # events that don't use banks (e.g. begin-of-run ODB dump)
        "banks" : { # (dict of {str: `Bank`}) - Keyed by bank name
            bank_name: {
                "name": bank.name,  # (str) - 4 characters
                "type": bank.type,  # (int) - See `TID_xxx` members in `midas` module
                "size_bytes": bank.size_bytes,  # (int)
                # "data" is not exported (tuple of int/float/byte etc, or a
                # numpy array if use_numpy is specified when unpacking)
            } for bank_name, bank in evt.banks.items()
        },
    })

    return ret

# /////////////////////////////////////////
def GenerateJson(src_filename,dst_filename):
    """Dump the metadata of every event of a MIDAS file to a JSON file.

    Args:
        src_filename: path of the MIDAS file to read.
        dst_filename: path of the JSON file to write.

    The output is ``{"events": [...]}`` with one ``ParseEvent`` dictionary per
    event; only the first event (index 0) is asked to parse its XML dump.
    """
    mfile = midas.file_reader.MidasFile(src_filename)
    events=[ParseEvent(evt, parse_xml=(E==0)) for E,evt in enumerate(mfile)]
    # Serialize first, then open the destination, so a serialization error
    # cannot leave a truncated JSON file behind.
    body=json.dumps({"events":events}, sort_keys=False, indent=2)
    # Write to the requested destination. The previous code ignored
    # `dst_filename` and used the global `filename`, which only worked because
    # the calling loop happened to use that variable name.
    with open(dst_filename,"w") as out:
        out.write(body)
    

# /////////////////////////////////////////
# Convert each selected file once: an existing "<file>.json" next to the
# source means the conversion already happened and is skipped.
for filename in files[0:max_files]:
    json_filename=filename+".json"
    if os.path.isfile(json_filename):
        continue
    GenerateJson(filename,json_filename)


In [4]:
import bokeh
import bokeh.io
import bokeh.plotting

# Route Bokeh output to the notebook so that later show() calls render inline.
bokeh.io.output_notebook()

In [5]:
def non_decreasing(L):
    """Return True when no element of the sequence is followed by a smaller one.

    Empty and single-element sequences are considered non-decreasing.
    """
    for current, following in zip(L, L[1:]):
        if not current <= following:
            return False
    return True

def non_increasing(L):
    """Return True when no element of the sequence is followed by a larger one.

    Empty and single-element sequences are considered non-increasing.
    """
    for current, following in zip(L, L[1:]):
        if not current >= following:
            return False
    return True

def monotonic(L):
    """Return True when the sequence is entirely non-decreasing or entirely non-increasing."""
    if non_decreasing(L):
        return True
    return non_increasing(L)

# /////////////////////////////////////////
def GetBanksData(filename):
    """Collect every non-empty bank of a MIDAS file together with its event timestamp.

    Args:
        filename: path of the MIDAS file to read.

    Returns:
        list of ``(timestamp, bank)`` tuples in file order, where ``timestamp``
        is the header timestamp of the event that owns the bank. Banks whose
        ``data`` is falsy are skipped.
    """
    pairs=[]
    for evt in midas.file_reader.MidasFile(filename):
        event_time=evt.header.timestamp
        for bank in evt.banks.values():
            if bank.data:
                pairs.append((event_time,bank))
    return pairs

import numpy as np

# Number of files concatenated into the signal (the original loop broke out
# after index 5, i.e. it consumed the first 6 files).
NUM_SIGNAL_FILES=6

# One timestamp per bank, and one array of samples per bank.
timesteps,chunks=[],[]
bank_name=None

for F,filename in enumerate(files[:NUM_SIGNAL_FILES]):
    print(filename)
    for timestamp,bank in GetBanksData(filename):
        # All banks carrying data are expected to share the same name,
        # otherwise concatenating their samples would mix different signals.
        assert(bank_name is None or bank.name==bank_name)
        bank_name=bank.name

        # not sure if append here is "logically right":
        # a single timestamp (the event's) is recorded for the whole bank.
        timesteps.append(timestamp)
        # Keep the samples as a compact array per bank instead of extending a
        # Python list element by element: hundreds of millions of boxed ints
        # in a list cost far more memory than the final array itself.
        # Assumes bank.data is a flat numeric sequence — TODO confirm for
        # other bank types.
        chunks.append(np.asarray(bank.data))

assert(non_decreasing(timesteps))

# Single concatenation at the end; fall back to an empty array when no bank
# was found (np.concatenate rejects an empty list).
pulse=np.concatenate(chunks) if chunks else np.array([])
print(pulse.dtype,pulse.shape)

supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0001.mid.gz
supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0002.mid.gz
supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0003.mid.gz
supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0004.mid.gz
supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0005.mid.gz
supercdms-data/CDMS/UMN/R68/Raw/07180808_1558/07180808_1558_F0006.mid.gz
int64 (690028440,)


In [15]:
import shutil
import numpy as np
import OpenVisus as ov
# Location of the IDX dataset created by this cell.
idx_filename='/mnt/c/big/visus-datasets/signal1d_slac/visus.idx'

# The signal is one-dimensional: the dataset has a single axis of N samples.
N=pulse.shape[0]
# shutil.rmtree(os.path.dirname(idx_filename), ignore_errors=True)

# Single field named 'data' whose type mirrors the numpy dtype of the signal.
data_field=ov.Field('data',ov.DType.fromString(str(pulse.dtype)),'row_major')
db=ov.CreateIdx(url=idx_filename, dims=[N], fields=[data_field], compression="raw", arco=f"{4*1024*1024}")
assert(os.path.isfile(idx_filename))

# Write the whole signal at once into the box [0, N).
logic_box=ov.BoxNi(ov.PointNi([0]),ov.PointNi([N]))
db.write(pulse,  logic_box=logic_box)
print("IDX write uncompressed done","logic_box",logic_box.toString())

# The data was written with "raw" compression; reopen the dataset
# (use the python version) and compress it with zip.
db=ov.LoadDataset(idx_filename)
db.compressDataset("zip")
print("compress dataset done")

IDX write uncompressed done logic_box 0 690028440
compress dataset done


In [16]:
# Inspect the extent and the finest resolution level of the dataset.
logic_box=db.getLogicBox()
max_resolution=db.getMaxResolution()
print("logic_box",logic_box)
print("db.getMaxResolution()",max_resolution)

# Read the full extent at a coarse resolution level to get a small preview.
resolution=12
data=db.read(logic_box=logic_box, max_resolution=resolution)
vmin,vmax=np.min(data),np.max(data)
print(f"IDX read done dtype={data.dtype} shape={data.shape} vmin={vmin} vmax={vmax}")

logic_box ([0], [690028440])
db.getMaxResolution() 30
IDX read done dtype=int64 shape=(2633,) vmin=0 vmax=3221225488


In [14]:
from bokeh.plotting import figure, show
# Plot the samples read from the dataset against their index.
y=data
x=list(range(len(y)))

p = figure(title="Simple line example", x_axis_label='x', y_axis_label='y')
p.line(x, y, legend_label="Pulse", line_width=1)
show(p)

Copy blocks to S3. For example:


```bash
aws s3 sync --endpoint-url https://maritime.sealstorage.io/api/v0/s3 --profile sealstorage --size-only  /mnt/c/big/visus-datasets/signal1d_slac/ s3://utah/visus-datasets/signal1d_slac/
```