Execute in a shell:

```
mkdir supercdms-data
alias s3='aws s3 --profile slac_public --endpoint-url https://maritime.sealstorage.io/api/v0/s3 --no-verify-ssl'
s3 ls --recursive s3://utah/supercdms-data/CDMS/UMN/R68/Raw/  | awk '{print $4}' > supercdms-data/list.txt
```

In [None]:
# Keep only the gzipped MIDAS entries (*.mid.gz) of the listing written by the shell step.
with open("supercdms-data/list.txt","r") as listing:
  stripped=(line.strip() for line in listing)
  files=[name for name in stripped if name.endswith(".mid.gz")]
print("found",len(files),".mid.gz files")

In [None]:
import os,sys,boto3
from botocore.client import Config

# S3 resource for the Seal Storage endpoint, using placeholder credentials ('any')
# and signature version 4.
config = Config(signature_version = 's3v4')

s3 = boto3.resource('s3',endpoint_url='https://maritime.sealstorage.io/api/v0/s3', aws_access_key_id='any', aws_secret_access_key='any', config=config,)
bucket = s3.Bucket("utah")

# Upper bound on the number of files fetched; a falsy value (0/None) disables the limit.
max_files=30
for I,key in enumerate(files):
  # Stop before handling file number max_files. Checking after the download
  # fetched max_files+1 files, while the later cells only use files[0:max_files].
  if max_files and I>=max_files:
    break

  # the bucket key is reused as the relative local path
  local=key

  if os.path.isfile(local):
    print(f"File {key} already exists {os.path.getsize(local)}")
    continue

  # download file (a key without any '/' has no parent directory to create,
  # and os.makedirs("") would raise)
  parent=os.path.dirname(local)
  if parent:
    os.makedirs(parent,exist_ok=True)
  bucket.download_file(key,local)
  print(f"Downloaded file {key} {os.path.getsize(local)}")


In [None]:
import midas.file_reader

import json,os,sys, xmltodict
from pprint import pprint

# ///////////////////////////////////////////////////////////////////
def LoadXML(filename):
    """Read the XML file `filename` and return it parsed by xmltodict.

    NOTE(review): this function used to read the file and then hit a bare
    `return`, so it always returned None and discarded the content. Returning
    the parsed document is presumably what was intended (xmltodict is the
    parser ParseEvent uses) -- confirm against the intended callers.
    """
    with open(filename, 'rt') as file:
        body = file.read()
    return xmltodict.parse(body)

# ///////////////////////////////////////////////////////////////////
def Merge(d1, d2):
    """Combine two mappings, with entries of `d2` overriding those of `d1`.

    When either argument is falsy (None or empty) no new dict is built:
    `d2` is returned if `d1` is falsy, otherwise `d1` is returned as is.
    """
    if not d1:
        return d2
    if not d2:
        return d1
    combined = dict(d1)
    combined.update(d2)
    return combined

# /////////////////////////////////////////
def ParseEvent(evt, parse_xml=False):
    """Convert one MIDAS event into a JSON-serializable dict of metadata.

    Args:
        evt: event object exposing `header`, `banks`, `flags`,
            `all_bank_size_bytes` and `non_bank_data`.
        parse_xml: when True and the event carries non-bank data, that data is
            decoded as an XML ODB dump and its root is stored under "odb".

    Returns:
        dict with the keys "header", "all_bank_size_bytes", "flags" and
        "banks" (per-bank name/type/size only, no payload), preceded by "odb"
        when an ODB dump was parsed.

    Raises:
        AssertionError: if the parsed XML does not have "odb" as its only root.
    """

    ret={}

    # non_bank_data is None for events that use banks. Without this guard the
    # decode below raised AttributeError whenever parse_xml was requested for
    # an event that is not an ODB dump.
    if parse_xml and evt.non_bank_data:
        body=evt.non_bank_data.decode("latin-1")
        # Non-printable characters are dropped before parsing; this also strips
        # newlines and tabs, so some characters of the dump are lost.
        d=xmltodict.parse(''.join(c for c in body if c.isprintable()))
        assert isinstance(d,dict)
        assert list(d.keys())==["odb"]
        ret["odb"]=d['odb']
        # NOTE(review): presumably releases the (potentially large) dump once
        # parsed; be aware that this mutates the caller's event.
        evt.non_bank_data=""

    ret.update({
        "header" : {
            "event_id": evt.header.event_id,  # int
            "trigger_mask": evt.header.trigger_mask,  # int
            "serial_number":evt.header.serial_number,  # int
            "timestamp": evt.header.timestamp,  # int UNIX timestamp of event
            "event_data_size_bytes": evt.header.event_data_size_bytes,  # int Size of all banks
        },
        "all_bank_size_bytes": evt.all_bank_size_bytes,  # (int)
        "flags":evt.flags,  # (int)
        # "non_bank_data" is deliberately not exported (bytes or None): content of
        # special events that don't use banks, e.g. the begin-of-run ODB dump
        "banks" : { # (dict of {str: `Bank`}) - Keyed by bank name
            bank_name: {
                "name": bank.name,  # (str) - 4 characters
                "type": bank.type,  # (int) - See `TID_xxx` members in `midas` module
                "size_bytes": bank.size_bytes,  # (int)
                # "data" is deliberately not exported (tuple of int/float/byte etc,
                # or a numpy array if use_numpy is specified when unpacking)
            } for bank_name, bank in evt.banks.items()
        },
    })

    return ret

# /////////////////////////////////////////
def GenerateJson(src_filename,dst_filename):
    """Dump the metadata of every event of a MIDAS file to a JSON file.

    Args:
        src_filename: path of the MIDAS file to read.
        dst_filename: path of the JSON file to write.

    The output is `{"events": [...]}` with one ParseEvent() dict per event;
    only the first event is asked to parse its XML ODB dump.
    """
    mfile = midas.file_reader.MidasFile(src_filename)
    events=[ParseEvent(evt, parse_xml=(E==0)) for E,evt in enumerate(mfile)]
    # Write to the destination the caller asked for. Opening
    # `filename + ".json"` here silently depended on a global named `filename`
    # and ignored the dst_filename argument.
    with open(dst_filename,"w") as out:
        out.write(json.dumps({"events":events}, sort_keys=False, indent=2))
    

# /////////////////////////////////////////
# Convert every selected MIDAS file that does not have its JSON dump yet.
for filename in files[0:max_files]:
    json_filename=filename+".json"
    if os.path.isfile(json_filename):
        continue
    GenerateJson(filename,json_filename)


In [None]:
# /////////////////////////////////////////
def GetBanksData(filename):
    """Yield one `(timestamp, bank)` pair per bank of every event in `filename`.

    The timestamp is taken from the header of the event owning the bank, so
    all banks of the same event share it.
    """
    reader = midas.file_reader.MidasFile(filename)
    for event in reader:
        event_time = event.header.timestamp
        yield from ((event_time, bank) for bank in event.banks.values())

# Concatenate the data of every bank into one long signal, recording one timestamp per bank.
# NOTE(review): the original author was not sure that appending across banks/files is "logically right".
timesteps,pulse=[],[]
last=None
for F,filename in enumerate(files[:5]): # TODO ... signal otherwise is too big
    print("Reading",F, "filename", filename)
    for timestamp,bank in GetBanksData(filename):
        # every bank must carry the same name as the one seen before it
        if last is not None:
            assert(bank.name==last.name)
        last=bank
        timesteps.append(timestamp)
        pulse.extend(bank.data)

def non_decreasing(L):
    """Return True when no element of `L` is followed by a smaller one."""
    for earlier, later in zip(L, L[1:]):
        if not earlier <= later:
            return False
    return True
# the per-bank timestamps must never go backwards
assert non_decreasing(timesteps)
import numpy as np
# turn the accumulated Python list into a numpy array and report its range
pulse=np.array(pulse)
vmin,vmax=np.min(pulse),np.max(pulse)
print("dtype",pulse.dtype,"shape", pulse.shape,"vmin",vmin,"vmax",vmax)

In [None]:
import shutil
import numpy as np
import OpenVisus as ov
# Location of the IDX file that will describe the 1D signal.
idx_filename='/mnt/c/big/visus-datasets/signal1d_slac/visus.idx'

N=pulse.shape[0]

# Start from scratch: remove the directory holding the IDX file, if it exists.
dataset_dir=os.path.splitext(os.path.dirname(idx_filename))[0]
shutil.rmtree(dataset_dir, ignore_errors=True)

# a single field whose dtype is taken from the signal array
data_field=ov.Field('data',ov.DType.fromString(str(pulse.dtype)),'row_major')
db=ov.CreateIdx(
  url=idx_filename,
  dims=[N],
  fields=[data_field],
  compression="raw",
  arco=f"{4*1024*1024}")
assert(os.path.isfile(idx_filename))

# write the whole signal into the box [0, N)
print("Writing data...")
logic_box=ov.BoxNi(ov.PointNi([0]),ov.PointNi([N]))
db.write(pulse,  logic_box=logic_box)
print("write uncompressed data done","logic_box",logic_box.toString())

In [None]:
# Reopen the dataset written above (created with compression="raw") and
# compress it with "zip".
print("Compressing data")
db=ov.LoadDataset(idx_filename)
db.compressDataset("zip") 
print("compress dataset done")

Read data

In [None]:
# Read the full logic box of the dataset back, limited to resolution 12.
logic_box=db.getLogicBox()
print("logic_box",logic_box)
print("db.getMaxResolution()",db.getMaxResolution())
resolution=12
data=db.read(logic_box=logic_box, max_resolution=resolution)
vmin,vmax=np.min(data),np.max(data)
print(f"IDX read done dtype={data.dtype} shape={data.shape} vmin={vmin} vmax={vmax}")

Show data

In [None]:
import bokeh
import bokeh.io 
# direct bokeh output to the notebook (must run before the plotting cells)
bokeh.io.output_notebook()

In [None]:
from bokeh.plotting import figure, show
# Plot the samples read from the dataset against their index.
p = figure(title="Simple line example", x_axis_label='x', y_axis_label='y')
x=list(range(len(data)))
y=data
p.line(x, y, legend_label="Pulse", line_width=1)
show(p)

Copy blocks to S3. For example:


```bash
aws s3 sync --endpoint-url https://maritime.sealstorage.io/api/v0/s3 --profile sealstorage --size-only  /mnt/c/big/visus-datasets/signal1d_slac/ s3://utah/visus-datasets/signal1d_slac/
```

# With `max` filter

In [None]:
import shutil
import numpy as np

import OpenVisus as ov

idx_filename_max='/mnt/c/big/visus-datasets/signal1d_slac_max/visus.idx'
# remove the path obtained by stripping ".idx" from the IDX filename, if it exists
shutil.rmtree(os.path.splitext(idx_filename_max)[0], ignore_errors=True)

N=pulse.shape[0]
# two int64 components per sample: the signal plus one extra channel
field=ov.Field('data',"int64[2]",'row_major')

db=ov.CreateIdx(
  url=idx_filename_max,
  dim=1,
  dims=[N],
  fields=[field],
  compression="raw",
  filters=['max'],
  arco=2*1024*1024,
)
assert(os.path.isfile(idx_filename_max))

print(f"Source pulse dtype={pulse.dtype} shape={pulse.shape} vmin={np.min(pulse):,} vmax={np.max(pulse)}")
# channel 0 receives the signal, channel 1 is left at zero
# NOTE(review): the extra channel is presumably what the 'max' filter needs -- confirm
pulse_max=np.zeros((N, 2), dtype=pulse.dtype)
pulse_max[:,0]=pulse
first_channel=pulse_max[:,0]
vmin,vmax=np.min(first_channel),np.max(first_channel)
print(f"New pulse with extra channel shape={pulse_max.shape} dtype={pulse_max.dtype} vmin={vmin:,} vmax={vmax:,}")

# write both channels into the box [0, N)
print("Writing data...")
logic_box=ov.BoxNi(ov.PointNi([0]),ov.PointNi([N]))
db.write(pulse_max, logic_box=logic_box)
print(f"written max pulse logic_box=[{logic_box.toString()}]")

Compute filter (SLOW OPERATION!)

In [None]:
import OpenVisus as ov
# Compute the filter for the dataset's default field; the second argument is 1 GiB.
# NOTE(review): presumably a window/memory size for the computation -- confirm.
GB=1024**3
db.computeFilter(db.getField(), GB)
print("Filter computed")

In [None]:
# Compress the dataset held in `db` with "zip" (it was created with compression="raw").
print("Compressing data...")
db.compressDataset("zip") 
print("compress dataset done")

In [1]:
import bokeh
import bokeh.io 
# direct bokeh output to the notebook (must run before the plotting cells)
bokeh.io.output_notebook()

In [5]:
import OpenVisus as ov
import numpy as np

# Open the plain dataset and the one created with the 'max' filter.
db_id=ov.LoadDataset('/mnt/c/big/visus-datasets/signal1d_slac/visus.idx')
db_max=ov.LoadDataset('/mnt/c/big/visus-datasets/signal1d_slac_max/visus.idx')

# both reads use the logic box of the plain dataset
logic_box=db_id.getLogicBox()

print("logic_box",logic_box)
print("db.getMaxResolution()",db_id.getMaxResolution())

resolution=8
data_id=db_id.read(logic_box=logic_box, max_resolution=resolution)
# keep only the first of the two components stored in the 'max' dataset
data_max=db_max.read(logic_box=logic_box, max_resolution=resolution)[:,0]
#print([f"{it:,}" for it in data_id ])
#print([f"{it:,}" for it in data_max])
for label,values in (("id ",data_id),("max",data_max)):
    print(f"read {label} dtype={values.dtype} shape={values.shape} vmin={np.min(values)} vmax={np.max(values)}")

logic_box ([0], [560336136])
db.getMaxResolution() 30
read id  dtype=int64 shape=(134,) vmin=0 vmax=3221225488
read max dtype=int64 shape=(134,) vmin=3221225488 vmax=3221225912


In [6]:
from bokeh.plotting import figure, show
p = figure(title="Simple line example", x_axis_label='x', y_axis_label='y')
# one polyline plus point markers per series, using the sample index as x
for series,label,colour in ((data_id,"id","blue"),(data_max,"max","green")):
    xs=list(range(len(series)))
    p.line(xs, series, legend_label=label, line_width=1, color=colour)
    p.circle(xs, series, color=colour)
bokeh.io.show(p, notebook_handle=True)

Copy blocks to S3. For example:


```bash
aws s3 sync --endpoint-url https://maritime.sealstorage.io/api/v0/s3 --profile sealstorage --size-only  /mnt/c/big/visus-datasets/signal1d_slac_max/ s3://utah/visus-datasets/signal1d_slac_max/
```