In [1]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

By default the application assumes that the catalog is collocated with the data as shown below.  Assuming that is the
case an environment variable can be set and the application will find the data.

```bash
ls /mnt/nuc/c/sevir/
CATALOG.csv  data
```

In [2]:
import os

os.environ["PATH_TO_SEVIR"] = "/mnt/nuc/c/sevir"  # you can set the default path via an environment variable

In [17]:
import sevir

cat = sevir.Catalog(img_types={sevir.VIS, sevir.IR_069})
cat.data.head()

id,file_name,file_index,img_type,time_utc,minute_offsets,episode_id,event_id,event_type,llcrnrlat,llcrnrlon,urcrnrlat,urcrnrlon,proj,size_x,size_y,height_m,width_m,data_min,data_max,pct_missing
str,str,i64,str,datetime[μs],str,f64,f64,str,f64,f64,f64,f64,str,i64,i64,f64,f64,f64,f64,f64
"""R1803250502768…","""/mnt/nuc/c/sev…",0,"""vis""",2018-03-25 05:00:00,"""-120:-115:-110…",,,,33.216708,-91.635132,36.336627,-87.070254,"""+proj=laea +la…",768,768,384000.0,384000.0,-0.003361,0.0056,0.0
"""R1803250502767…","""/mnt/nuc/c/sev…",1,"""vis""",2018-03-25 05:00:00,"""-120:-115:-110…",,,,33.084309,-91.849435,36.213723,-87.301535,"""+proj=laea +la…",768,768,384000.0,384000.0,-0.003361,0.0056,0.0
"""R1803250502772…","""/mnt/nuc/c/sev…",2,"""vis""",2018-03-25 05:00:00,"""-120:-115:-110…",,,,46.661866,-123.509928,50.883159,-120.009277,"""+proj=laea +la…",768,768,384000.0,384000.0,-0.00287,0.005548,0.0
"""R1803251650824…","""/mnt/nuc/c/sev…",3,"""vis""",2018-03-25 16:50:00,"""-120:-115:-110…",,,,40.883237,-85.350213,43.686191,-79.903987,"""+proj=laea +la…",768,768,384000.0,384000.0,0.029911,0.7856,0.0
"""R1803251650762…","""/mnt/nuc/c/sev…",4,"""vis""",2018-03-25 16:50:00,"""-120:-115:-110…",,,,44.946047,-123.972935,49.169436,-120.575175,"""+proj=laea +la…",768,768,384000.0,384000.0,0.02856,0.79261,0.0


Once the catalog is created it can be passed to the H5Store to open all of the files.

In [4]:
h5 = sevir.H5Store(cat)

INFO:root:Loading 91 files with 2 image types.
100%|██████████| 91/91 [00:00<00:00, 93.11it/s]


In [5]:
# the index is a polars dataframe that contains the metadata for the h5 files.
h5.index.head()

id,file_name,file_index,img_type,time_utc,minute_offsets,episode_id,event_id,event_type,llcrnrlat,llcrnrlon,urcrnrlat,urcrnrlon,proj,size_x,size_y,height_m,width_m,data_min,data_max,pct_missing,data_index
str,str,i64,str,datetime[μs],str,f64,f64,str,f64,f64,f64,f64,str,i64,i64,f64,f64,f64,f64,f64,i32
"""R1803250502768…","""/mnt/nuc/c/sev…",0,"""vis""",2018-03-25 05:00:00,"""-120:-115:-110…",,,,33.216708,-91.635132,36.336627,-87.070254,"""+proj=laea +la…",768,768,384000.0,384000.0,-0.003361,0.0056,0.0,69
"""R1803250502767…","""/mnt/nuc/c/sev…",1,"""vis""",2018-03-25 05:00:00,"""-120:-115:-110…",,,,33.084309,-91.849435,36.213723,-87.301535,"""+proj=laea +la…",768,768,384000.0,384000.0,-0.003361,0.0056,0.0,69
"""R1803250502772…","""/mnt/nuc/c/sev…",2,"""vis""",2018-03-25 05:00:00,"""-120:-115:-110…",,,,46.661866,-123.509928,50.883159,-120.009277,"""+proj=laea +la…",768,768,384000.0,384000.0,-0.00287,0.005548,0.0,69
"""R1803251650824…","""/mnt/nuc/c/sev…",3,"""vis""",2018-03-25 16:50:00,"""-120:-115:-110…",,,,40.883237,-85.350213,43.686191,-79.903987,"""+proj=laea +la…",768,768,384000.0,384000.0,0.029911,0.7856,0.0,69
"""R1803251650762…","""/mnt/nuc/c/sev…",4,"""vis""",2018-03-25 16:50:00,"""-120:-115:-110…",,,,44.946047,-123.972935,49.169436,-120.575175,"""+proj=laea +la…",768,768,384000.0,384000.0,0.02856,0.79261,0.0,69


In [9]:
import random

id_ = random.choice(h5.index[sevir.ID])
print(f"random id: {id_}")
# because the h5 store was initialized with a catalog containing 2 image types, they are both spit out
vis, vil = h5.__getitem__(id_)
print(f"vis: {vis.shape}, vil: {vil.shape}")

random id: R19092713297600
vis: (1, 768, 768, 49), vil: (1, 192, 192, 49)


In [12]:
import numpy as np

# advanced indexing is supported where a tuple with and id_ and a list of image types cand be passed
# h5[id_, [sevir.VIS, sevir.VIL]] is equivalent to h5[id_]
(vis2,) = h5.__getitem__((id_, [sevir.VIS]))
assert np.allclose(vis, vis2)
print(f"vis: {vis2.shape}")

vis: (1, 768, 768, 49)


In [13]:
# when you're done, close the h5 store
print(len(h5))
h5.close_all()
print(len(h5))

91
0


In [16]:
inputs = sevir.IR_069, sevir.IR_107
features = (sevir.VIL,)
cat = sevir.Catalog(img_types=inputs + features)
# this is effectively a Generator[pytorch.data.IterableDataset[tuple[Tensor, Tensor]]]
# using the session as a context manager will close the h5 store when you're done
with sevir.SEVIRGenerator(cat, inputs=inputs, features=features).session() as gen:
    for i, ((x, y), df) in enumerate(gen.iter_batches(metadata=True)):
        print(x.shape, y.shape)
        if i == 2:
            break
        print(df)

INFO:root:Loading 30 files with 3 image types.
100%|██████████| 30/30 [00:00<00:00, 91.03it/s]
  0%|          | 1/12896 [00:00<1:32:54,  2.31it/s]

torch.Size([2, 1, 192, 192, 49]) torch.Size([1, 1, 384, 384, 49])
shape: (3, 21)
┌─────────┬────────────┬────────────┬──────────┬───┬──────────┬────────────┬───────────┬───────────┐
│ id      ┆ file_name  ┆ file_index ┆ img_type ┆ … ┆ width_m  ┆ data_min   ┆ data_max  ┆ pct_missi │
│ ---     ┆ ---        ┆ ---        ┆ ---      ┆   ┆ ---      ┆ ---        ┆ ---       ┆ ng        │
│ str     ┆ str        ┆ i64        ┆ str      ┆   ┆ f64      ┆ f64        ┆ f64       ┆ ---       │
│         ┆            ┆            ┆          ┆   ┆          ┆            ┆           ┆ f64       │
╞═════════╪════════════╪════════════╪══════════╪═══╪══════════╪════════════╪═══════════╪═══════════╡
│ S782061 ┆ /mnt/nuc/c ┆ 229        ┆ vil      ┆ … ┆ 384000.0 ┆ 0.0        ┆ 254.0     ┆ 0.0       │
│         ┆ /sevir/dat ┆            ┆          ┆   ┆          ┆            ┆           ┆           │
│         ┆ a/vil/2018 ┆            ┆          ┆   ┆          ┆            ┆           ┆           │
│         

  0%|          | 2/12896 [00:00<1:31:18,  2.35it/s]

torch.Size([2, 1, 192, 192, 49]) torch.Size([1, 1, 384, 384, 49])
shape: (3, 21)
┌────────────┬───────────┬───────────┬──────────┬───┬──────────┬───────────┬───────────┬───────────┐
│ id         ┆ file_name ┆ file_inde ┆ img_type ┆ … ┆ width_m  ┆ data_min  ┆ data_max  ┆ pct_missi │
│ ---        ┆ ---       ┆ x         ┆ ---      ┆   ┆ ---      ┆ ---       ┆ ---       ┆ ng        │
│ str        ┆ str       ┆ ---       ┆ str      ┆   ┆ f64      ┆ f64       ┆ f64       ┆ ---       │
│            ┆           ┆ i64       ┆          ┆   ┆          ┆           ┆           ┆ f64       │
╞════════════╪═══════════╪═══════════╪══════════╪═══╪══════════╪═══════════╪═══════════╪═══════════╡
│ R191124214 ┆ /mnt/nuc/ ┆ 1547      ┆ vil      ┆ … ┆ 384000.0 ┆ 0.0       ┆ 192.0     ┆ 0.0       │
│ 88363      ┆ c/sevir/d ┆           ┆          ┆   ┆          ┆           ┆           ┆           │
│            ┆ ata/vil/2 ┆           ┆          ┆   ┆          ┆           ┆           ┆           │
│         

  0%|          | 2/12896 [00:01<2:13:19,  1.61it/s]
INFO:root:Closing SEVIRstoreHDF5


torch.Size([2, 1, 192, 192, 49]) torch.Size([1, 1, 384, 384, 49])
