# DEMO 3: Getting UCI data into HSP2

### Python imports

In [None]:
from collections import defaultdict

from pandas import DataFrame, HDFStore, read_hdf

# Name of the HDF5 file that the cells below open with HDFStore / read_hdf.
hdfname = "test10.h5"

### Discover the data in the HDF5 file

In [None]:
# Grab the list of keys (table paths) while the store is open; the store is
# closed again before anything is printed.
with HDFStore(hdfname) as store:
    keys = store.keys()

# One path per line.
for key in keys:
    print(key)

Notes:
 * Using a context manager closes the file correctly, even if an exception is raised.
 * h5py's `keys()` is very fast; PyTables' `keys()` was **slow**, which is why using the keys was an option.  It is now merely slow, but *maybe* acceptable.

Split the path name into the leading operation, the activity module name, and whatever else is left.

In [None]:
# Read the list of paths, then break each one into its pieces.
with HDFStore(hdfname) as store:
    paths = store.keys()

for path in paths:
    # Drop the leading "/" and split into at most four pieces: the first two
    # are the operation and the module, the remainder is re-joined with "_".
    operation, module, *other = path[1:].split("/", 3)
    s = "_".join(other)

    print(f"{operation} {module} {s}")

### Read all desired tables (DataFrame) into memory

In [None]:
# Load and print every table stored under PERLND, IMPLND or RCHRES.
with HDFStore(hdfname) as store:
    for path in store.keys():
        operation, module, *other = path[1:].split("/", 3)
        s = "_".join(other)
        if operation not in {"PERLND", "IMPLND", "RCHRES"}:
            continue  # tables of other groups are not read here

        df = store[path]

        print(operation, module, s)
        # Table followed by one blank separator line.
        print(df, end="\n\n")

### Convert table (DataFrame) into a dictionary

Simple example

In [None]:
# Load a single table directly from the file; the bare name displays it.
df = read_hdf(hdfname, key="RCHRES/HYDR/PARAMETERS")
df

In [None]:
# Convert the table to a nested dict: {row label -> {column name -> value}}.
d = df.to_dict(orient="index")
d

In [None]:
# The top-level keys are the row labels of the table.
d.keys()

In [None]:
# Look up one value: row "R004", column "DELTH".
d["R004"]["DELTH"]

Now incorporate this together.
 * Note: defaultdict()

In [None]:
# Build ui: {(operation, module, row label) -> {table name -> {column -> value}}}.
# defaultdict(dict) creates the inner dict the first time a key is used.
ui = defaultdict(dict)
with HDFStore(hdfname) as store:
    for path in store.keys():
        # Drop the leading "/" and split into operation, module and the rest.
        op, module, *other = path[1:].split(sep="/", maxsplit=3)
        s = "_".join(other)  # remaining path pieces joined into one table name
        if op in {"PERLND", "IMPLND", "RCHRES"}:
            df = store[path]
            ids = df.to_dict("index")  # {row label -> {column -> value}}
            # NOTE(review): `id` shadows the builtin of the same name.
            for id in ids:
                ui[(op, module, id)][s] = ids[id]

ui

In [None]:
# Table names stored for the ("PERLND", "SNOW", "P001") entry.
ui[("PERLND", "SNOW", "P001")].keys()

In [None]:
# Show the whole FLAGS entry, then a single value (ICEFG) taken from it.
(
    ui[("PERLND", "SNOW", "P001")]["FLAGS"],
    ui[("PERLND", "SNOW", "P001")]["FLAGS"]["ICEFG"],
)

In [None]:
# Same DELTH value as d["R004"]["DELTH"], now reached through ui.
ui[("RCHRES", "HYDR", "R004")]["PARAMETERS"]["DELTH"]

In [None]:
# Another column ("TYPE") from the same PARAMETERS entry.
ui[("RCHRES", "HYDR", "R004")]["PARAMETERS"]["TYPE"]

But main() will use its knowledge of the operation, module, and segment to pass the appropriate dictionary to the called program.

In [None]:
# Pull out the dictionary for one (operation, module, segment) key.
passthis = ui[("PERLND", "SNOW", "P001")]

# Display its type together with the FLAGS entry and one value from it.
type(passthis), passthis["FLAGS"], passthis["FLAGS"]["ICEFG"]

In [None]:
# Same idea for the ("RCHRES", "HYDR", "R004") key: select the dictionary,
# then read one parameter from it.
passthis = ui[("RCHRES", "HYDR", "R004")]
passthis["PARAMETERS"]["DELTH"]

### CLEANUP

The original code from above
```
ui = defaultdict(dict)
with HDFStore(hdfname) as store:
    for path in store.keys():
        op, module, *other = path[1:].split(sep='/', maxsplit=3)
        s = '_'.join(other)
        if op in {'PERLND', 'IMPLND', 'RCHRES'}:
            df = store[path]
            ids = df.to_dict('index')
            for id in ids:
                ui[(op, module, id)][s] = ids[id]
```

In [None]:
# Cleaned-up construction of the lookup dictionary:
#   {(operation, module, row label) -> {table name -> {column -> value}}}
# It iterates the (row label, values) pairs directly instead of looking each
# row up by key, and names the loop variable `segment` so the builtin `id`
# is not shadowed.
ui2 = defaultdict(dict)
with HDFStore(hdfname) as store:
    for path in store.keys():
        # Drop the leading "/" and split into operation, module and the rest.
        op, module, *other = path[1:].split(sep="/", maxsplit=3)
        s = "_".join(other)  # remaining path pieces joined into one table name
        if op in {"PERLND", "IMPLND", "RCHRES"}:
            for segment, vdict in store[path].to_dict("index").items():
                ui2[(op, module, segment)][s] = vdict

ui2

In [None]:
# Check that the cleaned-up construction matches the first one.
ui == ui2