In [None]:
import time


class Timer:
    """Context manager that prints a title on entry and the elapsed time on exit.

    Timing uses ``time.perf_counter()``. The clock is started before the
    title is printed, so the (tiny) cost of that print is part of the
    measurement.

    Usage::

        with Timer("Loading") as t:
            ...
        t.elapsed  # seconds spent inside the block, as a float

    Attributes:
        elapsed: Seconds measured by the most recent ``with`` block, or
            ``None`` if no block has completed yet.
    """

    def __init__(self, title):
        """Store the title that is printed when the timed block starts."""
        self._title = title
        self._t0 = None
        self.elapsed = None

    def __enter__(self):
        """Start the clock, print the title and return this timer.

        Returning ``self`` makes ``with Timer(...) as t`` bind the timer
        instead of ``None``.
        """
        self._t0 = time.perf_counter()
        print(self._title)
        return self

    def __exit__(self, type, value, traceback):
        """Stop the clock, remember the elapsed time and print it.

        Implicitly returns ``None``, so an exception raised inside the
        ``with`` block is not suppressed.
        """
        self.elapsed = time.perf_counter() - self._t0
        print(f"Elapsed: {self.elapsed:0.3f} seconds.")

In [None]:
from fmu.sumo.explorer import Explorer

# Explorer instance created with env="dev". It is a module-level global:
# run_exp() and the later cells look cases up through it.
exp = Explorer(env="dev")

In [None]:
def total_blob_size(sc):
    """Return the summed size in bytes of the objects in ``sc``.

    The sum of ``file.size_bytes`` is used when it is non-zero; otherwise
    the sum of ``_sumo.blob_size`` is returned instead.
    """
    file_total = sc.metrics.sum("file.size_bytes")
    if file_total != 0:
        return file_total
    # A zero sum triggers the fallback to the alternative size field.
    return sc.metrics.sum("_sumo.blob_size")


def fmt_size_bytes(num_bytes):
    """Format a byte count as a string with three decimals and a binary unit.

    Values below 1 MiB are shown in KiB (including values under 1 KiB),
    values below 1 GiB in MiB, and everything else in GiB.
    """
    kib, mib, gib = 1 << 10, 1 << 20, 1 << 30
    # Each entry: (exclusive upper bound, divisor, unit label).
    for upper_bound, divisor, unit in ((mib, kib, "KiB"), (gib, mib, "MiB")):
        if num_bytes < upper_bound:
            return f"{num_bytes / divisor:.3f} {unit}"
    return f"{num_bytes / gib:.3f} GiB"


def do_aggregate(title, tagname, rels, columns):
    """Print size statistics for ``rels``, then time and print an aggregation.

    Args:
        title: Label handed to ``Timer``; printed when the timed section starts.
        tagname: Only used as the prefix of the object/column count line.
        rels: Collection to aggregate. It must provide ``metrics``,
            ``columns``, ``filter`` and ``_aggregate``, and support ``len``.
        columns: Column names passed to both ``filter`` and ``_aggregate``.

    Only the ``filter(...)._aggregate(...)`` call is inside the timed
    section; the conversion to pandas and the sort by ``REAL`` and ``DATE``
    happen after the timer has stopped.
    """
    blob_stats = rels.metrics.stats("_sumo.blob_size")
    smallest = blob_stats["min"]
    largest = blob_stats["max"]

    object_count = len(rels)
    column_count = len(rels.columns)
    print(f"{tagname}: {object_count} objects, {column_count} columns.")

    size_range = f"min={fmt_size_bytes(smallest)}, max={fmt_size_bytes(largest)}"
    print(f"Blob size: {size_range}")
    print(f"Total size of input: {fmt_size_bytes(total_blob_size(rels))}")

    with Timer(title):
        narrowed = rels.filter(column=columns)
        # NOTE(review): _aggregate is a private (underscore-prefixed) method.
        agg = narrowed._aggregate(columns=columns)

    frame = agg.to_pandas()
    print(frame.sort_values(by=["REAL", "DATE"]))


def run_exp(caseuuid, ensemblename, tagname, columns):
    """Run the aggregation timing comparison for one case and ensemble.

    Looks the case up through the module-level ``exp`` and prints its
    asset, name, uuid and status. It then:

    1. aggregates the matching realization tables from ``case.tables.visible``
       (reported as "Full-sized tables"),
    2. aggregates the matching realization tables from ``case.tables.hidden``
       (reported as "Split tables"),
    3. times the lookup of an already aggregated table for ``columns[0]``
       and prints it, or prints a notice when none is found.

    Args:
        caseuuid: UUID of the case to look up.
        ensemblename: Ensemble name used in every filter.
        tagname: Table tag name used in every filter.
        columns: Column names to aggregate; only the first one is used in
            step 3.
    """
    case = exp.get_case_by_uuid(caseuuid)
    print(f"{case.asset}; {case.name}; {caseuuid}; {case.status}")

    # The visible and hidden collections are queried with identical criteria.
    realization_query = {
        "ensemble": ensemblename,
        "realization": True,
        "tagname": tagname,
        "column": columns,
    }

    full_sized = case.tables.visible.filter(**realization_query)
    do_aggregate("Full-sized tables", tagname, full_sized, columns)

    split = case.tables.hidden.filter(**realization_query)
    do_aggregate("Split tables", tagname, split, columns)

    with Timer("Fetch single-vector table"):
        first_column = columns[0]
        aggs = case.tables.filter(
            ensemble=ensemblename,
            aggregation=True,
            tagname=tagname,
            column=first_column,
        )
        if len(aggs) == 0:
            print("Aggregated table not found.")
        else:
            print(aggs[0].to_pandas().sort_values(by=["REAL", "DATE"]))

In [None]:
# Timing comparison for ensemble "pred-0" of this case, single column "FOPT".
run_exp(
    caseuuid="359e7c72-a4ca-43ee-9203-f09cd0f149a9",
    ensemblename="pred-0",
    tagname="summary",
    columns=["FOPT"],
)

In [None]:
# Timing comparison for ensemble "iter-0" of this case, single column "FOPT".
run_exp(
    caseuuid="fc6cc7d3-6162-46a3-9d69-48ad1eaecdfb",
    ensemblename="iter-0",
    tagname="summary",
    columns=["FOPT"],
)

In [None]:
# Timing comparison for ensemble "iter-0" of this case, single column "FOPT".
run_exp(
    caseuuid="8ffeb5f8-ca60-42ee-998e-53d34e47d3e2",
    ensemblename="iter-0",
    tagname="summary",
    columns=["FOPT"],
)

In [None]:
# Same case and ensemble as the previous cell, now with two columns.
run_exp(
    caseuuid="8ffeb5f8-ca60-42ee-998e-53d34e47d3e2",
    ensemblename="iter-0",
    tagname="summary",
    columns=["FOPR", "FOPT"],
)

In [None]:
# Collect the column names of the hidden tables for fragment 0 and fragment 1
# and show which names appear in both.
caseuuid = "8ffeb5f8-ca60-42ee-998e-53d34e47d3e2"
ensemble = "iter-0"
case = exp.get_case_by_uuid(caseuuid)
hidden = case.tables.hidden


def _fragment_columns(tables, fragment):
    """Return ``.columns`` of ``tables`` filtered on ``_sumo.fragment == fragment``."""
    return tables.filter(complex={"term": {"_sumo.fragment": fragment}}).columns


cols_f0 = _fragment_columns(hidden, 0)
cols_f1 = _fragment_columns(hidden, 1)
intersection = set(cols_f0).intersection(cols_f1)
intersection

In [None]:
import random


def some_cols(cols, n, skip_cols=("DATE", "YEARS")):
    """Pick up to ``n`` distinct column names at random from ``cols``.

    Args:
        cols: Iterable of column names to choose from.
        n: Number of columns requested.
        skip_cols: Column names that must never be picked.

    Returns:
        A list of distinct names in random order. It has ``n`` entries
        unless fewer candidates are available, in which case every
        candidate is returned. It is empty when nothing is left after
        removing ``skip_cols`` or when ``n`` is not positive.

    Sampling is done without replacement, so a name is never returned
    twice. Candidates keep the order in which they appear in ``cols``, so
    the result is reproducible once ``random.seed`` has been called.
    """
    excluded = set(skip_cols)
    # dict.fromkeys removes duplicates but, unlike a set, keeps input order.
    candidates = [name for name in dict.fromkeys(cols) if name not in excluded]
    # Clamp so that asking for more columns than exist (or for a negative
    # count) yields a shorter list instead of an exception.
    count = max(0, min(n, len(candidates)))
    return random.sample(candidates, count)

In [None]:
# Request 50 randomly chosen columns from each fragment's column list,
# then join the two picks into one list (fragment 0 first).
some_f0 = some_cols(cols=cols_f0, n=50)
some_f1 = some_cols(cols=cols_f1, n=50)
cols = [*some_f0, *some_f1]

In [None]:
# Timing comparison using the columns sampled from fragment 0.
run_exp(
    caseuuid=caseuuid,
    ensemblename=ensemble,
    tagname="summary",
    columns=some_f0,
)