In [None]:
import time
import pandas as pd
from fmu.sumo.explorer import Explorer, AggregatedTable
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### Initialize Explorer

In [None]:
# Argument handed to Explorer; presumably the name of the Sumo environment
# to connect to — confirm against the Explorer signature.
SUMO_ENV = "dev"
sumo = Explorer(SUMO_ENV)

### Get Case

In [None]:
# Case names are not guaranteed to be unique, so the filter can return
# several hits; the first one is used here.
CASE_NAME = "drogon_ahm-2023-02-22"
matching_cases = sumo.cases.filter(name=CASE_NAME)
case = matching_cases[0]


### Finding info about tables connected to case


In [None]:
# All tables attached to the case; the two prints list the available
# table names and tag names.
tables = case.tables
table_names = tables.names
print(f"Table names: {table_names}")
table_tags = tables.tagnames
print(f"Table tags: {table_tags}")

### Working with aggregated tables
There are two ways to get aggregated tables:
1. The filtering way
2. Through the AggregatedTable class (which uses filtering under the hood)


##### The filtering way

##### Getting one aggregated table

In [None]:
# Filter criteria that single out the aggregated FOPT column of the
# "summary" table (tag "eclipse") in iteration "iter-0".
fopt_criteria = {
    "name": "summary",
    "tagname": "eclipse",
    "iteration": "iter-0",
    "aggregation": "collection",
    "column": "FOPT",
}
# Take the first hit and preview its contents as a DataFrame.
table = tables.filter(**fopt_criteria)[0]
table.to_pandas.head()

##### Access to the global variables the filtering way

In [None]:
# This functionality is deactivated for now; it will return in the next Komodo release
# pd.DataFrame(table["fmu"]["iteration"]["parameters"]["GLOBVAR"])



### For even more user-friendly access to summary data


In [None]:
# Wrap the aggregated "summary" table of the case in an AggregatedTable.
# NOTE(review): the positional arguments look like table name, tag name and
# iteration (they match the values passed to tables.filter earlier) — confirm
# against the AggregatedTable signature.
summary = AggregatedTable(case, "summary", "eclipse", "iter-0")
# Bare last expression: the parameters are shown as the cell output.
summary.parameters



##### Once the parameters have been read, later accesses are faster because they are kept in memory on the instance

In [None]:
# Same lookup as in the previous cell, repeated on purpose to show the
# repeated-access speed described in the heading above.
summary.parameters

##### Quite a lot of data in the global variables

In [None]:

# Groups of exactly this length are printed as a single line; any other
# group is expanded one level and each member's length is printed too.
# NOTE(review): 100 is presumably the number of realizations — confirm.
FLAT_GROUP_SIZE = 100

params = summary.parameters
n_entries = 0
for group in params:
    members = params[group]
    n_members = len(members)
    n_entries += n_members
    print(f"{group} : {n_members}")
    if n_members == FLAT_GROUP_SIZE:
        continue
    # Expand the group: report the length of every member and add it to the total
    for var in members:
        n_values = len(members[var])
        print(f"  {var}: {n_values}")
        n_entries += n_values

print(f"{n_entries} in total")
    

##### Access to global variables

Calculate the CV (coefficient of variation) for all global variables to see which ones vary the most

In [None]:

globals = pd.DataFrame(summary.parameters["GLOBVAR"])
std = globals.std()
mean = globals.mean()
selection = (mean > 0) & (std > 0)
cv = 100 * std.loc[selection] / mean.loc[selection]
cv.sort_values(ascending=False).round(2)    

### Add column with global var

In [None]:
FOPT = summary["FOPT"].to_pandas
FOPT["RELPERM_INT_WO"] = FOPT["REAL"].replace(globals["RELPERM_INT_WO"])
FOPT.head()

### Plot

In [None]:
# Draw on an explicitly created axes: one FOPT line per realization,
# coloured by that realization's RELPERM_INT_WO value.
fig, ax = plt.subplots()
sns.lineplot(
    data=FOPT,
    x="DATE",
    y="FOPT",
    size="REAL",
    hue="RELPERM_INT_WO",
    legend=False,
    ax=ax,
)
# Rotate the x tick labels of the current axes
plt.xticks(rotation=45)
plt.show()

### If you prefer arrow to pandas

In [None]:
# Same FOPT column, fetched as an arrow object instead of a DataFrame;
# its schema is shown as the cell output.
fopt_arrow = summary["FOPT"].to_arrow
fopt_arrow.schema

### Inplace volumes

In [None]:
# Aggregated "geogrid" table (tag "vol") for iteration "iter-0"
inplace = AggregatedTable(case, "geogrid", "vol", "iter-0")

# Sum STOIIP_OIL per (ZONE, REAL), then plot a histogram of the sums
# for the "Therys" zone.
stoiip = inplace["STOIIP_OIL"].to_pandas
stoiip_per_zone_real = stoiip.groupby(["ZONE", "REAL"])["STOIIP_OIL"].agg("sum")
stoiip_per_zone_real["Therys"].plot(kind="hist")

## Access speed
**NB: only works in a proper notebook, not via VS Code**

### Summary speedtest 


In [None]:
# Time how long it takes to fetch the first 20 summary columns one by one
# and print the first row of each.
t0 = time.perf_counter()
selected = summary.columns[:20]
for name in selected:
    print(summary[name].to_pandas.head(1))
elapsed = time.perf_counter() - t0
print(f"{len(selected)} cols in total time: {elapsed: .1f} s")

### Inplace speedtest

In [None]:
# Time how long it takes to fetch the first 20 inplace-volume columns one
# by one and print the first row of each.
t0 = time.perf_counter()
selected = inplace.columns[:20]
for name in selected:
    print(inplace[name].to_pandas.head(1))
elapsed = time.perf_counter() - t0
print(f"{len(selected)} cols in total time: {elapsed: .1f} s")