In [3]:
%load_ext lab_black

In [9]:
import html

import pandas as pd
import dmidc
import dmidc.harmonie
import dmidc.harmonie.grib.danra as danra_grib
import rich

dmidc.__version__

'0.4.0'

In [8]:
# Keep the whole return value of `load_metadata` in a single name.
# NOTE(review): `danra_metainfo` is not referenced anywhere else in the visible
# notebook, and the following cell calls `load_metadata` again (apparently with
# the same argument) and unpacks the result — confirm whether this cell is
# still needed.
danra_metainfo = danra_grib.source.metainfo_cache.load_metadata(None)

In [11]:
# Unpack the four parts returned by `load_metadata`. Later cells use
# PARAMS_SHORT_NAME through `.inverse[...]`, index PARAMS_LONG_NAME by the
# resulting parameter id and iterate PARAMS_BY_LEVEL_TYPE per level type.
PARAMS_SHORT_NAME, PARAMS_LONG_NAME, LEVEL_TYPES, PARAMS_BY_LEVEL_TYPE = (
    danra_grib.source.metainfo_cache.load_metadata(fp_tar_example=None)
)

In [113]:
def build_level_type_dataframes(params_by_level_type=None):
    """
    Build one availability table per level type.

    Parameters
    ----------
    params_by_level_type : mapping, optional
        Maps level type -> {variable name -> iterable of levels}. When omitted
        the notebook-level `PARAMS_BY_LEVEL_TYPE` is used.

    Returns
    -------
    dict
        Maps each level type to a DataFrame indexed by level (sorted in
        descending order) with one column per variable. A cell is True where
        the variable is listed for that level and NaN otherwise. A level type
        without any variables maps to an empty DataFrame.
    """
    if params_by_level_type is None:
        params_by_level_type = PARAMS_BY_LEVEL_TYPE

    dfs = {}
    for level_type, vars_on_level_type in params_by_level_type.items():
        dfs_leveltype = []
        for v, levels in vars_on_level_type.items():
            # each level should appear only once in the index
            vals = list(set(levels))
            dfs_leveltype.append(
                pd.DataFrame([True] * len(vals), index=vals, columns=[v])
            )

        if not dfs_leveltype:
            # pd.concat raises when given nothing to concatenate
            dfs[level_type] = pd.DataFrame()
            continue

        # aligning on the index leaves NaN where a variable lacks a level
        dfs[level_type] = pd.concat(dfs_leveltype, axis=1).sort_index(
            ascending=False
        )
    return dfs


# one table per level type, keyed by the level type
dfs = build_level_type_dataframes()

In [114]:
# inspect the table built for the "heightAboveGround" level type
dfs["heightAboveGround"]

Unnamed: 0,pres,t,tmax,tmin,vis,u,v,r,tp,sf,...,xhail,ugst,vgst,rain,snow,ssi,grpl,lhsub,wevap,snsub
801,,True,,,,,,,,,...,,,,,,True,,,,
800,,True,,,,,,,,,...,,,,,,,,,,
500,,True,,,,True,True,True,,,...,,,,,,,,,,
300,,True,,,,True,True,True,,,...,,,,,,,,,,
250,,True,,,,True,True,True,,,...,,,,,,,,,,
200,,True,,,,True,True,True,,,...,,,,,,,,,,
150,,True,,,,True,True,True,,,...,,,,,,,,,,
100,,True,,,,True,True,True,,,...,,,,,,,,,,
75,,True,,,,True,True,True,,,...,,,,,,,,,,
50,,True,,,,True,True,True,,,...,,,,,,,,,,


In [115]:
def sort_cols_by_nonnull_entries(df):
    """
    Reorder the columns of `df` by how populated they are.

    Columns are ordered by

    1. the number of non-null entries, largest first,
    2. the smallest index label that holds a non-null entry, smallest first,
    3. the column label, as an explicit tie-breaker.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns should be reordered.

    Returns
    -------
    pandas.DataFrame
        The same rows and values as `df` with the columns reordered. A frame
        without columns is returned unchanged.
    """
    if df.shape[1] == 0:
        # nothing to order
        return df

    first_nonnull_index = df.apply(lambda col: col.index[col.notnull()].min())
    df_col_sorting_parts = pd.DataFrame(
        {
            "notnull": df.notnull().sum(),
            "first_nonnull_index": first_nonnull_index,
        }
    )

    # Sorting on several keys keeps the incoming order for ties, so order the
    # rows by column label first to make the tie-break explicit instead of
    # depending on how the DataFrame constructor happened to align its inputs.
    try:
        df_col_sorting_parts = df_col_sorting_parts.sort_index()
    except TypeError:
        # labels that cannot be compared with each other: ties keep the
        # original column order
        pass

    new_col_order = df_col_sorting_parts.sort_values(
        ["notnull", "first_nonnull_index"], ascending=[False, True]
    ).index

    return df[new_col_order]

In [116]:
# apply the column ordering to the table of every level type
dfs = {
    level_type: sort_cols_by_nonnull_entries(df_level_type)
    for level_type, df_level_type in dfs.items()
}

In [140]:
# Order the level types by the total number of non-null (variable, level)
# entries in their table, most populated first.
level_types_sorted = sorted(
    dfs.keys(), key=lambda k: dfs[k].notnull().sum().sum(), reverse=True
)

# Collect one markdown section per level type and join them once at the end.
markdown_parts = []
for level_type in level_types_sorted:
    # place a tick if the value is True, otherwise an empty string
    df_ticks = dfs[level_type].map(lambda v: "✓" if v is True else "")

    # label every variable with its short name and show the long name as
    # hover text
    cols_new = {}
    for short_name in df_ticks.columns:
        param_id = PARAMS_SHORT_NAME.inverse[short_name]
        # escape the long name so that quotes, `&` or `<` in it cannot break
        # the single-quoted HTML attribute
        long_name = html.escape(str(PARAMS_LONG_NAME[param_id]), quote=True)
        cols_new[short_name] = f"<abbr title='{long_name}'>{short_name}</abbr>"

    # one row per variable and one column per level, levels ascending
    df_table = df_ticks.rename(columns=cols_new).T
    df_table = df_table[sorted(df_table.columns)]

    markdown_parts.append(f"\n\n## {level_type}\n\n{df_table.to_markdown()}\n\n")

str_markdown = "".join(markdown_parts)

# the ticks are non-ASCII, so do not depend on the platform default encoding
with open("level_types.md", "w", encoding="utf-8") as f:
    f.write(str_markdown)

In [86]:
# heightAboveGround table with the most populated columns first
# (columns ordered left to right by their number of non-null values)
df = dfs["heightAboveGround"].pipe(
    lambda frame: frame[frame.notnull().sum().sort_values(ascending=False).index]
)
# blank out the missing entries for display only; `df` keeps its NaNs
df.fillna("")

Unnamed: 0,t,u,r,v,sm,tcc,vis,tmin,pres,tmax,...,xhail,ugst,vgst,rain,snow,ssi,grpl,lhsub,wevap,snsub
801,True,,,,True,,,,,,...,,,,,,True,,,,
800,True,,,,True,,,,,,...,,,,,,,,,,
500,True,True,True,True,,,,,,,...,,,,,,,,,,
300,True,True,True,True,,,,,,,...,,,,,,,,,,
250,True,True,True,True,,,,,,,...,,,,,,,,,,
200,True,True,True,True,,,,,,,...,,,,,,,,,,
150,True,True,True,True,,,,,,,...,,,,,,,,,,
100,True,True,True,True,,,,,,,...,,,,,,,,,,
75,True,True,True,True,,,,,,,...,,,,,,,,,,
50,True,True,True,True,,,,,,,...,,,,,,,,,,


In [91]:
# smallest index label with a non-null value, computed per column
df2 = df.apply(lambda col: col.index[col.notnull()].min())

# show the columns of df ordered left to right by that label
df.loc[:, df2.sort_values().index].fillna("")

Unnamed: 0,t,tcc,vis,mcc,lcc,tp,sf,pres,hcc,mld,...,snsub,tmin,r,tmax,vgst,v,u,ugst,sm,ssi
801,True,,,,,,,,,,...,,,,,,,,,True,True
800,True,,,,,,,,,,...,,,,,,,,,True,
500,True,,,,,,,,,,...,,,True,,,True,True,,,
300,True,,,,,,,,,,...,,,True,,,True,True,,,
250,True,,,,,,,,,,...,,,True,,,True,True,,,
200,True,,,,,,,,,,...,,,True,,,True,True,,,
150,True,,,,,,,,,,...,,,True,,,True,True,,,
100,True,,,,,,,,,,...,,,True,,,True,True,,,
75,True,,,,,,,,,,...,,,True,,,True,True,,,
50,True,,,,,,,,,,...,,,True,,,True,True,,,


In [60]:
# Render the heightAboveGround table as markdown into `fp_overview_file`.
# NOTE(review): `fp_overview_file` is not defined in any visible cell, so
# unless it is set elsewhere this raises NameError on a fresh kernel — confirm
# the intended path or drop this cell.
dfs["heightAboveGround"].to_markdown(fp_overview_file)

In [106]:
# per-column sort keys: number of non-null values and the smallest index label
# that holds a non-null value
df_col_sorting_parts = pd.DataFrame(
    {
        "notnull": df.notnull().sum().sort_values(ascending=True),
        "nonzero_index": df.apply(lambda col: col.index[col.notnull()].min()),
    }
)
df_col_sorting_parts

Unnamed: 0,notnull,nonzero_index
dni,1,0
grad,1,0
grpl,1,0
hcc,1,0
icei,1,0
lcc,1,0
lhe,1,0
lhsub,1,0
lwavr,1,0
mcc,1,0


In [109]:
# most populated columns first; among equally populated columns, the one whose
# first non-null value sits at the smallest index label comes first
new_col_order = df_col_sorting_parts.sort_values(
    by=["notnull", "nonzero_index"],
    ascending=[False, True],
).index
new_col_order

Index(['t', 'r', 'u', 'v', 'tcc', 'sm', 'dni', 'grad', 'grpl', 'hcc', 'icei',
       'lcc', 'lhe', 'lhsub', 'lwavr', 'mcc', 'mld', 'nlwrs', 'nswrs', 'pres',
       'prtp', 'psct', 'pscw', 'pstb', 'pstbc', 'rain', 'sf', 'snow', 'snsub',
       'sshf', 'swavr', 'tp', 'uflx', 'vflx', 'vis', 'wevap', 'xhail', 'tmax',
       'tmin', 'ugst', 'vgst', 'ssi'],
      dtype='object')

In [111]:
# reorder the columns and blank out the missing entries
df_sorted = df.loc[:, new_col_order].fillna("")