# Main functions
 - StandardFileReader
 - maybeFST
 - get_basic_dataframe
 - StandardFileWriter
 - add_dask_column
 - add_grid_column
 - add_columns
 - compute
 - get_2d_lat_lon
 - metadata_cleanup
 - select_with_meta
 - to_dask
 - to_numpy
 - unit_convert
 - unit_convert_array
 - voir

# StandardFileReader and maybeFST
- `StandardFileReader` reads in the record information of the provided file(s); `maybeFST` is used to check that a file is an FST file before reading it

In [None]:
import glob

# Print the documentation of the two functions demonstrated in this cell.
print(f'StandardFileReader:\n{fstpy.StandardFileReader.__doc__}')
print(f'maybeFST:\n{fstpy.maybeFST.__doc__}')

# NOTE: glob.glob is called without recursive=True, so '**' matches like '*'.
directory = os.path.join(ATM_MODEL_DFILES, 'bcmk', '**')
candidates = glob.glob(directory)

# Of the first ten matches, keep only the ones maybeFST accepts.
files = list(filter(fstpy.maybeFST, candidates[:10]))

df = fstpy.StandardFileReader(files).to_pandas()

# Display every column except 'd'.
cols = [*df.columns]
cols.remove('d')
df[cols]

# Get a lot of files

In [None]:
# Glob everything directly under the 'prog/reghyb' directory and keep only the
# entries that maybeFST accepts.
directory = os.path.join(CMCGRIDF, 'prog', 'reghyb', '**')
files = list(filter(fstpy.maybeFST, glob.glob(directory)))
print(f'found {len(files)} files')

# get_basic_dataframe (lightweight reader)

In [None]:
import pandas as pd
from tqdm import tqdm

print(f'get_basic_dataframe:\n{fstpy.get_basic_dataframe.__doc__}')

# Build one basic dataframe per file (tqdm shows progress), then stack them
# into a single dataframe with a fresh index.
df_list = list(map(fstpy.get_basic_dataframe, tqdm(files)))
df = pd.concat(df_list, ignore_index=True)
df

# StandardFileWriter, add_grid_column, add_dask_column

In [None]:
from os import getenv

# Output file lives under /tmp/<USER>; remove any previous copy first.
user = getenv("USER")
out_file = f'/tmp/{user}/TT.std'
fstpy.delete_file(out_file)

print(f'StandardFileWriter:\n{fstpy.StandardFileWriter.__doc__}')
print(f'add_grid_column:\n{fstpy.add_grid_column.__doc__}')
print(f'add_dask_column:\n{fstpy.add_dask_column.__doc__}')

# The basic dataframe has no grid column, so add it before filtering on it.
df = fstpy.add_grid_column(df)

# All TT records, whatever their grid.
tt_df = df.loc[df['nomvar'] == 'TT']

# The grid of the first TT record is the one we keep.
grid = tt_df.iloc[0]['grid']

# Restrict TT to that grid only.
tt_df = tt_df.loc[tt_df['grid'] == grid]

# Horizontal and vertical grid fields that belong to the same grid.
is_grid_field = df['nomvar'].isin(['>>', '^^', '^>', '!!', 'P0', 'PT'])
is_same_grid = df['grid'] == grid
grid_meta_df = df.loc[is_grid_field & is_same_grid]

# Stack the TT rows and their grid fields into one dataframe.
tt_df = pd.concat([tt_df, grid_meta_df], ignore_index=True)

# The basic dataframe needs the 'd' column before it can be written.
tt_df = fstpy.add_dask_column(tt_df)

# Write the results.
writer = fstpy.StandardFileWriter(out_file, tt_df)
writer.to_fst()

# add_columns

In [None]:
print(f'add_columns:\n{fstpy.add_columns.__doc__}')
df = fstpy.StandardFileReader('/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std').to_pandas()

print(df.columns)
# Remember which columns exist before any are added.
orig_cols = set(df.columns)
# Request the 'ip_info' columns (decoded ips).
df = fstpy.add_columns(df, 'ip_info')
new_cols = set(df.columns)
print(df.columns)
# The added columns are the ones present now but not before: new - orig.
# (orig - new, as previously written, is empty when columns are only added.)
diff = new_cols.difference(orig_cols)
print(f'new added columns {diff}')
# Display only the added columns, keeping the dataframe's own column order.
df[[col for col in df.columns if col in diff]]
