# Main functions
 - StandardFileReader
 - maybeFST
 - get_basic_dataframe
 - StandardFileWriter
 - add_dask_column
 - add_grid_column
 - add_columns
 - compute
 - get_2d_lat_lon
 - select_with_meta
 - metadata_cleanup
 - to_dask
 - to_numpy
 - unit_convert
 - unit_convert_array
 - fststat
 - voir

In [2]:
import datetime
import glob
import multiprocessing as mp
import os
from os import getenv

import fstpy
import numpy.ma as ma
import pandas as pd

# Disable pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'

# Root directory under which the 'prog/reghyb' files of the day are globbed.
CMCGRIDF  = '/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase'
# Directory whose 'bcmk' sub-directory provides the sample files read below.
ATM_MODEL_DFILES = '/fs/ssm/eccc/mrd/rpn/MIG/GEM/d/gem-data/gem-data_4.2.0/gem-data_4.2.0_all/share/data/dfiles'

# StandardFileReader and maybeFST
- reads in the record information of the provided file(s)

In [3]:
# Print the library's own documentation for the two helpers used in this cell.
print(f'StandardFileReader:\n{fstpy.StandardFileReader.__doc__}')
print(f'maybeFST:\n{fstpy.maybeFST.__doc__}')

# Glob the 'bcmk' sample directory and, from the first ten matches, keep
# only the paths that maybeFST accepts as FST files.
directory = os.path.join(ATM_MODEL_DFILES, 'bcmk' , '**')
files = list(filter(fstpy.maybeFST, glob.glob(directory)[:10]))

# Load the records into a dataframe, then display every column except the
# data column 'd'.
df = fstpy.StandardFileReader(files).to_pandas()
cols = df.columns.tolist()
cols.remove('d')
df[cols]



StandardFileReader:
Class to handle fst files. Opens, reads the contents of an fst file or files into a pandas dataframe and closes. Extra metadata columns are added to the dataframe if specified.    

        :param filenames: path to file or list of paths to files  
        :type filenames: str|list[str], does not accept wildcards (numpy has 
                         many tools for this)  
        :param decode_metadata: adds extra columns, defaults to False  
            'unit':str, unit name   
            'unit_converted':bool  
            'description':str, field description   
            'date_of_observation':datetime, of the date of observation   
            'date_of_validity': datetime, of the date of validity   
            'level':float32, decoded ip1 level   
            'ip1_kind':int32, decoded ip1 kind   
            'ip1_pkind':str, string repr of ip1_kind int   
            'data_type_str':str, string repr of data type   
            'label':str, label derived from 

Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,npas,datyp,nbits,grtyp,ig1,ig2,ig3,ig4,datev,grid
0,P0,P,G133K80P,200,100,1,354514400,0,0,0,...,0,1,12,G,0,0,0,0,354514400,00
1,TT,P,G133K80P,200,100,1,354514400,97642568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
2,TT,P,G133K80P,200,100,1,354514400,97738568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
3,TT,P,G133K80P,200,100,1,354514400,97899568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
4,TT,P,G133K80P,200,100,1,354514400,98152568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1582,I8,P,CCCMACISLDEO,200,100,1,323427143,0,8,0,...,0,1,12,G,0,0,0,0,323427143,00
1583,I8,P,CCCMACISLDEO,200,100,1,323427143,0,9,0,...,0,1,12,G,0,0,0,0,323427143,00
1584,I8,P,CCCMACISLDEO,200,100,1,323427143,0,10,0,...,0,1,12,G,0,0,0,0,323427143,00
1585,I8,P,CCCMACISLDEO,200,100,1,323427143,0,11,0,...,0,1,12,G,0,0,0,0,323427143,00


# Get a lot of files

In [4]:
# Glob pattern for today's files: the date stamp (YYYYMMDD) followed by a wildcard.
fdate     = datetime.date.today().strftime('%Y%m%d') + '**'
directory = os.path.join(CMCGRIDF, 'prog', 'reghyb' , fdate)
files = glob.glob(directory)

# Test every candidate path in parallel. pool.map spreads the calls over the
# worker processes and returns the results in the same order as `files`.
with mp.Pool(processes=20) as pool:
    res = pool.map(fstpy.maybeFST, files)

# Keep only the paths flagged as FST files. A plain filter is used on purpose:
# converting a numpy masked array to a list keeps the masked entries (as
# `masked` placeholders), which would leave rejected files in the list and
# inflate the count printed below.
files = [path for path, is_fst in zip(files, res) if is_fst]

print(f'found {len(files)} FST files')


found 170 FST files


# get_basic_dataframe (lightweight reader)

In [5]:
print(f'get_basic_dataframe:\n{fstpy.get_basic_dataframe.__doc__}')

# Use half of the available CPUs, but never fewer than one worker:
# mp.Pool refuses processes=0, which halving yields on a single-CPU host.
n_workers = max(1, mp.cpu_count() // 2)

# Build one lightweight dataframe per file in parallel; pool.map distributes
# the files over the workers and preserves the input order.
with mp.Pool(processes=n_workers) as pool:
    df_list = pool.map(fstpy.get_basic_dataframe, files)

# Stack the per-file dataframes into a single one with a fresh index.
df = pd.concat(df_list,ignore_index=True)
df

get_basic_dataframe:
Creates a dataframe of all non deleted records in an FST file, does not include data 'd'

    :param path: path of file to load
    :type path: str
    :return: dataframe of all non deleted records in an FST file
    :rtype: pd.DataFrame
    


Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,nbits,grtyp,ig1,ig2,ig3,ig4,datev,key,path,shape
0,HU,P,R1_V710_N,1108,1082,1,453771800,95529009,5,0,...,16,Z,33792,77761,1,0,453776300,1,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1108, 1082)"
1,HU,P,R1_V710_N,1108,1082,1,453771800,97351772,5,0,...,16,Z,33792,77761,1,0,453776300,1025,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1108, 1082)"
2,GZ,P,R1_V710_N,1108,1082,1,453771800,95364364,5,0,...,16,Z,33792,77761,1,0,453776300,2049,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1108, 1082)"
3,GZ,P,R1_V710_N,1108,1082,1,453771800,95357866,5,0,...,16,Z,33792,77761,1,0,453776300,3073,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1108, 1082)"
4,TT,P,R1_V710_N,1108,1082,1,453771800,95178882,5,0,...,16,Z,33792,77761,1,0,453776300,4097,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1108, 1082)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
318617,HF,P,R1_V710_N,1104,1078,1,453766400,60168832,71,0,...,12,Z,35132,56748,1,0,453830300,3748865,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1104, 1078)"
318618,FATB,P,R1_V710_N,1104,1078,1,453766400,60368832,71,0,...,12,Z,35132,56748,1,0,453830300,3749889,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1104, 1078)"
318619,>>,X,R1_V710_N,1104,1,1,453766400,35132,56748,1,...,32,E,1470,560,54400,46560,453766400,3750913,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1104, 1)"
318620,^^,X,R1_V710_N,1,1078,1,453766400,35132,56748,1,...,32,E,1470,560,54400,46560,453766400,3751937,/fs/site3/eccc/ops/cmod/prod/hubs/gridpt/dbase...,"(1, 1078)"


# StandardFileWriter, add_grid_column, add_dask_column

In [6]:
# setup our output file, removing any leftover from a previous run
user = getenv("USER")
out_file = f'/home/{user}/TT.std'
if os.path.isfile(out_file):
    os.remove(out_file)


print(f'StandardFileWriter:\n{fstpy.StandardFileWriter.__doc__}')
print(f'add_grid_column:\n{fstpy.add_grid_column.__doc__}')
print(f'add_dask_column:\n{fstpy.add_dask_column.__doc__}')

try:
    # the basic dataframe has no grid column
    df = fstpy.add_grid_column(df)

    # get all the TT records valid at one specific date
    # NOTE(review): datev is hard-coded while `df` is built from today's
    # files; confirm the value still matches the data, otherwise the
    # selection is empty and `iloc[0]` below raises IndexError.
    tt_df = df.loc[(df.nomvar=='TT') & (df.datev==453774500)]

    # get the first grid
    grid = tt_df.iloc[0].grid

    # get TT's horizontal and vertical grid fields
    grid_meta_df = df.loc[(df.nomvar.isin(['>>', '^^', '^>', '!!', 'P0', 'PT'])) & (df.grid==grid)]

    # join all the rows together
    tt_df = pd.concat([tt_df,grid_meta_df], ignore_index=True)

    # the basic dataframe needs the 'd' column
    tt_df = fstpy.add_dask_column(tt_df)

    # write the results
    fstpy.StandardFileWriter(out_file, tt_df).to_fst()

    # read the file back to show what was written
    print(fstpy.get_basic_dataframe(out_file))
finally:
    # Always remove the demo output, even when a step above failed, so a
    # broken run does not leave a stray file in the home directory.
    if os.path.isfile(out_file):
        os.remove(out_file)

StandardFileWriter:
Writes a standard file Dataframe to file. If no metada fields like ^^ and >> are found,
    an attempt will be made to load them from the original file so that they can be added to the output if not already present

    :param filename: path of file to write to
    :type filename: str
    :param df: dataframe to write
    :type df: pd.DataFrame
    :param mode: In 'dump' mode, no processing will be done on the dataframe 
                before writing, data must be present in the dataframe (df = compute(df)).
                If set to 'update', path must be an existing file. Only the 
                field metadata will be updated, the data itself will not be 
                modified. In 'write' mode, the data will be loaded, metadata 
                fields like '>>' will be added if not present default 'write'
    :type mode: str
    :param no_meta: if true these fields ["^>", ">>", "^^", "!!", "!!SF", "HY", "P0", "PT", "E1","PN"] will be removed from the datafra



    nomvar typvar     etiket    ni    nj  nk      dateo       ip1    ip2  ip3  \
0       TT      P  R1_V710_N  1108  1082   1  453766400  76696048      9    0   
1       TT      P  R1_V710_N  1108  1082   1  453771800  76696048      3    0   
2       TT      P  R1_V710_N  1108  1082   1  453766400  95178882      9    0   
3       TT      P  R1_V710_N  1108  1082   1  453766400  95154915      9    0   
4       TT      P  R1_V710_N  1108  1082   1  453766400  95529009      9    0   
..     ...    ...        ...   ...   ...  ..        ...       ...    ...  ...   
340     P0      P  R1_V710_N  1108  1082   1  453766400         0     23    0   
341     P0      P  R1_V710_N  1108  1082   1  453766400         0     14    0   
342     P0      P  R1_V710_N  1108  1082   1  453771800         0      0    0   
343     P0      P  R1_V710_N  1108  1082   1  453766400         0     71    0   
344     !!      X  R1_V710_N     3   175   1          0     33792  77761    0   

     ...  nbits  grtyp    i

# add_columns

In [17]:
print(f'add_columns:\n{fstpy.add_columns.__doc__}')

df = fstpy.StandardFileReader('/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std').to_pandas()

# remember which columns exist before decoding
print(df.columns)
orig_cols = set(df.columns)

# add decode ips
df = fstpy.add_columns(df, 'ip_info')

# columns present after decoding
new_cols = set(df.columns)
print(df.columns)

# whatever is new was added by add_columns
diff = new_cols - orig_cols
print(f'newly added columns {diff}')

df[list(diff)]

add_columns:
If valid columns are provided, they will be added. 
       These include ['flags','etiket','unit','dateo','datev','forecast_hour',
       'datyp','ip_info']

    :param df: dataframe to modify (meta data needs to be present in dataframe)
    :type df: pd.DataFrame
    :param decode: if decode is True, add the specified columns
    :type decode: bool
    :param columns: [description], defaults to 
                    ['flags','etiket','unit','dateo','datev','forecast_hour',
                    'datyp','ip_info']
    :type columns: list[str], optional
    
Index(['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk', 'dateo', 'ip1', 'ip2',
       'ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3',
       'ig4', 'datev', 'grid', 'd'],
      dtype='object')
Index(['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk', 'dateo', 'ip1', 'ip2',
       'ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3',
       'ig4', 'datev', 'grid', 'd', 'level', 'ip1_kind'

Unnamed: 0,follow_topography,ip1_kind,interval,ip3_dec,ip1_pkind,ip2_kind,level,ip3_pkind,ip3_kind,ascending,ip2_pkind,surface,ip2_dec,vctype
0,False,0,,0.000000E+00,m,0,0.000000E+00,m,0,True,m,False,0.000000E+00,UNKNOWN
1,False,0,,0.000000E+00,m,0,0.000000E+00,m,0,True,m,False,0.000000E+00,UNKNOWN
2,False,0,,0.000000E+00,m,0,0.000000E+00,m,0,True,m,False,0.000000E+00,UNKNOWN
3,False,2,,0.000000E+00,mb,10,0.000000E+00,H,10,False,H,False,6.000000E+00,UNKNOWN
4,False,2,,0.000000E+00,mb,10,0.000000E+00,H,10,False,H,False,6.000000E+00,UNKNOWN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1869,True,5,,0.000000E+00,hy,10,1.267870E-01,H,10,False,H,False,6.000000E+00,UNKNOWN
1870,True,5,,0.000000E+00,hy,10,6.503550E-02,H,10,False,H,False,6.000000E+00,UNKNOWN
1871,True,5,,0.000000E+00,hy,10,5.947270E-02,H,10,False,H,False,6.000000E+00,UNKNOWN
1872,True,5,,0.000000E+00,hy,10,9.149660E-01,H,10,False,H,False,6.000000E+00,UNKNOWN


# compute

In [18]:
print(f'compute:\n{fstpy.compute.__doc__}')

df = fstpy.StandardFileReader('/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std').to_pandas()
df = df.loc[df['nomvar'] == 'TT']

# build a one-row dataframe from the first TT record
first_tt = df.iloc[0].to_dict()
tt_df = pd.DataFrame([first_tt])

# show the 'd' value before ...
print(f'\ndask array \n{tt_df.iloc[0].d}')

# ... and after fstpy.compute has processed the dataframe
tt_df = fstpy.compute(tt_df)
print(f'\nnumpy array \n{tt_df.iloc[0].d}')

compute:
Converts all dask arrays contained in the 'd' column, by numpy arrays

    :param df: input DataFrame
    :type df: pd.DataFrame
    :param remove_path_and_key: remove path and key column after conversion, defaults to True
    :type remove_path_and_key: bool, optional
    :return: modified dataframe with numpy arrays instead of dask arrays
    :rtype: pd.DataFrame
    

dask array 
dask.array</fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std:4107, shape=(1108, 1082), dtype=float32, chunksize=(1108, 1082), chunktype=numpy.ndarray>

numpy array 
[[15.86026  15.958893 16.014557 ... 10.861237 10.938385 10.963776]
 [15.885651 15.991119 16.063385 ... 10.924713 10.991119 10.979401]
 [15.901276 15.984283 16.064362 ... 11.001862 11.059479 11.045807]
 ...
 [14.913971 14.603424 14.750885 ... 10.505768 10.909088 11.204987]
 [14.729401 14.597565 14.745026 ... 10.599518 10.975494 11.177643]
 [14.51651  14.418854 14.594635 ... 10.662994 10.969635 11.151276]]


# get_2d_lat_lon

In [19]:
print(f'get_2d_lat_lon:\n{fstpy.get_2d_lat_lon.__doc__}')

df = fstpy.StandardFileReader('/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std').to_pandas()

# restrict to the TT records, then ask for the lat/lon fields of their grids
is_tt = df['nomvar'] == 'TT'
tt_df = df.loc[is_tt]
lat_lon_df = fstpy.get_2d_lat_lon(tt_df)

lat_lon_df

get_2d_lat_lon:
get_2d_lat_lon Gets the latitudes and longitudes as 2d arrays associated with the supplied grids

    :return: a pandas Dataframe object containing the lat and lon meta data of the grids
    :rtype: pd.DataFrame
    :raises StandardFileError: no records to process
    


Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,ig1,ig2,ig3,ig4,datev,key,path,shape,grid,d
0,LA,X,R1_V710_N,1108,1082,1,442998800,33792,77761,1,...,1470,560,54400,46560,442998800,2265099,/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_...,"(1108, 1082)",3379277761,"[[-8.059764, -8.0017, -7.9436274, -7.885541, -..."
1,LO,X,R1_V710_N,1108,1082,1,442998800,33792,77761,1,...,1470,560,54400,46560,442998800,2264075,/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_...,"(1108, 1082)",3379277761,"[[231.28516, 231.21573, 231.14629, 231.07687, ..."


# select_with_meta

In [20]:
print(f'select_with_meta:\n{fstpy.select_with_meta.__doc__}')

df = fstpy.StandardFileReader('/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std').to_pandas()

# select the wind components together with their accompanying metadata records
wanted_nomvars = ['UU','VV']
uuvv_df = fstpy.select_with_meta(df, wanted_nomvars)

uuvv_df

select_with_meta:
Select fields with accompaning meta data  

    :param df: dataframe to select from  
    :type df: pd.DataFrame  
    :param nomvar: list of nomvars to select   
    :type nomvar: list  
    :raises SelectError: if dataframe is empty, if nothing to select or if variable not found in dataframe  
    :return: dataframe with selection results  
    :rtype: pd.DataFrame  
    


Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,ip2_kind,ip2_pkind,ip3_dec,ip3_kind,ip3_pkind,surface,follow_topography,ascending,interval,vctype
0,UU,P,R1_V710_N,1108,1082,1,442998800,75597472,6,0,...,10,H,0.000000E+00,10,H,True,True,True,,HYBRID_5005
1,VV,P,R1_V710_N,1108,1082,1,442998800,75597472,6,0,...,10,H,0.000000E+00,10,H,True,True,True,,HYBRID_5005
2,UU,P,R1_V710_N,1108,1082,1,442998800,96251080,6,0,...,10,H,0.000000E+00,10,H,False,True,False,,HYBRID_5005
3,UU,P,R1_V710_N,1108,1082,1,442998800,96188652,6,0,...,10,H,0.000000E+00,10,H,False,True,False,,HYBRID_5005
4,UU,P,R1_V710_N,1108,1082,1,442998800,95190655,6,0,...,10,H,0.000000E+00,10,H,False,True,False,,HYBRID_5005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,VV,P,R1_V710_N,1108,1082,1,442998800,95117226,6,0,...,10,H,0.000000E+00,10,H,False,True,False,,HYBRID_5005
170,^^,X,R1_V710_N,1,1082,1,442998800,33792,77761,1,...,0,m,0.000000E+00,0,m,False,False,True,,UNKNOWN
171,>>,X,R1_V710_N,1108,1,1,442998800,33792,77761,1,...,0,m,0.000000E+00,0,m,False,False,True,,UNKNOWN
172,P0,P,R1_V710_N,1108,1082,1,442998800,0,6,0,...,10,H,0.000000E+00,10,H,False,False,False,,UNKNOWN


# metadata_cleanup

In [21]:
print(f'metadata_cleanup:\n{fstpy.metadata_cleanup.__doc__}')

df = fstpy.StandardFileReader('/fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std').to_pandas()

# nomvars treated as metadata in this demo, and the columns shown in the report
meta_nomvars = ['!!','^^','>>','^>','P0','PT']
shown_cols = ['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk','dateo', 'ip1', 'ip2', 'ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3', 'ig4', 'datev']

# every metadata record of the file, plus UU/VV with the metadata that
# select_with_meta attaches to them, concatenated into one dataframe
meta_df = df.loc[df['nomvar'].isin(meta_nomvars)]
uuvv_df = fstpy.select_with_meta(df, ['UU','VV'])
all_df = pd.concat([meta_df,uuvv_df], ignore_index=True)

before_report = all_df.loc[all_df['nomvar'].isin(meta_nomvars)][shown_cols].to_string()
print(f"before cleanup\n{before_report}")

all_df = fstpy.metadata_cleanup(all_df)

after_report = all_df.loc[all_df['nomvar'].isin(meta_nomvars)][shown_cols].to_string()
print(f"after cleanup\n{after_report}")

metadata_cleanup:
Cleans the metadata from a dataframe according to rules.   

    :param df: dataframe to clean  
    :type df: pd.DataFrame  
    :return: dataframe with only cleaned meta_data  
    :rtype: pd.DataFrame  
    
before cleanup
    nomvar typvar     etiket    ni    nj  nk      dateo    ip1    ip2  ip3  deet  npas  datyp  nbits grtyp    ig1    ig2    ig3    ig4      datev
0       P0      P  R1_V710_N  1108  1082   1  442998800      0      6    0   300    72    134     16     Z  33792  77761      1      0  443004200
1       >>      X  R1_V710_N  1108     1   1  442998800  33792  77761    1     0     0      5     32     E   1470    560  54400  46560  442998800
2       ^^      X  R1_V710_N     1  1082   1  442998800  33792  77761    1     0     0      5     32     E   1470    560  54400  46560  442998800
3       !!      X  R1_V710_N     3   175   1          0  33792  77761    0     0     0      5     64     X   5005      0    300   1500          0
4       >>      X  R1_V710

# to_dask / to_numpy

In [22]:
print(f'to_dask:\n{fstpy.to_dask.__doc__}')
print(f'to_numpy:\n{fstpy.to_numpy.__doc__}')

# take the data of row 0 of the dataframe built in the previous cell
first_row = all_df.loc[0]
dask_array = first_row['d']
print(dask_array)

# passing that array through to_dask
print(fstpy.to_dask(dask_array))

# to_numpy, then back through to_dask
numpy_array = fstpy.to_numpy(dask_array)
print(numpy_array)
print(fstpy.to_dask(numpy_array))

to_dask:
If the array is of dask type, no op, else comvert array to dask array

    :param arr: array to convert
    :type arr: np.ndarray|da.core.Array
    :raises ConversionError: Raised if not a numpy or dask array
    :return: a dask array
    :rtype: da.core.Array
    
to_numpy:
If the array is of numpy type, no op, else compute de daks array to get a numpy array

    :param arr: array to convert
    :type arr: np.ndarray|da.core.Array
    :raises ConversionError: Raised if not a numpy or dask array
    :return: a numpy array
    :rtype: np.ndarray
    
dask.array</fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std:616459, shape=(1108, 1082), dtype=float32, chunksize=(1108, 1082), chunktype=numpy.ndarray>
dask.array</fs/site4/eccc/cmd/w/sbf000/fstpy/source_data_5005.std:616459, shape=(1108, 1082), dtype=float32, chunksize=(1108, 1082), chunktype=numpy.ndarray>
[[-6.270401  -6.6483307 -6.9207916 ... -2.714737  -3.1170807 -3.4950104]
 [-6.3768463 -6.7743073 -7.084854  ... -2.9510

# unit_convert_array

In [23]:
print(f'unit_convert_array:\n{fstpy.unit_convert_array.__doc__}')

# dictionary entry for UU
uu_entry = fstpy.STDVAR.loc[fstpy.STDVAR['nomvar'] == 'UU']
print(uu_entry.to_string())

# definitions of the source and target units
knot_entry = fstpy.UNITS.loc[fstpy.UNITS['name'] == 'knot']
print(knot_entry.to_string())

mps_entry = fstpy.UNITS.loc[fstpy.UNITS['name'] == 'meter_per_second']
print(mps_entry.to_string())

# NOTE(review): numpy_array comes from row 0 of all_df in the previous cell;
# confirm that row really is a UU record before trusting the labels below.
print(f'UU in knots\n{numpy_array}')

converted_array = fstpy.unit_convert_array(numpy_array,'knot','meter_per_second')
print(f'UU in m/s\n{converted_array}')


unit_convert_array:
Converts the data to the specified unit provided in the to_unit_name parameter.

    :param arr: array to be converted
    :type df: np.ndarray
    :param from_unit_name: unit name to convert from
    :type from_unit_name: str
    :param to_unit_name: unit name to convert to, defaults to 'scalar'
    :type to_unit_name: str, optional
    :return: an array containing the converted data
    :rtype: np.ndarray
    
    nomvar                                           description_fr                                   description_en  unit
796     UU  Composante U du vent (selon l''axe des X sur la grille)  U-Component of the Wind (Along the grid X axis)  knot
    name symbol expression         bias       factor  mass  length  time  electricCurrent  temperature  amountOfSubstance  luminousIntensity
64  knot     kt   m·s^(-1) 0.000000E+00 5.144445E-01     0       1    -1                0            0                  0                  0
                name symbol expressi

# unit_convert

In [24]:
print(f'unit_convert:\n{fstpy.unit_convert.__doc__}')

# UU records of the dataframe built earlier, with the unit/description columns added
uu_only = all_df.loc[all_df['nomvar'] == 'UU']
uu_df = fstpy.add_unit_and_description_columns(uu_only)

before_view = uu_df[['nomvar', 'typvar', 'etiket', 'dateo', 'ip1', 'unit']]
print(f"Before conversion\n{before_view}")

uu_converted_df = fstpy.unit_convert(uu_df, 'meter_per_second')

after_view = uu_converted_df[['nomvar', 'typvar', 'etiket', 'dateo', 'ip1', 'unit']]
print(f"After conversion\n{after_view}")

unit_convert:
Converts the data portion 'd' of all the records of a dataframe to the specified unit
    provided in the to_unit_name parameter. If the standard_unit flag is True, the to_unit_name
    will be ignored and the unit will be based on the standard file variable dictionnary unit
    value instead. This ensures that if a unit conversion was done, the varaible will return
    to the proper standard file unit value. ex. : TT should be in celsius. o.dict can be consulted
    to get the appropriate unit values.

    :param df: dataframe containing records to be converted
    :type df: pd.DataFrame
    :param to_unit_name: unit name to convert to, defaults to 'scalar'
    :type to_unit_name: str, optional
    :param standard_unit: flag to indicate the use of dictionnary units, defaults to False
    :type standard_unit: bool, optional
    :return: a dataframe containing the converted data
    :rtype: pd.DataFrame
    
Before conversion
   nomvar typvar     etiket      dateo       ip

# fststat

In [25]:
print(f'fststat:\n{fstpy.fststat.__doc__}')

# summary statistics for the UU records only
uu_records = all_df.loc[all_df['nomvar'] == 'UU']
fstpy.fststat(uu_records)

fststat:
Produces summary statistics for a dataframe

    :param df: input dataframe
    :type df: pd.DataFrame
    
   nomvar typvar        level       ip1  ip2  ip3      dateo     etiket          mean          std       min_pos           min      max_pos          max
0      UU      P 1.000000E+01  75597472    6    0  442998800  R1_V710_N -9.081321E-01 8.378689E+00    (422, 660) -2.956825E+01   (830, 941) 3.202647E+01
1      UU      P 8.306640E-02  96251080    6    0  442998800  R1_V710_N -5.389307E+00 1.116624E+01     (610, 62) -4.683232E+01   (626, 426) 2.558956E+01
2      UU      P 7.682360E-02  96188652    6    0  442998800  R1_V710_N -7.306802E+00 1.178255E+01     (605, 79) -4.591711E+01   (612, 432) 1.854773E+01
3      UU      P 8.188150E-01  95190655    6    0  442998800  R1_V710_N -2.097447E-01 1.269552E+01    (705, 548) -4.305893E+01   (291, 598) 5.982975E+01
4      UU      P 7.954380E-01  95167278    6    0  442998800  R1_V710_N -2.559510E-01 1.300968E+01    (707, 548) -4.47

# voir

In [26]:
# Print the library's documentation for voir.
print(f'voir:\n{fstpy.voir.__doc__}')

# Call voir on the dataframe assembled in the metadata_cleanup cell.
fstpy.voir(all_df)

voir:
Displays the metadata of the supplied records in the rpn voir format

    nomvar typvar     etiket    ni    nj  nk               dateo       ip1    ip2  ip3  deet  npas datyp  nbits grtyp    ig1    ig2    ig3    ig4
0       !!      X  R1_V710_N     3   175   1                 NaT     33792  77761    0     0     0     E     64     X   5005      0    300   1500
1       >>      X  R1_V710_N  1108     1   1 2020-07-14 12:00:00     33792  77761    1     0     0     E     32     E   1470    560  54400  46560
2       P0      P  R1_V710_N  1108  1082   1 2020-07-14 12:00:00         0      6    0   300    72     f     16     Z  33792  77761      1      0
3       UU      P  R1_V710_N  1108  1082   1 2020-07-14 12:00:00  75597472      6    0   300    72     f     16     Z  33792  77761      1      0
4       UU      P  R1_V710_N  1108  1082   1 2020-07-14 12:00:00  95369342      6    0   300    72     f     16     Z  33792  77761      1      0
5       UU      P  R1_V710_N  1108  1082   1 202