# Main functions
 - StandardFileReader
 - maybeFST
 - get_basic_dataframe
 - StandardFileWriter
 - add_dask_column
 - add_grid_column
 - add_columns
 - compute
 - get_2d_lat_lon_df
 - select_with_meta
 - metadata_cleanup
 - to_dask
 - to_numpy
 - unit_convert
 - unit_convert_array
 - fststat
 - voir

In [1]:
import pandas as pd
import os
import multiprocessing as mp
import numpy.ma as ma
import datetime
import fstpy
import glob
from os import getenv

# Turn off pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'

# Root of the gridded-data hub; globbed below to collect today's forecast files.
CMCGRIDF  = '/space/hall5/sitestore/eccc/prod/hubs/gridpt/dbase'
# Data directory of the gem-data 4.2.0 package; used below as a source of sample FST files.
ATM_MODEL_DFILES = '/fs/ssm/eccc/mrd/rpn/MIG/GEM/d/gem-data/gem-data_4.2.0/gem-data_4.2.0_all/share/data/dfiles'

# StandardFileReader and maybeFST
- reads in the record information of the provided file(s)

In [2]:
# Show the reference documentation of the two entry points used in this cell.
reader_doc = fstpy.StandardFileReader.__doc__
maybe_fst_doc = fstpy.maybeFST.__doc__
print(f'StandardFileReader:\n{reader_doc}')
print(f'maybeFST:\n{maybe_fst_doc}')

# Candidate sample files: everything directly matched by the bcmk pattern.
directory = os.path.join(ATM_MODEL_DFILES, 'bcmk' , '**')
files = glob.glob(directory)

# Keep only the FST files among the first ten candidates.
files = list(filter(fstpy.maybeFST, files[:10]))

# Read the records of all retained files into a single dataframe.
reader = fstpy.StandardFileReader(files)
df = reader.to_pandas()

# Display every column except the data column 'd'.
cols = df.columns.tolist()
cols.remove('d')
df[cols]

StandardFileReader:
Class to handle fst files. Opens, reads the contents of an fst file or files into a pandas dataframe and closes. Extra metadata columns are added to the dataframe if specified.    

        :param filenames: path to file or list of paths to files  
        :type filenames: str|pathlib.Path|list[str], does not accept wildcards (numpy has 
                         many tools for this)  
        :param decode_metadata: adds extra columns, defaults to False  
            'unit':str, unit name   
            'unit_converted':bool  
            'description':str, field description   
            'date_of_observation':datetime, of the date of observation   
            'date_of_validity': datetime, of the date of validity   
            'level':float32, decoded ip1 level   
            'ip1_kind':int32, decoded ip1 kind   
            'ip1_pkind':str, string repr of ip1_kind int   
            'data_type_str':str, string repr of data type   
            'label':str, label 



Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,npas,datyp,nbits,grtyp,ig1,ig2,ig3,ig4,datev,grid
0,P0,P,G133K80P,200,100,1,354514400,0,0,0,...,0,1,12,G,0,0,0,0,354514400,00
1,TT,P,G133K80P,200,100,1,354514400,97642568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
2,TT,P,G133K80P,200,100,1,354514400,97738568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
3,TT,P,G133K80P,200,100,1,354514400,97899568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
4,TT,P,G133K80P,200,100,1,354514400,98152568,0,0,...,0,1,12,G,0,0,0,0,354514400,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1582,I8,P,CCCMACISLDEO,200,100,1,323427143,0,8,0,...,0,1,12,G,0,0,0,0,323427143,00
1583,I8,P,CCCMACISLDEO,200,100,1,323427143,0,9,0,...,0,1,12,G,0,0,0,0,323427143,00
1584,I8,P,CCCMACISLDEO,200,100,1,323427143,0,10,0,...,0,1,12,G,0,0,0,0,323427143,00
1585,I8,P,CCCMACISLDEO,200,100,1,323427143,0,11,0,...,0,1,12,G,0,0,0,0,323427143,00


# Get a lot of files

In [3]:
# Glob pattern matching every file whose name starts with today's date (YYYYMMDD).
fdate = datetime.date.today().strftime('%Y%m%d') + '**'

directory = os.path.join(CMCGRIDF, 'prog', 'reghyb', fdate)

files = glob.glob(directory)

# Check the candidates in parallel: pool.map spreads the calls over the worker
# processes and returns the results in the same order as the input.
with mp.Pool(processes=20) as pool:
    res = pool.map(fstpy.maybeFST, files)

# Keep only the paths recognised as FST files. Converting a masked array to a
# list would NOT drop the rejected entries (they come back as the `masked`
# constant), so filter the paths explicitly against the check results.
files = [file for file, is_fst in zip(files, res) if is_fst]

print(f'found {len(files)} FST files')


found 170 FST files


# get_basic_dataframe (lightweight reader)

In [4]:
print(f'get_basic_dataframe:\n{fstpy.get_basic_dataframe.__doc__}')

# Use half of the available cores, but never fewer than one worker:
# mp.Pool(processes=0) raises ValueError on a single-core machine.
n_workers = max(1, mp.cpu_count() // 2)

# Each file is read in a worker process; pool.map returns one dataframe per
# file, in the same order as `files`.
with mp.Pool(processes=n_workers) as pool:
    df_list = pool.map(fstpy.get_basic_dataframe, files)

# Stack the per-file dataframes into one, renumbering the rows.
df = pd.concat(df_list,ignore_index=True)
df

get_basic_dataframe:
Creates a dataframe of all non deleted records in an FST file, does not include data 'd'

    :param path: path of file to load
    :type path: str
    :return: dataframe of all non deleted records in an FST file
    :rtype: pd.DataFrame
    


Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,ig1,ig2,ig3,ig4,datev,lng,swa,key,path,shape
0,QC,P,R1_V810_N,1108,1082,1,465387200,95129878,70,0,...,32914,39184,1,0,465450200,111346,2335,1,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1108, 1082)"
1,HU,P,R1_V810_N,1108,1082,1,465387200,95129878,70,0,...,32914,39184,1,0,465450200,354720,58008,1025,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1108, 1082)"
2,GZ,P,R1_V810_N,1108,1082,1,465387200,95103960,70,0,...,32914,39184,1,0,465450200,203072,235368,2049,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1108, 1082)"
3,GZ,P,R1_V810_N,1108,1082,1,465387200,95064082,70,0,...,32914,39184,1,0,465450200,202284,336904,3073,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1108, 1082)"
4,ES,P,R1_V810_N,1108,1082,1,465387200,95103960,70,0,...,32914,39184,1,0,465450200,387402,438046,4097,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1108, 1082)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322867,UDST,P,R1_V810_N,1104,1078,1,465387200,59968832,30,0,...,57711,96444,1,0,465414200,33234,163749388,3774465,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1104, 1078)"
322868,QD,P,R1_V810_N,1104,1078,1,465387200,95749123,30,0,...,57711,96444,1,0,465414200,17406,163766005,3775489,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1104, 1078)"
322869,>>,X,R1_V810_N,1104,1,1,465387200,57711,96444,1,...,1470,560,54400,46560,465387200,1128,163774708,3776513,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1104, 1)"
322870,^^,X,R1_V810_N,1,1078,1,465387200,57711,96444,1,...,1470,560,54400,46560,465387200,1102,163775272,3777537,/space/hall5/sitestore/eccc/prod/hubs/gridpt/d...,"(1, 1078)"


# StandardFileWriter, add_grid_column, add_dask_column

In [25]:
# Set up the output file: start from a clean slate by deleting any leftover
# file from a previous run.
# NOTE(review): getenv("USER") returns None when USER is unset, which would
# yield the path '/home/None/TT.std' - confirm USER is always defined here.
user = getenv("USER")
out_file = f'/home/{user}/TT.std'
if os.path.isfile(out_file):
    os.remove(out_file)


# Show the reference documentation of the functions used in this cell.
print(f'StandardFileWriter:\n{fstpy.StandardFileWriter.__doc__}')
print(f'add_grid_column:\n{fstpy.add_grid_column.__doc__}')
print(f'add_dask_column:\n{fstpy.add_dask_column.__doc__}')

# the basic dataframe has no grid column
df = fstpy.add_grid_column(df)

# get all the TT
tt_df = df.loc[(df.nomvar=='TT')]

# get the first grid
grid = tt_df.iloc[0].grid

# select TT with first grid only
# NOTE(review): dateo, datev, ip2 and ip1 are hard-coded; since `df` is built
# from files globbed for the current date, these values presumably have to be
# updated for each run, otherwise the selection comes back empty.
tt_df = tt_df.loc[(tt_df.grid==grid) & (tt_df.dateo==465387200) & (tt_df.datev==465450200) & (tt_df.ip2==70) & (tt_df.ip1==95791989)]

# get TT's horizontal and vertical grid fields
grid_meta_df = df.loc[(df.nomvar.isin(['>>', '^^', '^>', '!!', 'PT'])) & (df.grid==grid)]
p0_meta_df = df.loc[(df.nomvar.isin(['P0'])) & (df.grid==grid) & (df.ip2==70)]

# join all the rows together
tt_df = pd.concat([tt_df,grid_meta_df,p0_meta_df], ignore_index=True)

# cleanup the dataframe: the same record can be present in several of the
# files read above, so keep a single row per set of record attributes
tt_df = tt_df.drop_duplicates(subset=['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk', 'dateo', 'ip1', 'ip2','ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3','ig4', 'datev'])

# the basic dataframe needs the 'd' column
tt_df = fstpy.add_dask_column(tt_df) # caution: the dateo must be changed to the current date

# write the results
fstpy.StandardFileWriter(out_file, tt_df).to_fst()

# check the results by reading back the record list of the file just written
display(fstpy.get_basic_dataframe(out_file))

# remove the demonstration file
os.remove(out_file)

StandardFileWriter:
Writes a standard file Dataframe to file. If no metada fields like ^^ and >> are found,
    an attempt will be made to load them from the original file so that they can be added to the output if not already present

    :param filename: path of file to write to
    :type filename: str
    :param df: dataframe to write
    :type df: pd.DataFrame
    :param mode: In 'dump' mode, no processing will be done on the dataframe 
                before writing, data must be present in the dataframe (df = compute(df)).
                If set to 'update', path must be an existing file. Only the 
                field metadata will be updated, the data itself will not be 
                modified. In 'write' mode, the data will be loaded, metadata 
                fields like '>>' will be added if not present default 'write'
    :type mode: str
    :param no_meta: if true these fields ["^>", ">>", "^^", "!!", "!!SF", "HY", "P0", "PT", "E1"] will be removed from the dataframe
  

  toctoc_fields_df = pd.concat(df_list, ignore_index=True)
  new_df = pd.concat([grid_deformation_fields_df, p0_fields_df,


Read(993) !!   X  R1_V810_N           3     175     1  000000000        32914     39184         0        0        0  E 64  X  5005     0   300  1500
Read(993) TT   P  R1_V810_N        1108    1082     1  465387200     95791989        70         0      300      840  f 16  Z 32914 39184     1     0
Read(993) P0   P  R1_V810_N        1108    1082     1  465387200            0        70         0      300      840  f 16  Z 32914 39184     1     0
Read(993) >>   X  R1_V810_N        1108       1     1  465387200        32914     39184         1        0        0  E 32  E  1470   560 54400 46560
Read(993) ^^   X  R1_V810_N           1    1082     1  465387200        32914     39184         1        0        0  E 32  E  1470   560 54400 46560
Read(993) !!   X  R1_V810_N           3     175     1  000000000        32914     39184         0        0        0  E 64  X  5005     0   300  1500
Read(992) P0   P  R1_V810_N        1108    1082     1  465392600            0        70         0      300

Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,ig1,ig2,ig3,ig4,datev,lng,swa,key,path,shape
0,^^,X,R1_V810_N,1,1082,1,465387200,32914,39184,1,...,1470,560,54400,46560,465387200,1106,2335,9,/home/sbf000/TT.std,"(1, 1082)"
1,>>,X,R1_V810_N,1108,1,1,465387200,32914,39184,1,...,1470,560,54400,46560,465387200,1132,2888,1033,/home/sbf000/TT.std,"(1108, 1)"
2,P0,P,R1_V810_N,1108,1082,1,465387200,0,70,0,...,32914,39184,1,0,465450200,208054,3454,2057,/home/sbf000/TT.std,"(1108, 1082)"
3,P0,P,R1_V810_N,1108,1082,1,465392600,0,70,0,...,32914,39184,1,0,465455600,206954,107481,3081,/home/sbf000/TT.std,"(1108, 1082)"
4,!!,X,R1_V810_N,3,175,1,0,32914,39184,0,...,5005,0,300,1500,0,1074,210958,4105,/home/sbf000/TT.std,"(3, 175)"
5,TT,P,R1_V810_N,1108,1082,1,465387200,95791989,70,0,...,32914,39184,1,0,465450200,228506,211495,5129,/home/sbf000/TT.std,"(1108, 1082)"


# add_columns

In [26]:
print(f'add_columns:\n{fstpy.add_columns.__doc__}')

df = fstpy.StandardFileReader('/home/sbf000/ss5/source_data_5005.std').to_pandas()

# Columns present before decoding.
print(df.columns)
orig_cols = set(df.columns)

# Decode the ip1/ip2/ip3 values into extra columns.
df = fstpy.add_columns(df, 'ip_info')

# Columns present after decoding.
new_cols = set(df.columns)
print(df.columns)

# Whatever is in the new set but not in the original one was added by add_columns.
diff = new_cols - orig_cols
print(f'newly added columns {diff}')

# Display only the newly added columns.
df[list(diff)]

add_columns:
If valid columns are provided, they will be added. 
       These include ['flags','etiket','unit','dateo','datev','forecast_hour', 'datyp','ip_info']
       Replaces original column(s) if present.   

    :param df: dataframe to modify (meta data needs to be present in dataframe)
    :type df: pd.DataFrame
    :param decode: if decode is True, add the specified columns
    :type decode: bool
    :param columns: [description], defaults to  ['flags','etiket','unit','dateo','datev','forecast_hour', 'datyp','ip_info']
    :type columns: list[str], optional
    
Index(['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk', 'dateo', 'ip1', 'ip2',
       'ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3',
       'ig4', 'datev', 'grid', 'd'],
      dtype='object')
Index(['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk', 'dateo', 'ip1', 'ip2',
       'ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3',
       'ig4', 'datev', 'grid', 'd', 'level', 'ip1_ki

Unnamed: 0,ip3_dec,ip1_pkind,follow_topography,ip2_pkind,level,surface,ip3_kind,ip2_kind,ip3_pkind,ip2_dec,ip1_kind,interval,ascending,vctype
0,0.0,mb,False,H,0.0,False,100,10,,6.0,2,,False,HYBRID_5005
1,0.0,M,True,H,1.5,True,100,10,,6.0,4,,True,HYBRID_5005
2,0.0,M,True,H,1.5,True,100,10,,6.0,4,,True,HYBRID_5005
3,0.0,M,True,H,1.5,True,100,10,,6.0,4,,True,HYBRID_5005
4,0.0,M,True,H,1.5,True,100,10,,6.0,4,,True,HYBRID_5005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1869,4.0,,False,,50460.0,False,100,100,,53326.0,100,,True,UNKNOWN
1870,4.0,,False,,50460.0,False,100,100,,53326.0,100,,True,UNKNOWN
1871,1.0,,False,,35132.0,False,100,100,,56748.0,100,,True,UNKNOWN
1872,1.0,,False,,35132.0,False,100,100,,56748.0,100,,True,UNKNOWN


# compute

In [27]:
print(f'compute:\n{fstpy.compute.__doc__}')

df = fstpy.StandardFileReader('/home/sbf000/ss5/source_data_5005.std').to_pandas()

# Restrict the records to the TT variable.
df = df.loc[df['nomvar'] == 'TT']

# Build a one-row dataframe from the first TT record.
first_tt_record = df.iloc[0].to_dict()
tt_df = pd.DataFrame([first_tt_record])

# Before compute: the 'd' entry is printed as a dask array.
lazy_data = tt_df.iloc[0].d
print(f'\ndask array \n{lazy_data}')

tt_df = fstpy.compute(tt_df)

# After compute: the same entry is printed as a numpy array.
loaded_data = tt_df.iloc[0].d
print(f'\nnumpy array \n{loaded_data}')

compute:
Converts all dask arrays contained in the 'd' column, by numpy arrays

    :param df: input DataFrame
    :type df: pd.DataFrame
    :param remove_path_and_key: remove path and key column after conversion, defaults to True
    :type remove_path_and_key: bool, optional
    :return: modified dataframe with numpy arrays instead of dask arrays
    :rtype: pd.DataFrame
    

dask array 
dask.array</home/sbf000/ss5/source_data_5005.std:4105, shape=(1108, 1082), dtype=float32, chunksize=(1108, 1082), chunktype=numpy.ndarray>
Read(991) TT   P  R1_V710_N        1108    1082     1  442998800     95178882         6         0      300       72  f 16  Z 33792 77761     1     0

numpy array 
[[15.86026  15.958893 16.014557 ... 10.861237 10.938385 10.963776]
 [15.885651 15.991119 16.063385 ... 10.924713 10.991119 10.979401]
 [15.901276 15.984283 16.064362 ... 11.001862 11.059479 11.045807]
 ...
 [14.913971 14.603424 14.750885 ... 10.505768 10.909088 11.204987]
 [14.729401 14.597565 14.745026

# get_2d_lat_lon_df

In [28]:
print(f'get_2d_lat_lon_df:\n{fstpy.get_2d_lat_lon_df.__doc__}')

# Same sample file as the other cells (stray double slash removed from the path).
df = fstpy.StandardFileReader('/home/sbf000/ss5/source_data_5005.std').to_pandas()

# Only the TT records are passed on; their grid determines the lat/lon fields.
tt_df = df.loc[df.nomvar=='TT']

# Latitudes and longitudes of the TT grid as 2D fields.
lat_lon_df = fstpy.get_2d_lat_lon_df(tt_df)

lat_lon_df

get_2d_lat_lon_df:
Gets the latitudes and longitudes as 2d arrays associated with the supplied grids

    :return: a pandas Dataframe object containing the lat and lon meta data of the grids
    :rtype: pd.DataFrame
    :raises Get2DLatLonError: no records to process
    
Read(990) ^^   X  R1_V710_N           1    1082     1  442998800        33792     77761         1        0        0  E 32  E  1470   560 54400 46560
Read(990) >>   X  R1_V710_N        1108       1     1  442998800        33792     77761         1        0        0  E 32  E  1470   560 54400 46560


Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,grtyp,ig1,ig2,ig3,ig4,datev,grid,d,path,key
0,LA,P,R1_V710_N,1108,1082,1,442998800,95178882,6,0,...,Z,33792,77761,1,0,443004200,3379277761,"[[-8.059764, -8.0017, -7.9436274, -7.885541, -...",/home/sbf000/ss5/source_data_5005.std,4106
1,LO,P,R1_V710_N,1108,1082,1,442998800,95178882,6,0,...,Z,33792,77761,1,0,443004200,3379277761,"[[231.28516, 231.21573, 231.14629, 231.07687, ...",/home/sbf000/ss5/source_data_5005.std,4106


# select_with_meta

In [29]:
select_doc = fstpy.select_with_meta.__doc__
print(f'select_with_meta:\n{select_doc}')

reader = fstpy.StandardFileReader('/home/sbf000/ss5/source_data_5005.std')
df = reader.to_pandas()

# Select the UU and VV records together with their accompanying metadata records.
wind_nomvars = ['UU','VV']
uuvv_df = fstpy.select_with_meta(df, wind_nomvars)

uuvv_df

select_with_meta:
Select fields with accompaning meta data  

    :param df: dataframe to select from  
    :type df: pd.DataFrame  
    :param nomvar: list of nomvars to select   
    :type nomvar: list  
    :raises SelectError: if dataframe is empty, if nothing to select or if variable not found in dataframe  
    :return: dataframe with selection results  
    :rtype: pd.DataFrame  
    
Read(991) !!   X  R1_V710_N           3     175     1  000000000        33792     77761         0        0        0  E 64  X  5005     0   300  1500
Read(991) !!   X  R1_V710_N           3     175     1  000000000        35132     56748         0        0        0  E 64  X  5005     0   300  1500


  p0_fields_df = pd.concat(df_list, ignore_index=True)
  toctoc_fields_df = pd.concat(df_list, ignore_index=True)
  new_df = pd.concat([grid_deformation_fields_df, p0_fields_df,


Unnamed: 0,nomvar,typvar,etiket,ni,nj,nk,dateo,ip1,ip2,ip3,...,ip2_kind,ip2_pkind,ip3_dec,ip3_kind,ip3_pkind,surface,follow_topography,ascending,interval,vctype
0,^^,X,R1_V710_N,1,1082,1,442998800,33792,77761,1,...,100,,1.0,100,,False,False,True,,UNKNOWN
1,>>,X,R1_V710_N,1108,1,1,442998800,33792,77761,1,...,100,,1.0,100,,False,False,True,,UNKNOWN
2,P0,P,R1_V710_N,1108,1082,1,442998800,0,6,0,...,10,H,0.0,100,,False,False,False,,UNKNOWN
3,!!,X,R1_V710_N,3,175,1,0,33792,77761,0,...,100,,0.0,100,,False,False,True,,UNKNOWN
4,UU,P,R1_V710_N,1108,1082,1,442998800,75597472,6,0,...,10,H,0.0,100,,True,True,True,,HYBRID_5005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,VV,P,R1_V710_N,1108,1082,1,442998800,95621663,6,0,...,10,H,0.0,100,,False,True,False,,HYBRID_5005
170,VV,P,R1_V710_N,1108,1082,1,442998800,94571508,6,0,...,10,H,0.0,100,,False,True,False,,HYBRID_5005
171,VV,P,R1_V710_N,1108,1082,1,442998800,95142744,6,0,...,10,H,0.0,100,,False,True,False,,HYBRID_5005
172,VV,P,R1_V710_N,1108,1082,1,442998800,94701435,6,0,...,10,H,0.0,100,,False,True,False,,HYBRID_5005


# metadata_cleanup

In [30]:
print(f'metadata_cleanup:\n{fstpy.metadata_cleanup.__doc__}')

# Metadata records inspected before and after the cleanup.
meta_nomvars = ['!!','^^','>>','^>','P0','PT']
# Record attributes shown in the two summaries.
summary_cols = ['nomvar', 'typvar', 'etiket', 'ni', 'nj', 'nk','dateo', 'ip1', 'ip2', 'ip3', 'deet', 'npas', 'datyp', 'nbits', 'grtyp', 'ig1', 'ig2', 'ig3', 'ig4', 'datev']

df = fstpy.StandardFileReader('/home/sbf000/ss5/source_data_5005.std').to_pandas()

# Metadata picked by hand, plus UU/VV with the metadata select_with_meta brings along.
meta_df = df.loc[df.nomvar.isin(meta_nomvars)]
uuvv_df = fstpy.select_with_meta(df, ['UU','VV'])

# Stack both selections into one dataframe for the cleanup to work on.
all_df = pd.concat([meta_df,uuvv_df], ignore_index=True)

before_summary = all_df.loc[all_df.nomvar.isin(meta_nomvars)][summary_cols].to_string()
print(f"before cleanup\n{before_summary}")

all_df = fstpy.metadata_cleanup(all_df)

after_summary = all_df.loc[all_df.nomvar.isin(meta_nomvars)][summary_cols].to_string()
print(f"after cleanup\n{after_summary}")

metadata_cleanup:
Cleans the metadata from a dataframe according to rules.   

    :param df: dataframe to clean  
    :type df: pd.DataFrame  
    :return: dataframe with only cleaned meta_data  
    :rtype: pd.DataFrame  
    
Read(991) !!   X  R1_V710_N           3     175     1  000000000        33792     77761         0        0        0  E 64  X  5005     0   300  1500
Read(991) !!   X  R1_V710_N           3     175     1  000000000        35132     56748         0        0        0  E 64  X  5005     0   300  1500
before cleanup
   nomvar typvar     etiket    ni    nj nk      dateo    ip1    ip2 ip3 deet npas datyp nbits grtyp    ig1    ig2    ig3    ig4      datev
0      P0      P  R1_V710_N  1108  1082  1  442998800      0      6   0  300   72   134    16     Z  33792  77761      1      0  443004200
1      >>      X  R1_V710_N  1108     1  1  442998800  33792  77761   1    0    0     5    32     E   1470    560  54400  46560  442998800
2      ^^      X  R1_V710_N     1  1082  

  p0_fields_df = pd.concat(df_list, ignore_index=True)
  toctoc_fields_df = pd.concat(df_list, ignore_index=True)
  new_df = pd.concat([grid_deformation_fields_df, p0_fields_df,
  all_df = pd.concat([meta_df,uuvv_df], ignore_index=True)


# to_dask / to_numpy

In [31]:
to_dask_doc = fstpy.to_dask.__doc__
to_numpy_doc = fstpy.to_numpy.__doc__
print(f'to_dask:\n{to_dask_doc}')
print(f'to_numpy:\n{to_numpy_doc}')

# Take the data of the row labelled 0 in the dataframe.
first_row = all_df.loc[0]
dask_array = first_row.d
print(dask_array)

# Passing a dask array through to_dask.
still_dask = fstpy.to_dask(dask_array)
print(still_dask)

# Converting the dask array to numpy.
numpy_array = fstpy.to_numpy(dask_array)
print(numpy_array)

# Converting the numpy array back to dask.
back_to_dask = fstpy.to_dask(numpy_array)
print(back_to_dask)

to_dask:
If the array is of dask type, no op, else comvert array to dask array

    :param arr: array to convert
    :type arr: np.ndarray|da.core.Array
    :raises ConversionError: Raised if not a numpy or dask array
    :return: a dask array
    :rtype: da.core.Array
    
to_numpy:
If the array is of numpy type, no op, else compute de daks array to get a numpy array

    :param arr: array to convert
    :type arr: np.ndarray|da.core.Array
    :raises ConversionError: Raised if not a numpy or dask array
    :return: a numpy array
    :rtype: np.ndarray
    
dask.array</home/sbf000/ss5/source_data_5005.std:2265098, shape=(1, 1082), dtype=float32, chunksize=(1, 1082), chunktype=numpy.ndarray>
dask.array</home/sbf000/ss5/source_data_5005.std:2265098, shape=(1, 1082), dtype=float32, chunksize=(1, 1082), chunktype=numpy.ndarray>
Read(991) ^^   X  R1_V710_N           1    1082     1  442998800        33792     77761         1        0        0  E 32  E  1470   560 54400 46560
[[-48.78     -

# unit_convert_array

In [32]:
print(f'unit_convert_array:\n{fstpy.unit_convert_array.__doc__}')

# Dictionary entry for UU (shows the unit its data is expressed in) and the
# definitions of the two units involved in the conversion.
print(fstpy.STDVAR.loc[fstpy.STDVAR.nomvar=='UU'].to_string())

print(fstpy.UNITS.loc[fstpy.UNITS.name=='knot'].to_string())

print(fstpy.UNITS.loc[fstpy.UNITS.name=='meter_per_second'].to_string())

# Convert an actual UU record. The array left over from the to_dask/to_numpy
# cell is the data of row 0 of all_df (a '^^' record in the recorded output),
# not wind data, so it must not be labelled or converted as UU.
uu_array = fstpy.to_numpy(all_df.loc[all_df.nomvar=='UU'].iloc[0].d)

print(f'UU in knots\n{uu_array}')

converted_array = fstpy.unit_convert_array(uu_array,'knot','meter_per_second')

print(f'UU in m/s\n{converted_array}')


unit_convert_array:
Converts the data to the specified unit provided in the to_unit_name parameter.

    :param arr: array to be converted
    :type df: np.ndarray
    :param from_unit_name: unit name to convert from
    :type from_unit_name: str
    :param to_unit_name: unit name to convert to, defaults to 'scalar'
    :type to_unit_name: str, optional
    :return: an array containing the converted data
    :rtype: np.ndarray
    
    nomvar                                           description_fr                                   description_en  unit
796     UU  Composante U du vent (selon l''axe des X sur la grille)  U-Component of the Wind (Along the grid X axis)  knot
    name symbol expression  bias    factor  mass  length  time  electricCurrent  temperature  amountOfSubstance  luminousIntensity
64  knot     kt   m·s^(-1)   0.0  0.514444     0       1    -1                0            0                  0                  0
                name symbol expression  bias  factor  ma

# unit_convert

In [33]:
print(f'unit_convert:\n{fstpy.unit_convert.__doc__}')

# Record attributes shown before and after the conversion.
unit_view_cols = ['nomvar', 'typvar', 'etiket', 'dateo', 'ip1', 'unit']

# UU records, with their unit and description columns added.
uu_df = fstpy.add_unit_and_description_columns(all_df.loc[all_df.nomvar=='UU'])

print(f"Before conversion\n{uu_df[unit_view_cols]}")

# Convert the data of every UU record to metres per second.
uu_converted_df = fstpy.unit_convert(uu_df, 'meter_per_second')

print(f"After conversion\n{uu_converted_df[unit_view_cols]}")

unit_convert:
Converts the data portion 'd' of all the records of a dataframe to the specified unit
    provided in the to_unit_name parameter. If the standard_unit flag is True, the to_unit_name
    will be ignored and the unit will be based on the standard file variable dictionnary unit
    value instead. This ensures that if a unit conversion was done, the varaible will return
    to the proper standard file unit value. ex. : TT should be in celsius. o.dict can be consulted
    to get the appropriate unit values.

    :param df: dataframe containing records to be converted
    :type df: pd.DataFrame
    :param to_unit_name: unit name to convert to, defaults to 'scalar'
    :type to_unit_name: str, optional
    :param standard_unit: flag to indicate the use of dictionnary units, defaults to False
    :type standard_unit: bool, optional
    :return: a dataframe containing the converted data
    :rtype: pd.DataFrame
    
Before conversion
   nomvar typvar     etiket      dateo       ip

  res_df = pd.concat([res_df,meta_df],ignore_index=True)


# fststat

In [34]:
fststat_doc = fstpy.fststat.__doc__
print(f'fststat:\n{fststat_doc}')

# Summary statistics for the UU records only.
uu_records = all_df.loc[all_df['nomvar'] == 'UU']
fstpy.fststat(uu_records)

fststat:
Produces summary statistics for a dataframe

    :param df: input dataframe
    :type df: pd.DataFrame
    
Read(991) UU   P  R1_V710_N        1108    1082     1  442998800     75597472         6         0      300       72  f 16  Z 33792 77761     1     0
Read(991) UU   P  R1_V710_N        1108    1082     1  442998800     96251080         6         0      300       72  f 16  Z 33792 77761     1     0
Read(991) UU   P  R1_V710_N        1108    1082     1  442998800     96188652         6         0      300       72  f 16  Z 33792 77761     1     0
Read(991) UU   P  R1_V710_N        1108    1082     1  442998800     95190655         6         0      300       72  f 16  Z 33792 77761     1     0
Read(991) UU   P  R1_V710_N        1108    1082     1  442998800     95167278         6         0      300       72  f 16  Z 33792 77761     1     0
Read(991) UU   P  R1_V710_N        1108    1082     1  442998800     95540371         6         0      300       72  f 16  Z 33792 77761  

# voir

In [35]:
# Show the documentation, then list the metadata of every record in all_df.
voir_doc = fstpy.voir.__doc__
print(f'voir:\n{voir_doc}')

fstpy.voir(all_df)

voir:
Displays the metadata of the supplied records in the rpn voir format

    nomvar typvar     etiket    ni    nj nk               dateo       ip1    ip2 ip3 deet npas datyp nbits grtyp    ig1    ig2    ig3    ig4
0       !!      X  R1_V710_N     3   175  1                 NaT     33792  77761   0    0    0     E    64     X   5005      0    300   1500
1       >>      X  R1_V710_N  1108     1  1 2020-07-14 12:00:00     33792  77761   1    0    0     E    32     E   1470    560  54400  46560
2       P0      P  R1_V710_N  1108  1082  1 2020-07-14 12:00:00         0      6   0  300   72     f    16     Z  33792  77761      1      0
3       UU      P  R1_V710_N  1108  1082  1 2020-07-14 12:00:00  75597472      6   0  300   72     f    16     Z  33792  77761      1      0
4       UU      P  R1_V710_N  1108  1082  1 2020-07-14 12:00:00  95369342      6   0  300   72     f    16     Z  33792  77761      1      0
5       UU      P  R1_V710_N  1108  1082  1 2020-07-14 12:00:00  95364364     