In [1]:
%load_ext jupyter_black

import os
from glob import glob
import pandas as pd
from wxlab.forecast import Grib


# collect every GALWEM .GR2 path under data/galwem, in sorted order
galwem_paths = glob(os.path.join("data", "galwem", "*.GR2"))
galwem_paths.sort()
galwem_data = pd.Series(galwem_paths, name="GALWEM")

# pull the 3-digit forecast hour and the 8-digit date stamp out of each path
galwem_times: pd.DataFrame = galwem_data.str.extract(r"FH.(?P<forecast_hour>\d{3})_DF__(?P<valid_time>\d{8})")

# index each path by its date stamp shifted forward by the forecast hour
base_date = pd.to_datetime(galwem_times["valid_time"])
lead_time = pd.to_timedelta(galwem_times["forecast_hour"].astype(int), unit="h")
galwem_data.index = base_date + lead_time
galwem_data

  class GeoAccessor:


2022-05-20 00:00:00    data/galwem/PS.557WW_SC.U_DI.C_GP.GALWEM-GD_GR...
2022-05-20 03:00:00    data/galwem/PS.557WW_SC.U_DI.C_GP.GALWEM-GD_GR...
2022-05-20 06:00:00    data/galwem/PS.557WW_SC.U_DI.C_GP.GALWEM-GD_GR...
Name: GALWEM, dtype: object

In [16]:
# build a Grib object from the time-indexed Series of GALWEM file paths
grib = Grib(galwem_data)
grib

<wxlab.forecast.Grib at 0x7f2c54330850>

In [20]:
# the dataset is 4-dimensional (T, X, Y, Z)
ds = grib.to_dataset()
ds

In [21]:
# when converted to a dataframe, the Z axis (the `hPa` level) is moved onto the
# column axis; the rows stay indexed by (validTime, lat, lon)
# pd.IndexSlice is an indexing helper object, not a builtin `slice`, so it is
# deliberately left without a `slice` annotation
idx = pd.IndexSlice
df = grib.to_dataframe()
# select the single (3000, "temp") column; the result is a Series
df[idx[3000, "temp"]]

validTime            lat   lon   
1653004800000000000  20.0  230.00    218.069992
                           230.25    218.129990
                           230.50    218.220001
                           230.75    218.199997
                           231.00    218.059998
                                        ...    
1653026400000000000  55.0  299.00    224.259995
                           299.25    224.299988
                           299.50    224.349991
                           299.75    224.449997
                           300.00    224.580002
Name: (3000.0, temp), Length: 118863, dtype: float32

In [22]:
# with pyarrow the data can be turned into a table
# (validTime, lat, lon and hPa show up as ordinary columns in the output below)
table = grib.to_table()
table

pyarrow.Table
temp: float
u_wind: float
v_wind: float
validTime: int64
lat: double
lon: double
hPa: double
----
temp: [[218.06999,207.84999,199.87,202.09,209.4,...,237.04999,248.26999,265.43,268.47,275.25]]
u_wind: [[-9.2,-6.2799997,-4.3399997,6.8799996,24.769997,...,7.240002,7.27,8.03,9.889999,4.62]]
v_wind: [[0.31,-1.43,2.1299999,13.25,19.359999,...,3.4499998,1.1899999,-1.88,-5.46,2.53]]
validTime: [[1653004800000000000,1653004800000000000,1653004800000000000,1653004800000000000,1653004800000000000,...,1653026400000000000,1653026400000000000,1653026400000000000,1653026400000000000,1653026400000000000]]
lat: [[20,20,20,20,20,...,55,55,55,55,55]]
lon: [[230,230,230,230,230,...,300,300,300,300,300]]
hPa: [[3000,5000,7000,10000,15000,...,40000,50000,70000,85000,100000]]

In [23]:
# that table can be saved as a parquet file, which can maintain the context of the MultiIndex
grib.to_parquet("data/GALWEM.parquet")

In [24]:
# that parquet file can be read back into a Grib object
grib2 = Grib("data/GALWEM.parquet")
grib2

<wxlab.forecast.Grib at 0x7f2c54355540>

In [25]:
# loading the Grib from the parquet file took 0.6 seconds, versus almost 30 seconds from the raw GRIB data
ds2 = grib2.to_dataset()
ds2

In [19]:
# display the full wide frame: rows are indexed by (validTime, lat, lon),
# columns by (hPa, elements)
# NOTE(review): this cell's execution count (19) is lower than the cells above it,
# so it was run out of order — confirm the notebook survives Restart & Run All
grib.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,hPa,3000.0,5000.0,7000.0,10000.0,15000.0,20000.0,25000.0,30000.0,40000.0,50000.0,...,10000.0,15000.0,20000.0,25000.0,30000.0,40000.0,50000.0,70000.0,85000.0,100000.0
Unnamed: 0_level_1,Unnamed: 1_level_1,elements,temp,temp,temp,temp,temp,temp,temp,temp,temp,temp,...,v_wind,v_wind,v_wind,v_wind,v_wind,v_wind,v_wind,v_wind,v_wind,v_wind
validTime,lat,lon,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
1653004800000000000,20.0,230.00,218.069992,207.849991,199.869995,202.089996,209.399994,217.589996,229.019989,239.429993,252.839996,264.070007,...,13.250000,19.359999,21.720005,22.680000,21.879995,10.370000,2.42,0.02,-3.280002,-5.17
1653004800000000000,20.0,230.25,218.129990,207.739990,199.839996,202.019989,209.389999,217.440002,229.080002,239.479996,253.039993,264.149994,...,13.090000,19.580000,22.160004,23.789999,21.439995,10.130000,2.67,0.37,-3.070002,-5.27
1653004800000000000,20.0,230.50,218.220001,207.629990,199.819992,201.959991,209.259995,217.289993,229.059998,239.519989,253.279999,264.119995,...,12.840000,20.020000,22.320004,24.529999,20.999994,9.849999,2.82,0.31,-3.020002,-5.41
1653004800000000000,20.0,230.75,218.199997,207.529999,199.879990,201.899994,209.069992,217.139999,228.929993,239.619995,253.509995,264.089996,...,12.599999,20.459999,22.310005,25.070000,20.659994,9.429999,2.85,-0.11,-3.030002,-5.55
1653004800000000000,20.0,231.00,218.059998,207.440002,199.879990,201.869995,208.889999,217.259995,228.860001,239.669998,253.569992,264.209991,...,12.360000,20.740000,22.300005,25.519999,20.219995,8.920000,3.03,-0.62,-3.040002,-5.49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1653026400000000000,55.0,299.00,224.259995,221.699997,221.879990,221.019989,226.190002,226.529999,224.220001,223.690002,236.720001,248.189987,...,3.380000,4.000000,5.960000,5.550000,2.030000,-3.230000,-1.01,-2.29,-2.350000,1.21
1653026400000000000,55.0,299.25,224.299988,221.720001,221.750000,221.099991,226.000000,226.589996,224.129990,223.940002,236.739990,248.139999,...,3.500000,4.050000,6.030000,5.650000,4.220000,-2.010000,0.11,-1.85,-2.890000,2.13
1653026400000000000,55.0,299.50,224.349991,221.720001,221.639999,221.139999,225.839996,226.549988,224.049988,224.139999,236.759995,248.080002,...,3.590000,4.130000,6.070000,5.750000,6.640000,-0.350000,1.13,-1.87,-3.650000,3.11
1653026400000000000,55.0,299.75,224.449997,221.709991,221.559998,221.220001,225.669998,226.549988,223.899994,224.159988,236.860001,248.110001,...,3.640000,4.220000,6.090000,5.930000,8.990000,1.460000,1.52,-1.97,-4.550000,3.10
