### Extract GRIB Data

In [1]:
import pandas as pd
import xarray as xr
import os

In [None]:
def GRIB_to_df(region, filepath):
    """
    Converts a GRIB file to a CSV file. 
    Inputs:
        - filepath: string containing the path to the GRIB data
        - region: string denoting the region of the data
    Returns:
    """
    # Open GRIB file using cfgrib engine
    ds = xr.open_dataset(filepath, engine="cfgrib")

    print(ds) # debugging

    # Average across latitude and longitude
    regional_means = ds.mean(dim=["latitude", "longitude"])

    # Make DataFrame
    df = regional_means.to_dataframe()

    # Drop unnecessary columns
    df.drop(columns=['valid_time', 'surface', 'step'], inplace=True)

    # Rename columns
    df.rename(columns={
        't2m': f'temp_2m_{region}',
        'u100': f'wind_u_100m_{region}',
        'v100': f'wind_v_100m_{region}'
    }, inplace=True)

    return df


In [None]:
# Define file path
filepath_dict = {'south':'raw_era5/era5_south.grib', 
             'north':'raw_era5/era5_north.grib',
             'east':'raw_era5/era5_east.grib', 
             'west':'raw_era5/era5_west.grib'}

region_dfs = []

# Convert each GRIB file into a dataframe and save in list
for region in filepath_dict:
    region_df = GRIB_to_df(region, filepath_dict[region])
    region_dfs.append(region_df)

# Merge dataframes into one
df_merged = pd.concat(region_dfs, axis=1)

In [None]:
# Save to CSV
clean_data_path = os.path.join('..', 'clean_data')
output_path = os.path.join(clean_data_path, 'era5_wind_conditions_data.csv')
df_merged.to_csv(output_path)

print('Sucessfully saved as a CSV!')

Sucessfully saved as a CSV!


## Extract the Validation Dataset

This data ranges from March 1, 2025 to April 17, 2025. 

In [3]:
# Define file path
filepath_dict = {'south':'raw_era5_validation/era5_south_validation.grib', 
             'north':'raw_era5_validation/era5_north_validation.grib',
             'east':'raw_era5_validation/era5_east_validation.grib', 
             'west':'raw_era5_validation/era5_west_validation.grib'}

region_dfs = []

# Convert each GRIB file into a dataframe and save in list
for region in filepath_dict:
    region_df = GRIB_to_df(region, filepath_dict[region])
    region_dfs.append(region_df)

# Merge dataframes into one
df_validation_merged = pd.concat(region_dfs, axis=1)

  vars, attrs, coord_names = xr.conventions.decode_cf_variables(


<xarray.Dataset> Size: 7MB
Dimensions:     (time: 1131, latitude: 22, longitude: 23)
Coordinates:
    number      int64 8B ...
  * time        (time) datetime64[ns] 9kB 2025-03-01 ... 2025-04-17T02:00:00
    step        timedelta64[ns] 8B ...
    surface     float64 8B ...
  * latitude    (latitude) float64 176B 31.13 30.88 30.63 ... 26.38 26.13 25.88
  * longitude   (longitude) float64 184B -101.4 -101.1 -100.9 ... -96.14 -95.89
    valid_time  (time) datetime64[ns] 9kB ...
Data variables:
    t2m         (time, latitude, longitude) float32 2MB ...
    u100        (time, latitude, longitude) float32 2MB ...
    v100        (time, latitude, longitude) float32 2MB ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:        

  vars, attrs, coord_names = xr.conventions.decode_cf_variables(


<xarray.Dataset> Size: 4MB
Dimensions:     (time: 1131, latitude: 14, longitude: 19)
Coordinates:
    number      int64 8B ...
  * time        (time) datetime64[ns] 9kB 2025-03-01 ... 2025-04-17T02:00:00
    step        timedelta64[ns] 8B ...
    surface     float64 8B ...
  * latitude    (latitude) float64 112B 34.47 34.22 33.97 ... 31.72 31.47 31.22
  * longitude   (longitude) float64 152B -100.0 -99.79 -99.54 ... -95.79 -95.54
    valid_time  (time) datetime64[ns] 9kB ...
Data variables:
    t2m         (time, latitude, longitude) float32 1MB ...
    u100        (time, latitude, longitude) float32 1MB ...
    v100        (time, latitude, longitude) float32 1MB ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:        

  vars, attrs, coord_names = xr.conventions.decode_cf_variables(


<xarray.Dataset> Size: 968kB
Dimensions:     (time: 1131, latitude: 10, longitude: 7)
Coordinates:
    number      int64 8B ...
  * time        (time) datetime64[ns] 9kB 2025-03-01 ... 2025-04-17T02:00:00
    step        timedelta64[ns] 8B ...
    surface     float64 8B ...
  * latitude    (latitude) float64 80B 31.09 30.84 30.59 ... 29.34 29.09 28.84
  * longitude   (longitude) float64 56B -95.8 -95.55 -95.3 ... -94.55 -94.3
    valid_time  (time) datetime64[ns] 9kB ...
Data variables:
    t2m         (time, latitude, longitude) float32 317kB ...
    u100        (time, latitude, longitude) float32 317kB ...
    v100        (time, latitude, longitude) float32 317kB ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:      

  vars, attrs, coord_names = xr.conventions.decode_cf_variables(


<xarray.Dataset> Size: 6MB
Dimensions:     (time: 1131, latitude: 21, longitude: 21)
Coordinates:
    number      int64 8B ...
  * time        (time) datetime64[ns] 9kB 2025-03-01 ... 2025-04-17T02:00:00
    step        timedelta64[ns] 8B ...
    surface     float64 8B ...
  * latitude    (latitude) float64 168B 33.85 33.6 33.35 ... 29.35 29.1 28.85
  * longitude   (longitude) float64 168B -105.0 -104.8 -104.5 ... -100.3 -100.0
    valid_time  (time) datetime64[ns] 9kB ...
Data variables:
    t2m         (time, latitude, longitude) float32 2MB ...
    u100        (time, latitude, longitude) float32 2MB ...
    v100        (time, latitude, longitude) float32 2MB ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:          

In [4]:
# Save to CSV
clean_data_path = os.path.join('..', 'clean_data')
output_path = os.path.join(clean_data_path, 'era5_validation_wind_conditions_data.csv')
df_validation_merged.to_csv(output_path)

print('Sucessfully saved as a CSV!')

Sucessfully saved as a CSV!
