Pre-Processing Steps for Seasonal Forecast Data Downloaded from CDS

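This cell recreates the source directory tree under the destination path, so that the seasonal forecast files get a folder layout similar to the one used for ERA5. Mind the pressure-level variables, whose files sit in per-level subfolders (e.g. `500`, `850`).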

In [None]:
import os
import shutil

# Source tree to mirror and the destination root that receives the same layout.
src_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/SEAS5"
dst_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/dailySEAS5"

# os.walk() yields nothing for a missing directory, so say so explicitly
# instead of finishing silently without creating anything.
if not os.path.isdir(src_path):
    print(f"Source directory not found, nothing to mirror: {src_path}")

# Walk every directory below the source root and recreate it under the destination root.
for root, dirs, files in os.walk(src_path):
    # Location of the current directory relative to the source root ("." for the root itself).
    # relpath + join swaps only the leading src_path prefix; str.replace would also
    # rewrite any later occurrence of the same text deeper in the path.
    rel_root = os.path.relpath(root, src_path)

    for directory in dirs:
        dst_dir = os.path.normpath(os.path.join(dst_path, rel_root, directory))
        # exist_ok=True keeps the cell safe to re-run.
        os.makedirs(dst_dir, exist_ok=True)



This cell recreates the same directory structure in the destination path, regrids every `.nc` file with CDO, and places each regridded file in its corresponding destination folder.

In [None]:
import os
import shutil
import subprocess

# Root of the downloaded files and root that receives the regridded copies.
src_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/test_sf/Downloads"
dst_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/test_sf/Processed"

# File passed to `cdo remapbil` as the target grid.
grid_file = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/DEM/interTwin_dem.nc"

# Walk the source tree, mirror its folders and regrid every NetCDF file found.
for root, dirs, files in os.walk(src_path):
    # Destination folder matching the current source folder. relpath + join swaps
    # only the leading src_path prefix, which str.replace does not guarantee.
    rel_root = os.path.relpath(root, src_path)
    dst_root = os.path.normpath(os.path.join(dst_path, rel_root))

    # Create the corresponding directory structure in the destination path.
    for directory in dirs:
        os.makedirs(os.path.join(dst_root, directory), exist_ok=True)

    # Process each file in the current directory.
    for file in files:
        if not file.endswith(".nc"):  # only NetCDF files are regridded
            continue

        src_file = os.path.join(root, file)

        # Insert "_gridded" before the extension of the output file name.
        base_name, extension = os.path.splitext(file)
        dst_file = os.path.join(dst_root, f"{base_name}_gridded{extension}")

        # Files located directly in src_path map to dst_path itself, which the
        # directory loop above never creates, so make sure the folder exists.
        os.makedirs(dst_root, exist_ok=True)

        # Pass the command as an argument list (no shell) so that paths containing
        # spaces or shell metacharacters reach cdo unchanged.
        cdo_command = ["cdo", f"remapbil,{grid_file}", src_file, dst_file]
        try:
            subprocess.run(cdo_command, check=True)
            print(f"Processed: {src_file}")
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: cdo exited with a non-zero status.
            # OSError: cdo could not be started (e.g. not on PATH).
            # Either way, report the failure and continue with the next file.
            print(f"Error processing {src_file}: {e}")

print("Processing complete.")


For Pressure Level Variables

In [None]:
import xarray as xr

# Regridded input files for the two pressure levels of the same variable and date.
path_500 = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/SEAS5/v_component_of_wind/500/date_2016-01-01_gridded.nc"
path_850 = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/SEAS5/v_component_of_wind/850/date_2016-01-01_gridded.nc"

# Open both files in a `with` block so the file handles are closed once the
# result has been written; all work that reads the data happens inside it.
with xr.open_dataset(path_500) as ds1, xr.open_dataset(path_850) as ds2:
    # Join the two datasets along a new dimension 'level'
    combined_ds = xr.concat([ds1, ds2], dim='level')

    # Label the entries of the new 'level' dimension (same order as in the concat above)
    combined_ds['level'] = xr.DataArray([500, 850], dims='level')

    # Convert the datatype of level so that it matches the ERA5 data
    combined_ds['level'] = combined_ds['level'].astype('int32')

    # Use the same dimension order as the other preprocessors
    desired_order = ['time', 'level', 'y', 'x', 'number']
    ds = combined_ds.transpose(*desired_order)

    # For all the pressure level variables, only the daily mean is used
    ds = ds.resample(time='D').mean(dim='time')

    ds.to_netcdf("/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/dailySEAS5/v_component_of_wind/SEAS5_coarse_v_component_of_wind_2016_01.nc")

For Single Level Variables

In [None]:
import xarray as xr

# Name of the single-level variable being processed. The input and output paths are
# both derived from it, so the result always lands in this variable's own folder
# (previously the 2m_temperature result was written over the v_component_of_wind file).
variable = "2m_temperature"

src_file = f"/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/SEAS5/{variable}/date_2016-01-01_gridded.nc"
dst_file = f"/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/larger_alps/SampleSF/dailySEAS5/{variable}/SEAS5_coarse_{variable}_2016_01.nc"

# Open the file in a `with` block so the handle is closed after the result is written.
with xr.open_dataset(src_file) as ds_raw:
    # Use the same dimension order as the other preprocessors
    desired_order = ['time', 'y', 'x', 'number']
    ds = ds_raw.transpose(*desired_order)

    # Single level variables are 2m_temperature and total_precipitation, so pick the
    # resampler accordingly: the daily mean below is the one used for 2m_temperature;
    # for total_precipitation switch to the commented daily-sum line instead
    # (the *1000 factor presumably converts m to mm — TODO confirm).
    ds = ds.resample(time='D').mean(dim='time')
    #ds = ds.resample(time='D').sum(dim='time')*1000

    ds.to_netcdf(dst_file)