Pre-Processing Steps for Seasonal Forecast Data Downloaded from CDS

This code will create similar folders as in the ERA5 for the to be downloaded seasonal forecast files, mind it for the pressure level variables!

The dataset mistakenly downloaded in grib format but named as .nc, so this folder SEAS5_convert had just a change in the extension naming from .nc to .grib. The downloaded dataset was in the path: "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5", i duplicated the folder to /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_convert/ in order to secure the original files downloaded.

The 20160101.nc files for both the surface and pressure level variables are downloaded using the web catalogue and not using web api, so those files are well formatted, since all other files are in grib, we need to make sure that all the files are in the same format

In [5]:
import xarray as xr

dsurface = xr.open_dataset("/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/test_process_SEAS5/SEAS5_surface_level_20160101.nc")
dpressure = xr.open_dataset("/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/test_process_SEAS5/SEAS5_pressure_level_20160101.nc")

dsurface



In [6]:
dpressure

To make sure rest of the datasets are aligned in this format, we need to first rewrite the extension of the files

In [26]:
import os

folder_paths = ["/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/pressure_level", "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level"]

for paths in folder_paths:
    # Get a list of all files in the folder
    files = os.listdir(paths)
    
    # Iterate through each file
    for file in files:
        # Check if the item is a file (not a directory)
        if os.path.isfile(os.path.join(paths, file)):
            # Split the file name and extension
            name, extension = os.path.splitext(file)
            
            # Rename the file with the new extension
            new_name = name + ".grib"
            new_path = os.path.join(paths, new_name)
            
            # Rename the file
            os.rename(os.path.join(paths, file), new_path)

print("File extensions replaced with .grib")


File extensions replaced with .grib


Create the paths required

In [25]:
import os
import shutil
import subprocess

# specify the source and destination paths
src_path = f"/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/REANALYSIS/ERA5/"
dst_path = f"/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/"

# use os.walk() to iterate through all directories and subdirectories in the source path
for root, dirs, files in os.walk(src_path):
    # create the corresponding directory structure in the destination path
    for directory in dirs:
        src_dir = os.path.join(root, directory)
        dst_dir = src_dir.replace(src_path, dst_path)
        os.makedirs(dst_dir, exist_ok=True)

In [1]:
import os
import xarray as xr
import re

directory_path = '/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/pressure_level/'
#directory_path = '/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/test_process_SEAS5/pressure_level'

# Get the list of files in the directory
file_list = os.listdir(directory_path)
file_list = [file for file in file_list if file.endswith(".grib")]


naming = {"msl": "mean_sea_level_pressure",
          "t2m": "2m_temperature",
          "tp": "total_precipitation"}

# Output directories for each of the surface level variables
directory = {"msl": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/mean_sea_level_pressure/",
          "t2m": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/2m_temperature/",
          "tp": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/total_precipitation/"}

naming_p = {"z": "geopotential",
          "t": "temperature",
          "q": "specific_humidity",
          "u": "u_component_of_wind",
          "v": "v_component_of_wind"}

# Output directories for each of the pressure level variables
directory_p = {"z": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/geopotential/",
              "t": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/temperature/",
              "q": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/specific_humidity/",
              "u": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/u_component_of_wind/",
              "v": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/v_component_of_wind/"}

Run this for surface Level Variables

In [33]:

for file in file_list:
    print(directory_path+file)
    ds = xr.open_dataset(directory_path+file)
    ds = ds.drop_vars(["time","step", "surface"])
    ds = ds.rename({'valid_time': 'time'})
    ds = ds.set_index(step='time')
    ds = ds.rename({'step': 'time'})
    ds = ds.transpose('time', 'number','latitude', 'longitude')
    variable_names = list(ds.data_vars)
    for var in range(len(variable_names)):
        tmp = ds.data_vars[variable_names[var]].to_dataset()
        a = list(tmp.data_vars)
        string = naming[a[0]]
        split_result = re.split('[_.]', file)
        modified_list = [string if item == 'level' else item for item in split_result]
        modified_list.pop(1)  # Remove the second element
        modified_list.pop(-1)
        resulting_string = directory[a[0]] + '_'.join(modified_list) + '.nc'
        tmp.to_netcdf(resulting_string)

/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20201101.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20200601.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20160301.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20161001.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20200901.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20161101.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20200501.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/surface_level/SEAS5_surface_level_20200701.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate

Run this for pressure Level Variables

In [None]:
for file in file_list:
    print(directory_path+file)
    ds = xr.open_dataset(directory_path+file)
    ds = ds.drop_vars(["time","step"])
    ds['isobaricInhPa'] = ds['isobaricInhPa'].astype(int)
    ds = ds.rename({"isobaricInhPa": "level"}) 
    ds = ds.rename({'valid_time': 'time'})
    ds = ds.set_index(step='time')
    ds = ds.rename({'step': 'time'})
    ds = ds.transpose('time', 'number', 'level','latitude', 'longitude')
    variable_names = list(ds.data_vars)
    for var in range(len(variable_names)):
        tmp = ds.data_vars[variable_names[var]].to_dataset()
        levels = tmp.level.values
        for L in levels:
            dss = tmp.sel(level=L)
            a = list(dss.data_vars)
            string = naming_p[a[0]]
            split_result = re.split('[_.]', file)
            modified_list = [string if item == 'level' else item for item in split_result]
            modified_list.pop(1)  # Remove the second element
            modified_list.pop(-1)
            resulting_string = directory_p[a[0]] + str(L) + '/'+'_'.join(modified_list) + '.nc'
            dss.to_netcdf(resulting_string)


/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/pressure_level/SEAS5_pressure_level_20201101.grib
/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/pressure_level/SEAS5_pressure_level_20200201.grib


This cell will create similar path and also regrid all the files and place them accordingly in their respective folders

In [3]:
import os
import shutil
import subprocess

# specify the source and destination paths
src_path = f"/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/preprocess_SEAS5/"
dst_path = f"/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/"

# use os.walk() to iterate through all directories and subdirectories in the source path
for root, dirs, files in os.walk(src_path):
    # create the corresponding directory structure in the destination path
    for directory in dirs:
        src_dir = os.path.join(root, directory)
        dst_dir = src_dir.replace(src_path, dst_path)
        os.makedirs(dst_dir, exist_ok=True)

    # process each file in the current directory
    for file in files:
        if file.endswith(".nc"):  # You can specify the file extension you want to process
            src_file = os.path.join(root, file)
            dst_file = src_file.replace(src_path, dst_path)
            
            # Modify the destination file name to include "_gridded" before the extension
            base_name, extension = os.path.splitext(dst_file)
            dst_file = f"{base_name}_gridded{extension}"
            
            # Run the cdo command to process the file and save the output in the destination directory
            cdo_command = f"cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file {src_file} {dst_file}"
            try:
                subprocess.run(cdo_command, shell=True, check=True)
                print(f"Processed: {src_file}")
            except subprocess.CalledProcessError as e:
                print(f"Error processing {src_file}: {e}")

print("Processing complete.")


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200301.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200301.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200301_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20161101.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20161101.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20161101_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160701.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160701.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160701_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160401.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160401.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160401_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160901.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160901.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160901_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20201101.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20201101.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20201101_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200501.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200501.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200501_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200701.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200701.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200701_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200101.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200101.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200101_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160201.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160201.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160201_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200401.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200401.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200401_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160101.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160101.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160101_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20161201.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20161201.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20161201_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20161001.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20161001.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20161001_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20201001.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20201001.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20201001_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200901.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200901.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200901_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200601.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200601.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200601_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160801.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160801.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160801_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160601.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160601.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160601_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20201201.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20201201.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20201201_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160501.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160501.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160501_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200801.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200801.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200801_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160301.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20160301.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20160301_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200201.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/total_precipitation/SEAS5_total_precipitation_20200201.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/total_precipitation/SEAS5_total_precipitation_20200201_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200201.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200201.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200201_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20161101.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20161101.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20161101_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20160601.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20160601.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20160601_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200101.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200101.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200101_gridded.nc' returned non-zero exit status 139.


Segmentation fault (core dumped)


Error processing /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200401.nc: Command 'cdo remapbil,/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/qa_spatial_domain/qa_interTwin_domain_grid_file /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200401.nc /mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/surface/mean_sea_level_pressure/SEAS5_mean_sea_level_pressure_20200401_gridded.nc' returned non-zero exit status 139.


KeyboardInterrupt: 

For Pressure Level Variables

In [None]:
import xarray as xr
import os

# Output directories for each of the pressure level variables
directory = {"z": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/geopotential/",
              "t": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/temperature/",
              "q": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/specific_humidity/",
              "u": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/u_component_of_wind/",
              "v": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/v_component_of_wind/"}

# Output directories for each of the pressure level variables
o_directory = {"z": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/geopotential/",
              "t": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/temperature/",
              "q": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/specific_humidity/",
              "u": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/u_component_of_wind/",
              "v": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/v_component_of_wind/"}

levels = ["500/", "850/"]
for subd in directory.values():
    if(os.listdir(subd+levels[0])==os.listdir(subd+levels[1])):
        file_list = os.listdir(subd+levels[0])
        for file in file_list:
            ds1 = xr.open_dataset(f"{subd+levels[0]}{file}")
            ds2 = xr.open_dataset(f"{subd+levels[1]}{file}")
            #Joining by a new dimension 'level'
            ds = xr.concat([ds1, ds2], dim='level')
            ds['level'] = xr.DataArray([500, 850], dims='level')
            ds['level'] = ds['level'].astype('int32')
            ds['number'] = ds['number'].astype(int)
            ds = ds.rename({'lon': 'x'})
            ds = ds.rename({'lat': 'y'})
            #Streamlining the same order of the index that the other preprocesors has 
            desired_order = ['time', 'level', 'y', 'x', 'number']
            ds = ds.transpose(*desired_order)
            ds = ds.resample(time='D').mean(dim='time')
            var = list(ds.data_vars)
            # Splitting the file name and extension
            base_name, extension = os.path.splitext(file)
            new_base_name = f"{base_name}_daily"
            output_path = f"{new_base_name}{extension}"
            ds.to_netcdf(f"{o_directory[var[0]]}{output_path}")
            print(f"{o_directory[var[0]]}{output_path}")
    else:
        print("The names of the files are not similar or probably there's a mismatch in file names")

print("Pressure Task accomplished")
    


For Single Level Variables

In [None]:
import xarray as xr
import os

directory_path = "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/"

# Output directories for each of the surface level variables
directory = {"msl": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/mean_sea_level_pressure/",
          "t2m": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/2m_temperature/"}

o_directory = {"msl": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/mean_sea_level_pressure/",
          "t2m": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/2m_temperature/"}

for subd in directory.values():
    file_list = os.listdir(subd)
    for file in file_list:
        ds = xr.open_dataset(f"{subd}{file}")
        ds['number'] = ds['number'].astype(int)
        ds = ds.rename({'lon': 'x'})
        ds = ds.rename({'lat': 'y'})
        #Streamlining the same order of the index that the other preprocesors has 
        desired_order = ['time', 'y', 'x', 'number']
        ds = ds.transpose(*desired_order)
        #For single level variables, we have to deal with 2m_temperature and total_precipitation, so use the resampler accordingly
        ds = ds.resample(time='D').mean(dim='time')
        #ds = ds.resample(time='D').sum(dim='time')*1000
        var = list(ds.data_vars)
        # Splitting the file name and extension
        base_name, extension = os.path.splitext(file)
        new_base_name = f"{base_name}_daily"
        output_path = f"{new_base_name}{extension}"
        
        ds.to_netcdf(f"{o_directory[var[0]]}{output_path}")
        print(f"{o_directory[var[0]]}{output_path}")

print("Task accomplished")

In [None]:
import xarray as xr
import os

directory_path = "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/"

# Output directories for each of the surface level variables
directory = {"tp": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_processed/total_precipitation/"}

o_directory = {"tp": "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/SEAS5_daily/total_precipitation/"}

for subd in directory.values():
    file_list = os.listdir(subd)
    for file in file_list:
        ds = xr.open_dataset(f"{subd}{file}")
        ds['number'] = ds['number'].astype(int)
        ds = ds.rename({'lon': 'x'})
        ds = ds.rename({'lat': 'y'})
        #Streamlining the same order of the index that the other preprocesors has 
        desired_order = ['time', 'y', 'x', 'number']
        ds = ds.transpose(*desired_order)
        #For single level variables, we have to deal with 2m_temperature and total_precipitation, so use the resampler accordingly
        #ds = ds.resample(time='D').mean(dim='time')
        ds = ds.resample(time='D').sum(dim='time')
        var = list(ds.data_vars)
        # Splitting the file name and extension
        base_name, extension = os.path.splitext(file)
        new_base_name = f"{base_name}_daily"
        output_path = f"{new_base_name}{extension}"
        
        ds.to_netcdf(f"{o_directory[var[0]]}{output_path}")

Directory Creator

In [7]:
import os
import shutil
import subprocess

# specify the source and destination paths
src_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/extended_SEAS5/"
dst_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/hindcast_SEAS5/"

# use os.walk() to iterate through all directories and subdirectories in the source path
for root, dirs, files in os.walk(src_path):
    # create the corresponding directory structure in the destination path
    for directory in dirs:
        src_dir = os.path.join(root, directory)
        dst_dir = src_dir.replace(src_path, dst_path)
        os.makedirs(dst_dir, exist_ok=True)

One Month Lead-Time SEAS5 Predictor Fields

In [6]:
import xarray as xr
import os
import fnmatch

directory_path = "/mnt/CEPH_PROJECTS/InterTwin/02_Original_Climate_Data/daily_SEAS5/"
output_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/extended_SEAS5/"

folder_names = os.listdir(directory_path)

in_paths_folder_names = [os.path.join(directory_path, folder) for folder in folder_names]
out_paths_folder_names = [os.path.join(output_path, folder) for folder in folder_names]
out_paths_folder_names = sorted(out_paths_folder_names)
in_paths_folder_names = sorted(in_paths_folder_names)
folder_names = sorted(folder_names)

years = [2017, 2018, 2019, 2020]

file_list = []

for in_path, out_path, var in zip(in_paths_folder_names, out_paths_folder_names, folder_names):
    print(in_path, out_path, var)
    for year in years:
        file_list = []
        pattern = f'SEAS5_{var}_{year}*'
        for root, dirs, files in os.walk(in_path):
            for filename in fnmatch.filter(files, pattern):
                file_path = os.path.join(root, filename)
                file_list.append(file_path)
        
        datasets = []
        
        file_list = sorted(file_list)
        i = 1
        for file in file_list:
            ds = xr.open_dataset(file)
            i+=1
            if i == 13:
                i = 1
                month_data = ds.sel(time=ds['time.month'] == i)
            else:
                month_data = ds.sel(time=ds['time.month'] == i)
            # Append the extracted dataset to the list
            datasets.append(month_data)
            month_data = None
            ds = None
            # Concatenate the datasets along the time dimension
        result = xr.concat(datasets, dim='time')
        
        result.to_netcdf(f"{out_path}/SEAS5_{var}_{year}.nc")
        result = None
        print(f"{out_path}/SEAS5_{var}_{year}.nc")
        
print("Task Completed") 

Based on the availability of the dataset and years, the hindcast and forecast period are seperated and aggregated and saved in the downscaling pipeline in one-month lead-time.

Run through all the files and do a task, an example script

In [None]:
import xarray as xr
import os
import fnmatch

directory_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/REANALYSIS/ERA5/"
output_path = "/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/REANALYSIS/ERA5_renamed/"

folder_names = os.listdir(directory_path)

in_paths_folder_names = [os.path.join(directory_path, folder) for folder in folder_names]
out_paths_folder_names = [os.path.join(output_path, folder) for folder in folder_names]
out_paths_folder_names = sorted(out_paths_folder_names)
in_paths_folder_names = sorted(in_paths_folder_names)
folder_names = sorted(folder_names)

in_paths_folder_names.remove("/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/REANALYSIS/ERA5/2m_dewpoint_temperature")
out_paths_folder_names.remove("/mnt/CEPH_PROJECTS/InterTwin/Climate_Downscaling/hydroModelDownscale/REANALYSIS/ERA5_renamed/2m_dewpoint_temperature")
folder_names.remove("2m_dewpoint_temperature")

for in_path, out_path, var in zip(in_paths_folder_names, out_paths_folder_names, folder_names):
    print(in_path, out_path, var)
    
    file_list = os.listdir(in_path)
    for file in file_list:
        ds = xr.open_dataset(f"{in_path}/{file}")
        ds = ds.rename({'lon': 'x','lat': 'y'})
        ds.to_netcdf(f"{out_path}/{file}")
        
    
    
    

print("Task Completed") 