In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

# Import climdata BCSD modules
from climdata.sdba import BCSD, BiasCorrection, StatisticalDownscaling, regrid_to_coarse

print("âœ“ All imports successful!")

âœ“ All imports successful!


In [2]:
# Analysis windows, given as inclusive ISO-8601 date strings.
# Historical window (start, end)
hist_start, hist_end = '2004-01-01', '2014-12-31'
# Future window (start, end)
fut_start, fut_end = '2015-01-01', '2050-12-31'

print(f"Historical period: {hist_start} to {hist_end}")
print(f"Future period: {fut_start} to {fut_end}")

Historical period: 2004-01-01 to 2014-12-31
Future period: 2015-01-01 to 2050-12-31


### Load Fine-Resolution Reference Data (MSWX)

MSWX provides global weather data at 0.1° resolution, blending multiple data sources.

In [3]:
from climdata import ClimData

# Overrides handed to ClimData; each entry is a "key=value" string.
options = [
    "dataset=mswx",                         # extract from the MSWX dataset
    "region=europe",                        # region to extract
    "variables=[tas]",                      # variable(s) to extract
    f"time_range.start_date={hist_start}",  # first date of the extraction
    f"time_range.end_date={hist_end}",      # last date of the extraction
    "data_dir=/beegfs/muduchuru/data",      # local directory for downloaded/intermediate files
]
mswx = ClimData(overrides=options)

# Fine-resolution reference data for the historical window
obs_fine_real = mswx.extract()

# Short report on what was loaded
print("\nâœ“ MSWX data loaded!")
print(f"  Dimensions: {obs_fine_real.dims}")
print(f"  Variables: {list(obs_fine_real.data_vars)}")
print("  Resolution: ~0.1Â° (~10 km)")
obs_times = obs_fine_real.time.values
print(f"  Time range: {obs_times[0]} to {obs_times[-1]}")

âœ… All 4018 tas files already exist locally.

âœ“ MSWX data loaded!
  Variables: ['tas']
  Resolution: ~0.1Â° (~10 km)
  Time range: 2004-01-01T00:00:00.000000000 to 2014-12-31T00:00:00.000000000


### Load Coarse GCM Data (CMIP6)

Load historical and future CMIP6 data for the same region.

In [4]:
# CMIP6 model used in this example
cmip_model = 'MPI-ESM1-2-HR'

# Overrides handed to ClimData; each entry is a "key=value" string.
options = [
    "dataset=cmip",                         # extract from the CMIP dataset
    "region=europe",                        # region to extract
    "variables=[tas]",                      # variable(s) to extract
    f"time_range.start_date={hist_start}",  # first date of the extraction
    f"time_range.end_date={hist_end}",      # last date of the extraction
    "data_dir=/beegfs/muduchuru/data",      # local directory for downloaded/intermediate files
    f"source_id={cmip_model}",              # model selected above
    "experiment_id=historical",             # historical experiment
]
cmip_hist = ClimData(overrides=options)

# Coarse GCM data for the historical window
sim_hist_coarse_real = cmip_hist.extract()

# Short report on what was loaded
print("\nâœ“ Historical CMIP6 data loaded!")
print(f"  Model: {cmip_model}")
print(f"  Dimensions: {sim_hist_coarse_real.dims}")
print("  Resolution: ~1-2Â° (model-dependent)")
hist_times = sim_hist_coarse_real.time.values
print(f"  Time range: {hist_times[0]} to {hist_times[-1]}")


âœ“ Historical CMIP6 data loaded!
  Model: MPI-ESM1-2-HR
  Resolution: ~1-2Â° (model-dependent)
  Time range: 2004-01-01T00:00:00.000000000 to 2014-12-31T00:00:00.000000000


In [5]:
# CMIP6 model used in this example (repeated so this cell can be read on its own)
cmip_model = 'MPI-ESM1-2-HR'

# Overrides handed to ClimData; each entry is a "key=value" string.
options = [
    "dataset=cmip",                        # extract from the CMIP dataset
    "region=europe",                       # region to extract
    "variables=[tas]",                     # variable(s) to extract
    f"time_range.start_date={fut_start}",  # first date of the extraction
    f"time_range.end_date={fut_end}",      # last date of the extraction
    "data_dir=/beegfs/muduchuru/data",     # local directory for downloaded/intermediate files
    f"source_id={cmip_model}",             # model selected above
    "experiment_id=ssp585",                # SSP5-8.5 scenario experiment
]
# Use a dedicated name for the future extractor: binding it to `cmip_hist`
# would overwrite the historical extractor and mislabel this object.
cmip_fut = ClimData(overrides=options)

# Coarse GCM data for the future window
sim_fut_coarse_real = cmip_fut.extract()

# Short report on what was loaded
print("\nâœ“ Future CMIP6 data loaded!")
print("  Scenario: SSP5-8.5")
print(f"  Dimensions: {sim_fut_coarse_real.dims}")
print(f"  Time range: {sim_fut_coarse_real.time.values[0]} to {sim_fut_coarse_real.time.values[-1]}")



âœ“ Future CMIP6 data loaded!
  Scenario: SSP5-8.5
  Time range: 2015-01-01T00:00:00.000000000 to 2050-12-31T00:00:00.000000000


### Prepare Data for BCSD

Summarize the loaded datasets (variable, shape, resolution) and check whether the fine MSWX grid is an integer multiple of the coarse CMIP6 grid, as required for downscaling.

In [6]:

# Report variable, array shape and latitude spacing for each input dataset
obs_lat_values = obs_fine_real.lat.values
hist_lat_values = sim_hist_coarse_real.lat.values

print("Data Summary:")
print("\nObservations (MSWX):")
print("  Variable: tas (temperature)")
print(f"  Shape: {obs_fine_real['tas'].shape}")
print(f"  Resolution: ~{(obs_lat_values[1] - obs_lat_values[0]):.2f}Â°")

print("\nHistorical GCM (CMIP6):")
print("  Variable: tas")
print(f"  Shape: {sim_hist_coarse_real['tas'].shape}")
print(f"  Resolution: ~{(hist_lat_values[1] - hist_lat_values[0]):.2f}Â°")

print("\nFuture GCM (CMIP6):")
print("  Variable: tas")
print(f"  Shape: {sim_fut_coarse_real['tas'].shape}")

# Ratio of fine to coarse grid-point counts along each horizontal axis
lat_ratio = len(obs_fine_real.lat) / len(sim_hist_coarse_real.lat)
lon_ratio = len(obs_fine_real.lon) / len(sim_hist_coarse_real.lon)

print("\nðŸ“Š Downscaling factors:")
print(f"  Latitude: {lat_ratio:.2f}x")
print(f"  Longitude: {lon_ratio:.2f}x")

# Both ratios must be whole numbers for the grids to count as compatible
if not (lat_ratio.is_integer() and lon_ratio.is_integer()):
    print("  âš  Warning: Grids are not exact integer multiples.")
    print("    ISIMIP3BASD downscaling requires integer downscaling factors.")
    print("    Consider regridding to compatible resolutions first.")
else:
    print("  âœ“ Grids are compatible for ISIMIP3BASD downscaling!")

Data Summary:

Observations (MSWX):
  Variable: tas (temperature)
  Shape: (4018, 370, 450)
  Resolution: ~0.10Â°

Historical GCM (CMIP6):
  Variable: tas
  Shape: (1, 4018, 40, 49)
  Resolution: ~0.94Â°

Future GCM (CMIP6):
  Variable: tas
  Shape: (1, 13149, 40, 49)

ðŸ“Š Downscaling factors:
  Latitude: 9.25x
  Longitude: 9.18x
    ISIMIP3BASD downscaling requires integer downscaling factors.
    Consider regridding to compatible resolutions first.


In [7]:
# Configure the BCSD pipeline for temperature ('tas')
bias_correction_options = {'n_processes': 4}  # use parallel processing
downscaling_options = {
    'n_iterations': 20,  # MBCn iterations
    'n_processes': 4,
}

bcsd = BCSD(
    variable='tas',
    regridding_tool='xesmf',           # or 'cdo' if xESMF not available
    regridding_method='conservative',  # area-weighted conservative regridding
    bias_correction_kwargs=bias_correction_options,
    downscaling_kwargs=downscaling_options,
)

print("BCSD pipeline configured!")

ðŸ”§ BiasCorrection initialized for tas
   Distribution: normal
   Trend preservation: additive
   Detrend: True
ðŸ”§ StatisticalDownscaling initialized for tas
   Iterations: 20

BCSD Pipeline initialized for tas
Regridding: xesmf (conservative)
BCSD pipeline configured!


In [8]:
# Reload the module to pick up code changes made on disk during development
import importlib
import climdata.sdba
import climdata.sdba.bcsd

# importlib.reload() re-executes a module in place, but names that another
# module already imported from it keep pointing at the old objects. Reload the
# submodule first and then the package, so that `from climdata.sdba import ...`
# below binds the freshly defined classes instead of the stale ones.
# NOTE(review): assumes climdata.sdba re-exports these names from
# climdata.sdba.bcsd; names defined in other submodules are not refreshed here.
importlib.reload(climdata.sdba.bcsd)
importlib.reload(climdata.sdba)
from climdata.sdba import BCSD, BiasCorrection, StatisticalDownscaling, regrid_to_coarse

# Reinitialize BCSD so the pipeline object is built from the reloaded class
bcsd = BCSD(
    variable='tas',
    regridding_tool='xesmf',
    regridding_method='conservative',
    bias_correction_kwargs={'n_processes': 4},
    downscaling_kwargs={'n_iterations': 20, 'n_processes': 4}
)

print("âœ“ Module reloaded and BCSD reconfigured!")

ðŸ”§ BiasCorrection initialized for tas
   Distribution: normal
   Trend preservation: additive
   Detrend: True
ðŸ”§ StatisticalDownscaling initialized for tas
   Iterations: 20

BCSD Pipeline initialized for tas
Regridding: xesmf (conservative)
âœ“ Module reloaded and BCSD reconfigured!


### Run the Complete Workflow

The workflow automatically:
1. Regrids fine observations to coarse GCM grid
2. Performs bias correction at coarse resolution
3. Downscales corrected data to fine resolution

In [None]:
# Test BCSD workflow step by step

# Step 1: bring the fine observations onto the coarse GCM grid
print("Step 1: Regridding observations to coarse resolution...")
obs_hist_coarse = regrid_to_coarse(
    obs_fine_real,
    sim_hist_coarse_real,
    method='conservative',
    regridding_tool='xesmf',
)
print(f"  âœ“ Coarse observations shape: {obs_hist_coarse['tas'].shape}")

# Step 1b: make every dataset share exactly the same spatial grid
print("\nStep 1b: Aligning spatial grids...")

# The CMIP data may carry an extra 'source_id' dimension; squeeze it out.
# The membership test keeps this safe to re-run once the dimension is gone.
if 'source_id' in sim_hist_coarse_real.dims:
    sim_hist_coarse_real = sim_hist_coarse_real.squeeze('source_id', drop=True)
    print("  âœ“ Removed source_id dimension from historical data")
if 'source_id' in sim_fut_coarse_real.dims:
    sim_fut_coarse_real = sim_fut_coarse_real.squeeze('source_id', drop=True)
    print("  âœ“ Removed source_id dimension from future data")

# The historical simulation defines the reference grid
reference_grid = {
    'lat': sim_hist_coarse_real.lat,
    'lon': sim_hist_coarse_real.lon,
}

# Pick the nearest reference points, then overwrite the coordinates with the
# reference values so the coordinate arrays match exactly
obs_hist_coarse = (
    obs_hist_coarse
    .sel(**reference_grid, method='nearest')
    .assign_coords(reference_grid)
)
sim_fut_coarse_aligned = (
    sim_fut_coarse_real
    .sel(**reference_grid, method='nearest')
    .assign_coords(reference_grid)
)

print(f"  âœ“ Aligned shapes - obs: {obs_hist_coarse['tas'].shape}, hist: {sim_hist_coarse_real['tas'].shape}, fut: {sim_fut_coarse_aligned['tas'].shape}")
print(f"  âœ“ Coordinate match - lat: {(obs_hist_coarse.lat == sim_hist_coarse_real.lat).all().values}, lon: {(obs_hist_coarse.lon == sim_hist_coarse_real.lon).all().values}")

# Step 2: bias-correct the future simulation at coarse resolution
print("\nStep 2: Bias correction...")
bc = BiasCorrection(variable='tas')
sim_fut_ba = bc.correct(
    obs_hist=obs_hist_coarse,
    sim_hist=sim_hist_coarse_real,
    sim_fut=sim_fut_coarse_aligned,
)
print(f"  âœ“ Bias-corrected shape: {sim_fut_ba['tas'].shape}")


Step 1: Regridding observations to coarse resolution...
ðŸ”„ Regridding from fine to coarse resolution using xesmf...
   Creating conservative regridder...
   Regridding variable: tas
   âœ… Regridding complete!
  âœ“ Coarse observations shape: (4018, 40, 49)

Step 1b: Aligning spatial grids...
  âœ“ Removed source_id dimension from historical data
  âœ“ Removed source_id dimension from future data
  âœ“ Aligned shapes - obs: (4018, 40, 49), hist: (4018, 40, 49), fut: (13149, 40, 49)
  âœ“ Coordinate match - lat: True, lon: True

Step 2: Bias correction...
ðŸ”§ BiasCorrection initialized for tas
   Distribution: normal
   Trend preservation: additive
   Detrend: True

ðŸ”„ Starting bias correction for tas...
   Obs hist period: 2004-01-01T00:00:00.000000000 to 2014-12-31T00:00:00.000000000
   Sim hist period: 2004-01-01T00:00:00.000000000 to 2014-12-31T00:00:00.000000000
   Sim fut period: 2015-01-01T00:00:00.000000000 to 2050-12-31T00:00:00.000000000
   Converting xarray datasets to i

In [None]:

# Step 3: Statistical downscaling
print("\nStep 3: Statistical downscaling...")


def _nested_fine_centers(coarse_centers, factor):
    """Return fine-grid cell centres that split every coarse cell into `factor` parts.

    Parameters
    ----------
    coarse_centers : array-like of float
        1-D cell-centre coordinates of the coarse grid (ascending or descending).
    factor : int
        Number of fine cells per coarse cell along this axis.

    Returns
    -------
    numpy.ndarray
        ``len(coarse_centers) * factor`` fine cell centres. Fine cells
        ``i*factor .. (i+1)*factor - 1`` lie inside coarse cell ``i``.

    Raises
    ------
    ValueError
        If fewer than two coarse centres are given (spacing cannot be inferred).
    """
    coarse_centers = np.asarray(coarse_centers, dtype=float)
    n_coarse = coarse_centers.size
    if n_coarse < 2:
        raise ValueError("need at least two coarse cell centres to infer the grid spacing")
    # Signed mean spacing, so descending axes work too.
    # NOTE(review): treats the coarse axis as uniformly spaced; confirm this is
    # acceptable if the model grid is not exactly regular.
    coarse_step = (coarse_centers[-1] - coarse_centers[0]) / (n_coarse - 1)
    fine_step = coarse_step / factor
    # Start half a coarse cell before the first centre (its outer edge), then
    # move half a fine cell inwards to reach the first fine centre.
    first_center = coarse_centers[0] - 0.5 * coarse_step + 0.5 * fine_step
    return first_center + fine_step * np.arange(n_coarse * factor)


# Check if grids are compatible for downscaling
lat_factor = len(obs_fine_real.lat) / len(sim_fut_ba.lat)
lon_factor = len(obs_fine_real.lon) / len(sim_fut_ba.lon)

print(f"  Current downscaling factors: lat={lat_factor:.2f}x, lon={lon_factor:.2f}x")

if lat_factor != int(lat_factor) or lon_factor != int(lon_factor):
    print("  âš  Grids not compatible - creating compatible fine grid...")

    # Create a compatible fine grid (integer multiples)
    target_lat_factor = round(lat_factor)
    target_lon_factor = round(lon_factor)

    # Coarse cell centres of the bias-corrected simulation
    coarse_lat = sim_fut_ba.lat.values
    coarse_lon = sim_fut_ba.lon.values

    # Build a fine grid that is nested in the coarse grid. A plain
    # np.linspace(coarse[0], coarse[-1], N * factor) spans coarse centre to
    # coarse centre, so its spacing is not coarse_step / factor and the fine
    # cells do not line up with the coarse cells they are paired with.
    fine_lat = _nested_fine_centers(coarse_lat, target_lat_factor)
    fine_lon = _nested_fine_centers(coarse_lon, target_lon_factor)

    # Create a dummy dataset with the target fine grid
    target_fine_grid = xr.Dataset({
        'dummy': (('lat', 'lon'), np.zeros((len(fine_lat), len(fine_lon))))
    }, coords={'lat': fine_lat, 'lon': fine_lon})

    # Regrid obs_fine to the compatible grid
    print(f"  Regridding to {len(fine_lat)}Ã—{len(fine_lon)} ({target_lat_factor}Ã—{target_lon_factor} factors)...")

    import xesmf as xe
    # The nested grid reaches out to the outer edges of the coarse cells, which
    # can lie slightly outside the observation domain; fill that band from the
    # nearest source cell instead of leaving it unmapped.
    # NOTE(review): requires an xESMF version that supports `extrap_method`.
    regridder = xe.Regridder(
        obs_fine_real,
        target_fine_grid,
        'bilinear',
        extrap_method='nearest_s2d',
    )
    obs_fine_compatible = regridder(obs_fine_real)

    # Ensure coordinates have proper metadata for iris
    obs_fine_compatible['lat'].attrs.update({
        'standard_name': 'latitude',
        'long_name': 'latitude',
        'units': 'degrees_north',
        'axis': 'Y'
    })
    obs_fine_compatible['lon'].attrs.update({
        'standard_name': 'longitude',
        'long_name': 'longitude',
        'units': 'degrees_east',
        'axis': 'X'
    })
    obs_fine_compatible['time'].attrs.update({
        'standard_name': 'time',
        'long_name': 'time',
        'axis': 'T'
    })

    print(f"  âœ“ Compatible fine grid created: {obs_fine_compatible['tas'].shape}")
else:
    obs_fine_compatible = obs_fine_real
    print("  âœ“ Grids already compatible!")

# Downscale the bias-corrected coarse simulation using the fine observations
sd = StatisticalDownscaling(
    variable='tas',
    n_iterations=20
)
result = sd.downscale(
    obs_fine=obs_fine_compatible,
    sim_coarse=sim_fut_ba
)
print(f"  âœ“ Downscaled shape: {result['tas'].shape}")

print("\nâœ… BCSD workflow completed successfully!")
print(f"Output dimensions: {result.dims}")
print(f"Output resolution: {(result.lat.values[1] - result.lat.values[0]):.3f}Â°")
    



Step 3: Statistical downscaling...
  Current downscaling factors: lat=9.25x, lon=9.18x
  âš  Grids not compatible - creating compatible fine grid...
  Regridding to 360Ã—441 (9Ã—9 factors)...
  âœ“ Compatible fine grid created: (4018, 360, 441)
ðŸ”§ StatisticalDownscaling initialized for tas
   Iterations: 20

ðŸ”„ Starting statistical downscaling for tas...
   Converting xarray datasets to iris cubes...
   Creating bilinearly interpolated intermediate data...
   Created npy_stack directory: /beegfs/muduchuru/pkgs_fnl/climdata/docs/examples/sdba/tmp_bcsd_downscale/sim_fine.nc.npy_stack/
   Running ISIMIP3BASD statistical downscaling...
   (This may take a while for large datasets)
downscaling at coarse location ...
(0, 0)
(0, 1)
(0, 2)
(0, 3)
(0, 4)
(0, 5)
(0, 6)
(0, 7)
(0, 8)
(0, 9)
(0, 10)
(0, 11)
(0, 12)
(0, 13)
(0, 14)
(0, 15)
(0, 16)
(0, 17)
(0, 18)
(0, 19)
(0, 20)
(0, 21)
(0, 22)
(0, 23)
(0, 24)
(0, 25)
(0, 26)
(0, 27)
(0, 28)
(0, 29)
(0, 30)
(0, 31)
(0, 32)
(0, 33)
(0, 34)
(0, 

ValueError: cannot reshape array of size 13149 into shape (4018,1,1)

In [None]:
# Re-run downscaling.
# Uses `obs_fine_compatible` and `sim_fut_ba`, which are not defined in this
# cell and must already exist in the kernel.
print("\nStep 3: Statistical downscaling (with fix)...")

sd = StatisticalDownscaling(variable='tas', n_iterations=20)
result = sd.downscale(obs_fine=obs_fine_compatible, sim_coarse=sim_fut_ba)
print(f"  âœ“ Downscaled shape: {result['tas'].shape}")

print("\nâœ… BCSD workflow completed successfully!")
print(f"Output dimensions: {result.dims}")
# Latitude spacing of the output, taken from its first two grid points
result_lat = result.lat.values
print(f"Output resolution: {(result_lat[1] - result_lat[0]):.3f}Â°")