# BCSD: Bias Correction and Spatial Disaggregation

## Setup

First, let's import the necessary libraries.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import xarray as xr

from skdownscale.pointwise_models import BcsdPrecipitation, BcsdTemperature

## Helper Functions for Visualization

We'll define helper functions to visualize cumulative distribution functions (CDFs), which are useful for comparing the statistical properties of our input and downscaled data.

In [None]:
def plot_cdf(ax=None, **kwargs):
    """Plot cumulative distribution functions for multiple datasets.

    Each dataset is sorted and drawn against its plotting positions, so the
    x-axis is cumulative probability and the y-axis is the data value.

    Parameters
    ----------
    ax : matplotlib axis, optional
        Axis to plot on. When given, it is also made the current pyplot axis;
        when omitted, the current pyplot axis is used.
    **kwargs : dict
        Datasets to plot, where key is the legend label and value is the data

    Returns
    -------
    ax : matplotlib axis
        The axis that was drawn on.
    """
    # Compare against None explicitly so the decision never depends on how
    # the axis object happens to evaluate in a boolean context.
    if ax is None:
        ax = plt.gca()
    else:
        plt.sca(ax)

    for label, X in kwargs.items():
        vals = np.sort(X, axis=0)
        pp = scipy.stats.mstats.plotting_positions(vals)
        ax.plot(pp, vals, label=label)
    ax.legend()
    ax.set_xlabel('Cumulative Probability')
    ax.set_ylabel('Value')
    return ax


def plot_cdf_by_month(**kwargs):
    """Draw one CDF panel per calendar month on a 4x3 grid of subplots.

    Parameters
    ----------
    **kwargs : dict
        Datasets to plot, where key is the legend label and value is the
        data. Each value is filtered with ``X.index.month``, so it needs an
        index that exposes a ``month`` attribute.

    Returns
    -------
    fig, axes
        The figure and the array of axes created by ``plt.subplots``.
    """
    fig, axes = plt.subplots(4, 3, sharex=True, sharey=False, figsize=(12, 8))
    fig.suptitle('CDFs by Month', fontsize=14)

    for name, frame in kwargs.items():
        # The month of every row is the same for all twelve panels.
        row_months = frame.index.month
        for month_number, panel in enumerate(axes.flat, start=1):
            ordered = np.sort(frame[row_months == month_number], axis=0)
            positions = scipy.stats.mstats.plotting_positions(ordered)
            panel.plot(positions, ordered, label=name)
            panel.set_title(f'Month {month_number}')

    # Only the final panel carries the legend
    last_panel = axes.flat[-1]
    last_panel.legend()
    fig.tight_layout()
    return fig, axes

## Step 1: Load Climate Data

We'll load sample climate data that includes:
- **Training data**: Coarse-resolution climate model output (predictor)
- **Target data**: High-resolution observations (predictand)

The data is stored in a cloud-optimized Zarr format for efficient access.

In [None]:
# Time window applied to both the training and the target data
training_time_slice = slice('1980', '2001')

# Open the sample Zarr store from cloud storage (anonymous access)
data = xr.open_datatree(
    's3://carbonplan/share/scikit-downscale/test-data.zarr',
    engine='zarr',
    chunks={},
    storage_options=dict(anon=True, endpoint_url='https://rice1.osn.mghpcc.org'),
)

# Pull both groups out of the tree and restrict each to the training period
training, targets = (
    data[group].to_dataset().sel(time=training_time_slice)
    for group in ('training', 'targets')
)

# Show each dataset under its heading
for heading, dataset in (('Training data:', training), ('\nTarget data:', targets)):
    print(heading)
    display(dataset)

## Step 2: Prepare Data for a Single Location

For this tutorial, we'll focus on a single spatial point. We'll extract and prepare both temperature and precipitation data.

### Temperature Data

- Convert from Kelvin to Celsius
- Resample to monthly means

In [None]:
# Predictor: model T2max at point 0, averaged by month (month-start labels)
# and shifted from Kelvin to Celsius
training_point = training.isel(point=0).to_dataframe()
X_temp = training_point[['T2max']].resample('MS').mean() - 273.15

# Predictand: observed Tmax at point 0, averaged by month
targets_point = targets.isel(point=0).to_dataframe()
y_temp = targets_point[['Tmax']].resample('MS').mean()

# Preview both series
print('Training temperature (first 5 months):')
display(X_temp.head())
print('\nTarget temperature (first 5 months):')
display(y_temp.head())

### Precipitation Data

- Convert units to mm/day by multiplying by 24
- Resample to monthly totals (the code applies the constant factor after summing, which gives the same result)

In [None]:
# Predictor: model PREC_TOT at point 0, summed by month (month-start labels).
# The factor of 24 is the unit conversion; it is applied after the monthly
# sum, which is equivalent to scaling each value first.
training_precip = training.isel(point=0).to_dataframe()[['PREC_TOT']]
X_pcp = training_precip.resample('MS').sum() * 24

# Predictand: observed Prec at point 0, summed by month
targets_precip = targets.isel(point=0).to_dataframe()[['Prec']]
y_pcp = targets_precip.resample('MS').sum()

# Preview both series
print('Training precipitation (first 5 months):')
display(X_pcp.head())
print('\nTarget precipitation (first 5 months):')
display(y_pcp.head())

## Step 3: Downscale Temperature with BCSD

The BCSD temperature model uses quantile mapping to correct biases in the climate model output.

### How it works:
1. **Fit**: Learn the relationship between model and observed quantiles
2. **Predict**: Apply the correction to new data
3. **Adjust**: Add the bias-corrected anomaly back to the original data

In [None]:
# Create the BCSD temperature model and fit it on the monthly series
bcsd_temp = BcsdTemperature()
bcsd_temp.fit(X_temp, y_temp)

# Predict on the training series, then add the model series back.
# NOTE(review): this treats the prediction as an additive anomaly that lines
# up with X_temp's index and column - confirm against the BcsdTemperature docs.
temp_anomalies = bcsd_temp.predict(X_temp)
temp_downscaled = temp_anomalies + X_temp

print('Downscaled temperature (first 5 months):')
display(temp_downscaled.head())

### Evaluate Temperature Downscaling

Let's visualize how well the downscaling corrects the model bias by comparing CDFs.

In [None]:
# Plot overall CDF
fig, ax = plt.subplots(figsize=(10, 6))
plot_cdf(
    ax=ax,
    Training=X_temp.values.flatten(),
    Observed=y_temp.values.flatten(),
    Downscaled=temp_downscaled.values.flatten(),
)
ax.set_title('Temperature CDF Comparison')
ax.set_ylabel('Temperature (°C)')
plt.show()

# Plot time series for a sample period.
# DataFrame.plot names each line after its column and does not apply `label=`,
# so the single data column is plotted as a Series, which does honor `label=`
# and gives the legend the intended Downscaled/Observed/Training entries.
fig, ax = plt.subplots(figsize=(12, 5))
temp_downscaled.loc['2000':'2001'].iloc[:, 0].plot(ax=ax, label='Downscaled')
y_temp.loc['2000':'2001'].iloc[:, 0].plot(ax=ax, label='Observed')
X_temp.loc['2000':'2001'].iloc[:, 0].plot(ax=ax, label='Training', alpha=0.7)
ax.set_title('Temperature Time Series (2000-2001)')
ax.set_ylabel('Temperature (°C)')
ax.legend()
plt.show()

### Monthly CDFs for Temperature

Since climate statistics vary by season, let's examine the CDFs for each month separately.

In [None]:
# One CDF panel per calendar month; the dict keys become the legend labels
temperature_series = {
    'Training': X_temp,
    'Observed': y_temp,
    'Downscaled': temp_downscaled,
}
fig, axes = plot_cdf_by_month(**temperature_series)
fig.suptitle('Temperature CDFs by Month', fontsize=14)
plt.show()

## Step 4: Downscale Precipitation with BCSD

Precipitation requires special handling due to its non-Gaussian distribution and the presence of zero values.

### Key differences from temperature:
- Uses multiplicative (rather than additive) adjustments
- Handles zero-precipitation days separately
- Uses specialized quantile mapping for non-negative values

In [None]:
# Create the BCSD precipitation model and fit it on the monthly series
bcsd_pcp = BcsdPrecipitation()
bcsd_pcp.fit(X_pcp, y_pcp)

# Predict on the training series, then multiply by the model series.
# NOTE(review): this treats the prediction as a scaling factor that lines up
# with X_pcp's index and column - confirm against the BcsdPrecipitation docs.
pcp_scaling = bcsd_pcp.predict(X_pcp)
pcp_downscaled = pcp_scaling * X_pcp

print('Downscaled precipitation (first 5 months):')
display(pcp_downscaled.head())

### Evaluate Precipitation Downscaling

Let's compare the statistical properties of our downscaled precipitation with observations.

In [None]:
# Plot overall CDF
fig, ax = plt.subplots(figsize=(10, 6))
plot_cdf(
    ax=ax,
    Training=X_pcp.values.flatten(),
    Observed=y_pcp.values.flatten(),
    Downscaled=pcp_downscaled.values.flatten(),
)
ax.set_title('Precipitation CDF Comparison')
ax.set_ylabel('Precipitation (mm)')
plt.show()

# Plot time series for a sample period.
# DataFrame.plot names each line after its column and does not apply `label=`,
# so the single data column is plotted as a Series, which does honor `label=`
# and gives the legend the intended Downscaled/Observed/Training entries.
fig, ax = plt.subplots(figsize=(12, 5))
pcp_downscaled.loc['2000':'2001'].iloc[:, 0].plot(ax=ax, label='Downscaled')
y_pcp.loc['2000':'2001'].iloc[:, 0].plot(ax=ax, label='Observed')
X_pcp.loc['2000':'2001'].iloc[:, 0].plot(ax=ax, label='Training', alpha=0.7)
ax.set_title('Precipitation Time Series (2000-2001)')
ax.set_ylabel('Precipitation (mm)')
ax.legend()
plt.show()

### Monthly CDFs for Precipitation

Precipitation patterns often vary significantly by month, so let's examine monthly CDFs.

In [None]:
# One CDF panel per calendar month; the dict keys become the legend labels
precipitation_series = {
    'Training': X_pcp,
    'Observed': y_pcp,
    'Downscaled': pcp_downscaled,
}
fig, axes = plot_cdf_by_month(**precipitation_series)
fig.suptitle('Precipitation CDFs by Month', fontsize=14)
plt.show()

## Summary

In this tutorial, we demonstrated how to:

1. ✅ Load and prepare climate data from cloud storage
2. ✅ Apply BCSD temperature downscaling using quantile mapping
3. ✅ Apply BCSD precipitation downscaling with special handling for non-negative values
4. ✅ Evaluate results using CDFs and time series plots

### Key Takeaways

- **Temperature**: BCSD corrects biases using additive quantile mapping
- **Precipitation**: BCSD uses multiplicative adjustments due to its non-Gaussian nature
- **Evaluation**: CDFs are effective for comparing statistical properties across datasets
- **Monthly analysis**: Examining results by month reveals seasonal performance

### Next Steps

- Try applying BCSD to multiple spatial points using `PointWiseDownscaler`
- Explore other downscaling methods like GARD or Analog methods
- Compare different methods on the same dataset
- Apply downscaling to your own climate data

### References

Wood, A. W., Leung, L. R., Sridhar, V., & Lettenmaier, D. P. (2004). Hydrological implications of dynamical and statistical approaches to downscaling climate model outputs. *Climatic Change*, 62(1-3), 189-216.