In [2]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
import os
import seaborn as sns
import random 
import dask
import babet as bb
from moarpalettes import get_palette

sns.set_theme(style="white")
sns.set_style("white")

random.seed(10)
# inidates = ['2023-10-11', '2023-10-15', '2023-10-17']
inidates = ['2023-10-11', '2023-10-13', '2023-10-15', '2023-10-17']
experiments = ['pi', 'curr', 'incr']

dask.config.set(**{'array.slicing.split_large_chunks': True})

<dask.config.set at 0x7fe8bceccad0>

In [3]:
aberdeen = [-4, -2, 55.5, 57.5]
uk = [-10.5, 9.5, 45.5, 61.3] # longitude min, longitude max, latitude min, latitude max

def bootstrap_sample(data, n_iterations=100):
    """Bootstrap resampling with replacement.""" 
    means = np.array([
        np.mean(np.random.choice(data, size=len(data), replace=True))
        for _ in range(n_iterations)
    ])
    return np.percentile(means, [2.5, 97.5])  # 95% confidence interval

In [4]:
def calc_precip_scaling(t2m, tp, latitude_name = 'latitude', longitude_name = 'longitude', climate='1950'):
    """
    Function to calculate the scaling of precipitation with temperature. Averaging over area first

    Parameters
    ----------
    t2m : xarray DataArray
        Temperature data for the region of interest. Needs to have dimension climate.
    tp : xarray DataArray
        Precipitation data for the region of interest. Needs to have dimension climate.
    latitude_name : str
        Name of the latitude dimension in the DataArray. 
    longitude_name : str
        Name of the longitude dimension in the DataArray.
    climate : str, optional
        Climate scenario to compare against. The default is '1950'.

    Returns
    -------
    scaling_factor : xarray DataArray
        Scaling factor for precipitation with temperature.
    """
    
    # Calculate the mean temperature and precipitation
    mean_t2m = t2m.mean(dim=[latitude_name, longitude_name])
    mean_tp = tp.mean(dim=[latitude_name, longitude_name])

    # Calculate change in temperature over Aberdeenshire
    t2m_present = mean_t2m.sel(climate='present')
    t2m_climate = mean_t2m.sel(climate=climate)
    delta_t2m = t2m_present - t2m_climate

    # Calculate fractional precipitation change
    tp_present = mean_tp.sel(climate='present')
    tp_climate = mean_tp.sel(climate=climate)
    delta_tp = (tp_present - tp_climate)/tp_present

    # Calculate the scaling factor
    scaling_factor = delta_tp / delta_t2m
    return scaling_factor


In [5]:
color_palette = get_palette.Petroff6().to_sn_palette()

In [6]:
color_palette

# Load data and post-process

In [8]:
# ERA5 analogues
era5_analogues = bb.data.Data.get_era5_analogues()
era5_analogues['tp'] = era5_analogues['tp'].sel(lat=slice(uk[3], uk[2]), lon=slice(uk[0], uk[1]))
era5_analogues['msl'] = era5_analogues['msl'].sel(lat=slice(uk[3], uk[2]), lon=slice(uk[0], uk[1]))/100
era5_analogues['t2m'] = era5_analogues['t2m'].sel(lat=slice(uk[3], uk[2]), lon=slice(uk[0], uk[1]))

# RACMO analogues
racmo_msl, racmo_tp = bb.data.Data.get_racmo_analogues()
racmo_tp['tp'] = racmo_tp['tp'].sel(lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1]))
racmo_msl['msl'] = racmo_msl['msl'].sel(lat=slice(uk[3], uk[2]), lon=slice(uk[0], uk[1]))

# PGW 
pgw = bb.data.Data.get_pgw()
pgw['tp'] = (((pgw.tp.sel(time=slice('2023-10-19 00', '2023-10-22 00'))*3*3600).sum(dim='time'))/1e5).sel(lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1]))
pgw['msl'] = (pgw.msl.sel(time=slice('2023-10-19 00', '2023-10-22 00'), lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1])).mean('time').squeeze())/100
pgw['t2m'] = (pgw.t2m.sel(time=slice('2023-10-19 00', '2023-10-22 00'), lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1])).mean('time').squeeze())

# FBA IFS
ifs = bb.data.Data.get_fba_ifs()
ifs['tp'] = ((ifs.tp.sel(time='2023-10-22 00') - ifs.tp.sel(time='2023-10-19 00'))*1000).sel(latitude=slice(uk[3], uk[2]), longitude=slice(uk[0], uk[1]))
ifs['msl'] = ifs.msl.sel(time=slice('2023-10-19 00', '2023-10-22 00'), latitude=slice(uk[3], uk[2]), longitude=slice(uk[0], uk[1])).mean(dim='time')/100
ifs['t2m'] = ifs.t2m.sel(time=slice('2023-10-19 00', '2023-10-22 00'), latitude=slice(uk[3], uk[2]), longitude=slice(uk[0], uk[1])).mean(dim='time')

# FBA MICAS
micas = bb.data.Data.get_fba_micas()
micas['tp'] = micas.tp.sel(time=slice('2023-10-19 12', '2023-10-21 12'), lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1])).sum(dim='time')*24*3600
micas['msl'] = micas.msl.sel(time=slice('2023-10-19 12', '2023-10-21 12'), lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1])).mean(dim='time')/100
micas['tas'] = micas.tas.sel(time=slice('2023-10-19 12', '2023-10-21 12'), lat=slice(uk[2], uk[3]), lon=slice(uk[0], uk[1])).mean(dim='time')

Importing data from pre-existing file
Importing data from pre-existing file
Importing data from pre-existing file
Importing data from pre-existing file


# Testing different calculations

In [9]:
# Slice latitude and longitude just once
region_t2m = ifs.t2m.sel(latitude=slice(aberdeen[3], aberdeen[2]),
                       longitude=slice(aberdeen[0], aberdeen[1])).sel(inidate=slice('2023-10-15', '2023-10-17'))
region_tp = ifs.tp.sel(latitude=slice(aberdeen[3], aberdeen[2]),
                       longitude=slice(aberdeen[0], aberdeen[1])).sel(inidate=slice('2023-10-15', '2023-10-17'))

# Define list of climates to compare against 'present'
climates = ['1870', '1950', 'future1']

# Use dictionary comprehension, compute after loop to maximize parallelism
scaling_factors = {
    climate: calc_precip_scaling(region_t2m, region_tp, climate=climate).mean('number').values
    for climate in climates
}

# Compute all at once (parallelized if using Dask)
from dask import compute
computed_values = compute(*scaling_factors.values())
scaling_factors = dict(zip(climates, computed_values))

print("Precipitation scaling using local ABD temperatures")
print(scaling_factors)

Precipitation scaling using local ABD temperatures
{'1870': array([5.401507  , 0.03213355], dtype=float32), '1950': array([-0.9181732 ,  0.05896278], dtype=float32), 'future1': array([0.95739   , 0.05651156], dtype=float32)}


In [15]:
# Slice latitude and longitude just once
region_t2m = ifs.t2m.sel(inidate=slice('2023-10-15', '2023-10-17'))
region_tp = ifs.tp.sel(latitude=slice(aberdeen[3], aberdeen[2]),
                       longitude=slice(aberdeen[0], aberdeen[1])).sel(inidate=slice('2023-10-15', '2023-10-17'))

# Define list of climates to compare against 'present'
climates = ['1870', '1950', 'future1']

# Use dictionary comprehension, compute after loop to maximize parallelism
scaling_factors = {
    climate: calc_precip_scaling(region_t2m, region_tp, climate=climate).mean('number').values
    for climate in climates
}

# Compute all at once (parallelized if using Dask)
from dask import compute
computed_values = compute(*scaling_factors.values())
scaling_factors = dict(zip(climates, computed_values))

print("Precipitation scaling using UK temperatures")
print(scaling_factors)

Precipitation scaling using UK temperatures
{'1870': array([-0.60658985,  0.10509308], dtype=float32), '1950': array([-0.14231068,  0.0939611 ], dtype=float32), 'future1': array([2.0398295 , 0.02380418], dtype=float32)}


In [20]:
ifs_ = bb.data.Data.get_fba_ifs()
starttime = '2023-10-19 00'
endtime = '2023-10-22 00'

# Slice latitude and longitude just once
region_t2m = ifs_.t2m.sel(inidate=slice('2023-10-15', '2023-10-17'), time=slice(starttime, endtime)).mean(dim='time')
region_tp = ifs_.tp.sel(latitude=slice(aberdeen[3], aberdeen[2]),
                       longitude=slice(aberdeen[0], aberdeen[1])).sel(inidate=slice('2023-10-15', '2023-10-17'), time=slice(starttime, endtime)).mean(dim='time')

# Define list of climates to compare against 'present'
climates = ['1870', '1950', 'future1']

# Use dictionary comprehension, compute after loop to maximize parallelism
scaling_factors = {
    climate: calc_precip_scaling(region_t2m, region_tp, climate=climate).mean('number').values
    for climate in climates
}

# Compute all at once (parallelized if using Dask)
from dask import compute
computed_values = compute(*scaling_factors.values())
scaling_factors = dict(zip(climates, computed_values))

print("Precipitation scaling using European temperatures")
print(scaling_factors)

Precipitation scaling using European temperatures
{'1870': array([0.00228653, 0.1133773 ], dtype=float32), '1950': array([0.09381542, 0.10747275], dtype=float32), 'future1': array([0.3311451 , 0.12129134], dtype=float32)}


In [21]:
# Load gloabl data
glob_dir = '/gf5/predict/AWH019_ERMIS_ATMICP/Babet/DATA/MED-R/EXP/{}/GLO100/sfc/pf'
glob_file_dir = {'curr': glob_dir.format('curr'),
            'pi': glob_dir.format('pi'),
            'incr': glob_dir.format('incr')}
climates = ['1870', 'present', 'future1']
tmp = []
for e, exp in enumerate(['pi', 'curr', 'incr']):
    tmp.append(xr.open_mfdataset(os.path.join(glob_file_dir[exp], '*.nc'), preprocess=bb.data.Data.preproc_ds, engine='netcdf4').expand_dims(climate=[climates[e]]))
ifs_glob = xr.concat(tmp, dim='climate')

In [22]:
# Slice latitude and longitude just once
region_t2m = ifs_glob.t2m.sel(inidate=slice('2023-10-15', '2023-10-17'), time=slice(starttime, endtime)).mean(dim='time')
region_tp = ifs.tp.sel(latitude=slice(aberdeen[3], aberdeen[2]),
                       longitude=slice(aberdeen[0], aberdeen[1])).sel(inidate=slice('2023-10-15', '2023-10-17'))

# Define list of climates to compare against 'present'
climates = ['1870','future1']

# Use dictionary comprehension, compute after loop to maximize parallelism
scaling_factors = {
    climate: calc_precip_scaling(region_t2m, region_tp, climate=climate).mean('number').values
    for climate in climates
}

# Compute all at once (parallelized if using Dask)
from dask import compute
computed_values = compute(*scaling_factors.values())
scaling_factors = dict(zip(climates, computed_values))

print("Precipitation scaling using global temperatures")
print(scaling_factors)

Precipitation scaling using global temperatures
{'1870': array([-0.19274288,  0.11758389], dtype=float32), 'future1': array([0.22639696, 0.05634849], dtype=float32)}


# RACMO analogues

In [7]:
xr.open_dataset('/gf5/predict/AWH019_ERMIS_ATMICP/Babet/DATA/RACMO_analogues/analogs_RACMO_2023-10-20_pr_scaled_response_SCOT_1991-2020__1951-1980.nc')

# Results from all methods

In [53]:
from matplotlib import colors


stats = [{'med': 16.8, 'q1': -17.0, 'q3': 60.3, 'whislo' : 0, 'whishi': 0},  # ERA5 analogues
        #  {},  # RACMO analogues
         {'med': 10.34, 'q1': 6.38, 'q3': 13.09, 'whislo' : 0, 'whishi': 0},  # PGW past
         {'med': 10.99, 'q1': 3.85, 'q3': 21.13, 'whislo' : 0, 'whishi': 0},  # PGW FUT1
         {'med': 9.55, 'q1': 4.63, 'q3': 16.68, 'whislo' : 0, 'whishi': 0},  # PGW FUT2
         {'med': -25.2, 'q1': -41.6, 'q3': -8.7, 'whislo' : 0, 'whishi': 0},  # FBA IFS, past, 15
         {'med': 46.5, 'q1': 13.3, 'q3': 84.6, 'whislo' : 0, 'whishi': 0},  # FBA IFS, fut, 15
         {'med': 13.6, 'q1': 9.4, 'q3': 17.6, 'whislo' : 0, 'whishi': 0},  # FBA IFS, past, 17
         {'med': 14.4, 'q1': 7.1, 'q3': 21.7, 'whislo' : 0, 'whishi': 0},  # FBA IFS, fut, 17
         {'med': 12.9, 'q1': 12.9, 'q3': 12.9, 'whislo' : 0, 'whishi': 0},  # FBA ACCESS, future
         {'med': -8.7, 'q1': -8.7, 'q3': -8.7, 'whislo' : 0, 'whishi': 0},  # FBA ACCESS, past
        #  {}   # Probabilistic
]

# To help with plotting
g = 1.5  # Space between groups
b = 0.5  # Space within groups
labels = ['ERA5 analogues', 'PGW, past -1.5K', 'PGW, future +1.5K', 'PGW, future +3K', 'FBA IFS, past, init 15th', 'FBA IFS, future, init 15th', 'FBA IFS, past, init 17th', 'FBA IFS, future, init 17th', 'FBA ACCESS, future', 'FBA ACCESS, past']
positions = [b, # ERA5 analogues
             # RACMO analogues
             b+g, 2*b+g, 3*b+g, # PGW
             3*b+2*g, 4*b+2*g, 5*b+2*g, 6*b+2*g, # FBA IFS
             6*b+3*g, 7*b+3*g] # FBA ACCESS
colour_options = [ color_palette[4], color_palette[1]]  #["gray", "darkorange"]
colors = [colour_options[0], # ERA5 analogues
          # RACMO analogues
          colour_options[0], colour_options[1], colour_options[1], # PGW 
          colour_options[0], colour_options[1], colour_options[0], colour_options[1], # FBA IFS
          colour_options[0], colour_options[1], # FBA ACCESS
          ]

In [8]:
# # https://stackoverflow.com/questions/54033076/can-i-generate-a-boxplot-without-a-dataset-and-only-having-the-relevant-values

# # Main plot
# _, ax = plt.subplots();

# # Horizontal line at 7 %/K and 14 %/K and 0
# plt.axhline(y=7, color='black', linestyle='--', linewidth=1)
# plt.axhline(y=14, color='black', linestyle='--', linewidth=1)
# plt.axhline(y=0, color=color_palette[2], linestyle='-', linewidth=1)

# boxplot = ax.bxp(stats, showfliers=False, 
#               showcaps=False, 
#               whiskerprops={'linestyle': ''}, 
#               medianprops={'linestyle': '-', 'color': 'black'}, 
#               boxprops={'linestyle': '', 'alpha': 0.7},
#               patch_artist=True,
#               positions=positions);

# # Apply colors
# for patch, color in zip(boxplot['boxes'], colors):
#     patch.set_facecolor(color)

# # Plot settings
# plt.ylabel("Precipitation scaling (%/K)")
# ax.set_xticks(positions)
# ax.set_xticklabels(labels, rotation=30, ha='right')
# sns.despine()
# plt.tight_layout()
# # Add a faint grid in dotted lines
# plt.grid(axis='y', linestyle=':', alpha=0.5)

# plt.savefig("figures/PAPER3_precip_scaling_boxplot.png", dpi=600, bbox_inches='tight')
# plt.savefig("figures/PAPER3_precip_scaling_boxplot.pdf")