### This notebook calculates the regression patterns plotted in Figure 2 of Liu & Grise (2025).

#### import libraries

In [1]:
import numpy as np
import netCDF4 as nc
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
import xarray as xr
import pickle
import os
from utils import *

### combine yearly CRH data and convert height to pressure levels

In [None]:
# Merge the yearly CloudSat longwave CRH files into one NetCDF file.
# The output name does not match the "20*" glob below, so a re-run does not
# pick up the previously written combined file as an input.
output_file = '/bjerknes_raid5/cloudsat/CRH_LW_CloudSat_combined.nc'
file_path = '/bjerknes_raid5/cloudsat/CRH_LW_CloudSat_20*.nc'

# Open every file matched by the glob and combine them by their coordinates.
combined_dataset = xr.open_mfdataset(file_path, combine='by_coords')
combined_dataset.to_netcdf(output_file)

print(f"Combined dataset saved to {output_file}")

In [20]:
# Load the combined CloudSat CRH file and move it from a height axis to
# pressure levels.
filename = '/bjerknes_raid5/cloudsat/CRH_LW_CloudSat_combined.nc'
cloudsat_crh_ds = xr.open_dataset(filename)

# Heights are multiplied by 1000 before conversion
# (presumably km -> m; confirm against utils.height2pressure).
pressure_lev = height2pressure(cloudsat_crh_ds["height"].values * 1000)

# Attach pressure as a coordinate on the height dimension, make it the
# dimension coordinate, then drop the original height and rename to "lev".
cloudsat_crh_ds = cloudsat_crh_ds.assign_coords(pressure_lev=("height", pressure_lev))
cloudsat_crh_ds = cloudsat_crh_ds.swap_dims({"height": "pressure_lev"})
cloudsat_crh_ds = cloudsat_crh_ds.drop_vars("height")
cloudsat_crh_ds = cloudsat_crh_ds.rename({"pressure_lev": "lev"})

# Interpolate to standard pressure levels
target_levels = define_targetlevels()
cloudsat_crh_ds_interped = cloudsat_crh_ds.interp(lev=target_levels)

In [21]:
# Write the pressure-level CloudSat CRH dataset to disk.
crh_interped_path = '/bjerknes_raid5/cloudsat/CRH_LW_CloudSat_combined_interped.nc'
cloudsat_crh_ds_interped.to_netcdf(crh_interped_path)

In [3]:
# Load the monthly CloudSat radar-lidar product and move it from a height
# axis to pressure levels, then average over longitude.
filename = '/bjerknes_raid5/cloudsat/radarlidar_monthly_2006-2020.nc'
cloudsat_cl_ds = xr.open_dataset(filename)

# Heights are passed to the converter unscaled here
# (presumably already in metres; confirm against utils.height2pressure).
pressure_lev = height2pressure(cloudsat_cl_ds["height"].values)

# Attach pressure as a coordinate on the height dimension, make it the
# dimension coordinate, then drop the original height and rename to "lev".
cloudsat_cl_ds = cloudsat_cl_ds.assign_coords(pressure_lev=("height", pressure_lev))
cloudsat_cl_ds = cloudsat_cl_ds.swap_dims({"height": "pressure_lev"})
cloudsat_cl_ds = cloudsat_cl_ds.drop_vars("height")
cloudsat_cl_ds = cloudsat_cl_ds.rename({"pressure_lev": "lev"})
cloudsat_cl_ds = cloudsat_cl_ds.mean(dim='lon')

# Interpolate to standard pressure levels
target_levels = define_targetlevels()
cloudsat_cl_ds_interped = cloudsat_cl_ds.interp(lev=target_levels)

In [4]:
# Write the zonal-mean, pressure-level CloudSat cloud-fraction dataset to disk.
cl_interped_path = '/bjerknes_raid5/cloudsat/CL_CloudSat_monthly_combined_interped.nc'
cloudsat_cl_ds_interped.to_netcdf(cl_interped_path)

### convert clcalipso to pressure levels

In [9]:
# Convert the zonal-mean clcalipso output of each listed model from the
# "alt40" height axis to standard pressure levels and save the result.
for model_name in ['MIROC6', 'MRI-ESM2-0']:
    filename = '/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/clcalipso/clcalipso_CFmon_'+model_name+'_amip_r1i1p1f1_gn_197901-201412_zonalmean.nc'
    clcalipso_cl_ds = xr.open_dataset(filename)

    pressure_lev = height2pressure(clcalipso_cl_ds["alt40"].values)

    # Attach pressure on the alt40 dimension, make it the dimension
    # coordinate, drop alt40 itself and rename the new axis to "lev".
    clcalipso_cl_ds = clcalipso_cl_ds.assign_coords(pressure_lev=("alt40", pressure_lev))
    clcalipso_cl_ds = clcalipso_cl_ds.swap_dims({"alt40": "pressure_lev"})
    clcalipso_cl_ds = clcalipso_cl_ds.drop_vars("alt40")
    clcalipso_cl_ds = clcalipso_cl_ds.rename({"pressure_lev": "lev"})

    # Interpolate to standard pressure levels
    target_levels = define_targetlevels()
    clcalipso_cl_ds_interped = clcalipso_cl_ds.interp(lev=target_levels)

    # NOTE(review): the output name has no separator between "zonalmean" and
    # "pressure_levels"; left as-is because other code may read this exact path.
    clcalipso_cl_ds_interped.to_netcdf('/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/clcalipso/clcalipso_CFmon_'+model_name+'_amip_r1i1p1f1_gn_197901-201412_zonalmean'+'pressure_levels'+'.nc')

#### Load the monthly SAM index for each model, as calculated in calculate_SAM_models.ipynb

In [3]:
# Read the per-model monthly SAM index from the pickle file.
save_directory = '../data'
sam_pickle_path = os.path.join(save_directory, 'sam_models_monthly_2000_2014.pkl')

# pickle.load can execute arbitrary code; only load files you produced yourself.
with open(sam_pickle_path, 'rb') as f:
    sam_models_monthly = pickle.load(f)

#### functions to calculate regression coefficient and p-value

In [4]:
def calc_slope_and_pval(x, y):
    """Regress ``y`` on ``x`` by least squares, ignoring NaN pairs.

    Parameters
    ----------
    x, y : array-like
        One-dimensional samples of equal length. A pair is dropped when
        either member is NaN.

    Returns
    -------
    slope : float
        Regression slope of ``y`` on ``x``.
    p_value : float
        p-value reported by ``scipy.stats.linregress`` for the slope.

    Both values are NaN when the regression is undefined: fewer than two
    valid pairs, or all valid ``x`` values identical.
    """
    # Accept plain sequences as well as arrays; boolean-mask indexing below
    # needs ndarrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Only keep pairs where neither is NaN
    mask = ~(np.isnan(x) | np.isnan(y))
    if np.count_nonzero(mask) < 2:
        return np.nan, np.nan

    x_valid = x[mask]
    y_valid = y[mask]

    # A constant predictor has no defined slope. linregress raises ValueError
    # for it in recent SciPy versions, which would abort a vectorized
    # regression over a whole grid, so treat it like the "too few points" case.
    if np.all(x_valid == x_valid[0]):
        return np.nan, np.nan

    fit = linregress(x_valid, y_valid)
    return fit.slope, fit.pvalue

In [11]:
def calculate_regression_with_pvalues(file, jet_latitudes_noseason, lat_range, file_type, MERRA2=False, cloudsat=False, cloudsat_CRH=False):
    """Regress a cloud field on an index time series at every grid point.

    Parameters
    ----------
    file : str
        Path of the NetCDF file to open. For model files the third
        ``_``-separated field of the file name is read as the model name
        (used only to special-case 'CAS-ESM2-0' and 'CIESM').
    jet_latitudes_noseason : array-like
        Predictor time series, passed as ``x`` to ``calc_slope_and_pval``
        along the ``time`` dimension. Callers in this notebook pass a
        standardized SAM index.
    lat_range : tuple
        ``(south, north)`` bounds used as ``slice(*lat_range)`` on ``lat``.
    file_type : {'cloud_fraction', 'cloud_radiative_heating'}
        Selects which variable is read from the file.
    MERRA2, cloudsat, cloudsat_CRH : bool, optional
        Select the variable names, time window and output layout for those
        data sources. With all three False the file is treated as model
        output.

    Returns
    -------
    regression_coeff_da, p_values_da : xarray.DataArray
        Regression slopes and their p-values on a 1-degree latitude grid from
        -90 to 0, with a ``lev`` or ``height`` vertical coordinate depending
        on the data source.

    Raises
    ------
    ValueError
        If ``file_type`` is not one of the two supported values.
    """
    # Fail early and clearly; otherwise an unknown file_type only surfaces
    # later as an unbound-variable error after the file has been loaded.
    if file_type not in ('cloud_fraction', 'cloud_radiative_heating'):
        raise ValueError(
            f"Unknown file_type {file_type!r}; expected 'cloud_fraction' or 'cloud_radiative_heating'"
        )

    # Time window and (for the satellite products) the regular time axis the
    # data are reindexed onto so that gaps become NaN.
    if cloudsat:
        time_window, reindex_freq = slice('2007', '2018'), 'MS'
    elif cloudsat_CRH:
        time_window, reindex_freq = slice('2007', '2018'), '5D'
    else:
        time_window, reindex_freq = slice('2000', '2014'), None

    dataset = xr.open_dataset(file).sel(time=time_window).sel(lat=slice(*lat_range))
    if cloudsat:
        dataset = dataset.mean(dim='lon')
    if reindex_freq is not None:
        full_time_range = pd.date_range(start=dataset.time.min().values, end=dataset.time.max().values, freq=reindex_freq)
        dataset = dataset.reindex(time=full_time_range)

    common_lat_grid = np.arange(-90, 1, 1)

    # Anomalies relative to the mean of each calendar day-of-year.
    dayofyear_mean = dataset.groupby('time.dayofyear').mean(dim='time')
    anomaly = dataset.groupby('time.dayofyear') - dayofyear_mean

    if file_type == 'cloud_fraction':
        if MERRA2:
            data_var = anomaly['CLOUD']
        elif cloudsat:
            data_var = anomaly['cloud_fraction_on_levels']
        else:
            data_var = anomaly['cl']
    else:  # 'cloud_radiative_heating'
        if MERRA2:
            # Longwave only; the shortwave terms are deliberately left out:
            # anomaly['DTDTLWR'] + anomaly['DTDTSWR'] - anomaly['DTDTLWRCLR'] - anomaly['DTDTSWRCLR']
            data_var = anomaly['DTDTLWR'] - anomaly['DTDTLWRCLR']
        elif cloudsat_CRH:
            # NOTE(review): this branch regresses the raw field, not the
            # day-of-year anomaly used by every other branch -- confirm intended.
            data_var = dataset['CRH_LWcloud']
        else:
            data_var = anomaly['temp_tendency_K_day']

    data_var_interp = data_var.interp(lat=common_lat_grid)

    # vectorize=True applies calc_slope_and_pval to the time series at each
    # remaining grid point; each call yields a (slope, p-value) pair.
    regression_coeff, p_values = xr.apply_ufunc(
        calc_slope_and_pval,
        jet_latitudes_noseason,
        np.squeeze(data_var_interp),
        input_core_dims=[['time'], ['time']],
        vectorize=True,
        dask='parallelized',
        output_core_dims=[[], []],
        output_dtypes=[float, float]
    )

    # Model name from the file name; None when the name has too few fields
    # (e.g. reanalysis/satellite files), where it is not needed anyway.
    name_parts = file.split('/')[-1].split('_')
    model_name = name_parts[2] if len(name_parts) > 2 else None

    if MERRA2:
        # presumably K/s -> K/day for heating and fraction -> percent for cloud; TODO confirm
        scale = 86400 if file_type == 'cloud_radiative_heating' else 100
        regression_coeff_da = xr.DataArray(regression_coeff * scale,
                                           dims=['lev', 'lat'], coords={'lat': common_lat_grid, 'lev': dataset['lev']})
        p_values_da = xr.DataArray(p_values, dims=['lev', 'lat'], coords={'lat': common_lat_grid, 'lev': dataset['lev']})
    elif cloudsat:
        # Results are averaged over the file's 'doop' dimension; height is divided by 1000.
        regression_coeff_da = xr.DataArray(regression_coeff.mean(dim = 'doop'), dims=['lat', 'height'], coords={'lat': common_lat_grid, 'height': dataset['height']/1000})
        p_values_da = xr.DataArray(p_values.mean(dim = 'doop'), dims=['lat', 'height'], coords={'lat': common_lat_grid, 'height': dataset['height']/1000})
    elif cloudsat_CRH:
        regression_coeff_da = xr.DataArray(regression_coeff, dims=['lat', 'height'], coords={'lat': common_lat_grid, 'height': dataset['height']})
        p_values_da = xr.DataArray(p_values, dims=['lat', 'height'], coords={'lat': common_lat_grid, 'height': dataset['height']})
    elif model_name in ['CAS-ESM2-0', 'CIESM']:
        # These two models get an extra factor of 100
        # (presumably they store cloud amount as a fraction; TODO confirm).
        regression_coeff_da = xr.DataArray(regression_coeff * 100, dims=['lev', 'lat'],
                                           coords={'lat': common_lat_grid, 'lev': dataset['lev'] / 100})
        p_values_da = xr.DataArray(p_values, dims=['lev', 'lat'], coords={'lat': common_lat_grid, 'lev': dataset['lev']/100})
    else:
        # Same layout for both file types; lev is divided by 100
        # (presumably Pa -> hPa; TODO confirm).
        regression_coeff_da = xr.DataArray(regression_coeff, dims=['lev', 'lat'], coords={'lat': common_lat_grid, 'lev': dataset['lev'] / 100})
        p_values_da = xr.DataArray(p_values, dims=['lev', 'lat'], coords={'lat': common_lat_grid, 'lev': dataset['lev'] / 100})

    return regression_coeff_da, p_values_da

### Calculate regression maps for cloud fraction (cl)

In [5]:
# Collect the full path of every entry in the zonal-mean cloud-fraction directory.
# The directory string ends with "/", so plain concatenation yields a valid path.
directory = '/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/'
file_names = os.listdir(directory)
files = [directory + entry for entry in file_names]
print(len(files))

27


In [6]:
# Per-model regression maps of cloud fraction on the standardized SAM index,
# kept both as a list (for the multi-model mean/spread) and keyed by model name.
regression_coeffs_list_cl = []
regression_coeffs_cl_sam_models = {}
regression_coeffs_cl_sam_models_p_values = {}

# Define latitude range
lat_range = (-90, 0)

file_type = 'cloud_fraction'

# Loop through each model file that has a matching SAM index
for file in files:
    # File names look like cl_amip_<model>_zonalmean_...; parse the base name
    # so the result does not depend on underscores in the directory path.
    model_name = os.path.basename(file).split("_")[2]
    if model_name in sam_models_monthly:
        print(f'Processing {model_name}...')
        print(file)
        # Monthly means over 2000-2014, then standardize (zero mean, unit std).
        sam_df = sam_models_monthly[model_name].resample('ME').mean().loc['2000':'2014']
        sam_anomaly = (sam_df - sam_df.mean()) / sam_df.std()
        regression_coeff, p_values = calculate_regression_with_pvalues(file, sam_anomaly.values, lat_range, file_type)

        # Store regression_coeff for model-mean and spread calculation
        regression_coeffs_list_cl.append(regression_coeff)
        regression_coeffs_cl_sam_models[model_name] = regression_coeff
        regression_coeffs_cl_sam_models_p_values[model_name] = p_values

if not regression_coeffs_list_cl:
    raise ValueError("No cloud-fraction file matched a model in sam_models_monthly")

# Calculate the model-mean and inter-model spread (standard deviation)
regression_coeffs_stack = np.stack([coeff.values for coeff in regression_coeffs_list_cl])
model_mean_coeff = np.mean(regression_coeffs_stack, axis=0)
model_spread_coeff = np.std(regression_coeffs_stack, axis=0)

# Convert the mean and spread back to xarray DataArray, reusing the dims and
# coordinates of the first model's map.
mean_da_cl = xr.DataArray(model_mean_coeff, dims=regression_coeffs_list_cl[0].dims, coords=regression_coeffs_list_cl[0].coords)
spread_da_cl = xr.DataArray(model_spread_coeff, dims=regression_coeffs_list_cl[0].dims, coords=regression_coeffs_list_cl[0].coords)

Processing TaiESM1...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/cl_amip_TaiESM1_zonalmean_interped_pressure_lev.nc
Processing IITM-ESM...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/cl_amip_IITM-ESM_zonalmean_interped_pressure_lev.nc
Processing HadGEM3-GC31-MM...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/cl_amip_HadGEM3-GC31-MM_zonalmean_interped_pressure_lev.nc
Processing KACE-1-0-G...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/cl_amip_KACE-1-0-G_zonalmean_interped_pressure_lev.nc
Processing MPI-ESM-1-2-HAM...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/cl_amip_MPI-ESM-1-2-HAM_zonalmean_interped_pressure_lev.nc
Processing NorESM2-LM...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combined/zonal_mean_interped/cl_amip_NorESM2-LM_zonalmean_interped_pressure_lev.nc
Processing MPI-ESM1-2-LR...
/bjerknes_raid5/CMIP6_xl7pd/amip_monthly/cl_combin

### save regression maps for models

In [7]:
# Pickle the per-model cloud-fraction regression maps and their p-values.
save_directory = '/bjerknes_raid5/CMIP6_xl7pd/data'
cl_outputs = (
    ('regression_maps_cloud_fraction.pkl', regression_coeffs_cl_sam_models),
    ('regression_maps_cloud_fraction_p_values.pkl', regression_coeffs_cl_sam_models_p_values),
)
for pickle_name, payload in cl_outputs:
    with open(os.path.join(save_directory, pickle_name), 'wb') as f:
        pickle.dump(payload, f)

#### Calculate regression maps for cloud radiative heating (CRH)

In [None]:
# Collect the full path of every NetCDF file in the interpolated CRH directory.
directory = '/OWC/huiyu/CMIP6/vertical_CRH/interpolated'
files = []
for entry in os.listdir(directory):
    if entry.endswith('.nc'):
        files.append(os.path.join(directory, entry))

In [14]:
# Per-model regression maps of cloud radiative heating on the standardized SAM
# index, kept both as a list (for the multi-model mean/spread) and keyed by
# model name.
regression_coeffs_list_crh = []
regression_coeffs_crh_sam_models = {}
regression_coeffs_crh_sam_models_p_values = {}

# Define latitude range
lat_range = (-90, 0)

file_type = 'cloud_radiative_heating'

# Loop through each model file that has a matching SAM index
for file in files:
    # File names start with <model>_...; parse the base name so the result
    # does not depend on how deep the data directory is.
    model_name = os.path.basename(file).split("_")[0]
    print(f'Processing {model_name}...')
    if model_name in sam_models_monthly:
        # Standardize the 2000-2014 SAM index (zero mean, unit std).
        sam_df = sam_models_monthly[model_name].loc['2000':'2014']
        sam_anomaly = (sam_df - sam_df.mean()) / sam_df.std()
        regression_coeff, p_values = calculate_regression_with_pvalues(file, sam_anomaly.values, lat_range, file_type)
        print(regression_coeff.shape)

        # Store regression_coeff for model-mean and spread calculation
        regression_coeffs_list_crh.append(regression_coeff)
        regression_coeffs_crh_sam_models[model_name] = regression_coeff
        regression_coeffs_crh_sam_models_p_values[model_name] = p_values

if not regression_coeffs_list_crh:
    raise ValueError("No CRH file matched a model in sam_models_monthly")

# Calculate the model-mean and inter-model spread (standard deviation)
regression_coeffs_stack = np.stack([coeff.values for coeff in regression_coeffs_list_crh])
model_mean_coeff = np.mean(regression_coeffs_stack, axis=0)
model_spread_coeff = np.std(regression_coeffs_stack, axis=0)

# Convert the mean and spread back to xarray DataArray, reusing the dims and
# coordinates of the first model's map.
mean_da_crh = xr.DataArray(model_mean_coeff, dims=regression_coeffs_list_crh[0].dims, coords=regression_coeffs_list_crh[0].coords)
spread_da_crh = xr.DataArray(model_spread_coeff, dims=regression_coeffs_list_crh[0].dims, coords=regression_coeffs_list_crh[0].coords)

Processing MRI-ESM2-0...
(40, 91)
Processing MPI-ESM1-2-LR...
(40, 91)
Processing MPI-ESM1-2-HR...
(40, 91)
Processing MPI-ESM-1-2-HAM...
(40, 91)
Processing MIROC6...
(40, 91)
Processing INM-CM4-8...
(40, 91)
Processing INM-CM5-0...
(40, 91)
Processing BCC-CSM2-MR...
(40, 91)


In [15]:
# Pickle the per-model CRH regression maps and their p-values.
save_directory = '../data'
crh_outputs = (
    ('regression_maps_crh.pkl', regression_coeffs_crh_sam_models),
    ('regression_maps_crh_p_values.pkl', regression_coeffs_crh_sam_models_p_values),
)
for pickle_name, payload in crh_outputs:
    with open(os.path.join(save_directory, pickle_name), 'wb') as f:
        pickle.dump(payload, f)