# Import necessary libraries


In [None]:
import xarray as xr
import numpy as np
import glob
import cftime
import geopandas as gpd


# Define helper functions for data processing


In [None]:
def filter_months(data, start_month=11, end_month=3):
    """
    Filter data to the months from start_month through end_month (inclusive).

    With the defaults (11 and 3) this keeps November through March. When
    start_month is greater than end_month the window wraps around the year
    end, e.g. (11, 3) selects months 11, 12, 1, 2 and 3.

    Parameters:
    data (xarray.DataArray): The input data array with a time dimension.
    start_month (int): The starting month, 1-12 (inclusive).
    end_month (int): The ending month, 1-12 (inclusive).

    Returns:
    xarray.DataArray: Data array filtered to include only the specified months.

    Raises:
    ValueError: If start_month or end_month is outside the range 1-12.
    """
    # Reject impossible month numbers up front; otherwise e.g. start_month=13
    # would silently shrink the window instead of failing.
    for label, month in (("start_month", start_month), ("end_month", end_month)):
        if not 1 <= month <= 12:
            raise ValueError(f"{label} must be between 1 and 12, got {month!r}")

    # Handling the case where the period spans over the year end
    if start_month > end_month:
        months = list(range(start_month, 13)) + list(range(1, end_month + 1))
    else:
        months = list(range(start_month, end_month + 1))
    return data.sel(time=data.time.dt.month.isin(months))


In [None]:
def calculate_days_below_threshold(data):
    """
    Calculate the mean annual sum of sub-zero degree days (CDD).

    Every time step whose temperature is below 0 contributes the magnitude
    of that temperature (-data); all other time steps contribute 0. The
    contributions are summed per calendar year and then averaged over years.

    NOTE(review): despite the function name, this returns an accumulated
    temperature deficit, not a count of days below zero - confirm which
    quantity downstream consumers expect.

    Parameters:
    data (xarray.DataArray): Temperature data with a time dimension. Values
        are compared against 0, so they must already be in degrees Celsius
        (not Kelvin). Presumably daily values, so that the sums are
        degree *days* - TODO confirm the time step of the input files.

    Returns:
    xarray.DataArray: Annual chilling degree days averaged over all years
        present in data (the time dimension is reduced away).
    """
    # Calculate degree days where temperature is below zero.
    # NaN compares False against 0, so missing values contribute 0 here.
    cdd = xr.where(data < 0, -data, 0)
    # Sum degree days per calendar year. 'YS' (year start) is the supported
    # spelling of the deprecated 'AS' alias and produces the same bins.
    annual_cdd = cdd.resample(time='YS').sum()
    # Average over the years
    return annual_cdd.mean(dim='time')


In [None]:
def process_file(file_path, start_year, end_year):
    """
    Process a single NetCDF file into a sub-zero degree-day climatology.

    Reads the TREFHT variable, restricts it to the CONUS box (lon 235-295,
    lat 25-50) and to the requested years, converts it from Kelvin to
    Celsius, keeps only November-March, and passes the result to
    calculate_days_below_threshold.

    Parameters:
    file_path (str): The path to the NetCDF file.
    start_year (int): The starting year of the period (inclusive).
    end_year (int): The ending year of the period.

    Returns:
    xarray.DataArray: The climatology returned by
        calculate_days_below_threshold for the selected region and period,
        loaded into memory.
    """
    # NOTE(review): the upper bound is Dec 31 at 00:00 of end_year, so time
    # stamps later on that day fall outside the slice. The bounds also
    # assume the file uses the 'noleap' calendar - confirm against the data.
    period = slice(
        cftime.DatetimeNoLeap(start_year, 1, 1),
        cftime.DatetimeNoLeap(end_year, 12, 31),
    )

    # The context manager closes the file handle once the result is in
    # memory; previously every processed file was left open.
    with xr.open_dataset(file_path, use_cftime=True) as ds:
        # Subset first so only the CONUS box and the requested years are
        # read from disk, instead of converting the whole field.
        # Longitudes are given on a 0-360 grid; lat=slice(25, 50) presumably
        # relies on latitude being stored in ascending order - TODO confirm.
        trefht = ds['TREFHT'].sel(
            lon=slice(235, 295),
            lat=slice(25, 50),
            time=period,
        )

        # Convert TREFHT from Kelvin to Celsius
        trefht_celsius = trefht - 273.15

        # Keep November-March only (filter_months defaults)
        winter_data = filter_months(trefht_celsius)

        days_below_threshold_climatology = calculate_days_below_threshold(winter_data)

        # Make sure the values are materialised before the file is closed.
        return days_below_threshold_climatology.load()


# Define file paths and periods for analysis


In [None]:
# Glob pattern matching the input .nc files
nc_files_directory = '/data/project/agaid/singh_cesm_ts/cesm_ts_concatenated/*.nc'
# Sorted so the files are always processed in the same order
nc_files = sorted(glob.glob(nc_files_directory))

# First and last year of the two analysis periods
start_year_1991_2020 = 1991
end_year_1991_2020 = 2020
start_year_2031_2060 = 2031
end_year_2031_2060 = 2060


# Process files for the climatology period 1991-2020


In [None]:
# One climatology per input file, in the same order as nc_files
days_below_threshold_1991_2020_results = [
    process_file(member_path, start_year_1991_2020, end_year_1991_2020)
    for member_path in nc_files
]


# Calculate the ensemble mean climatology for 1991-2020


In [None]:
# Stack the per-file results on a new leading axis and average across it
ensemble_mean_1991_2020 = np.array(days_below_threshold_1991_2020_results).mean(axis=0)


# Process files for the climatology period 2031-2060


In [None]:
# One climatology per input file, in the same order as nc_files
days_below_threshold_2031_2060_results = [
    process_file(member_path, start_year_2031_2060, end_year_2031_2060)
    for member_path in nc_files
]


# Calculate the ensemble mean climatology for 2031-2060


In [None]:
# Stack the per-file results on a new leading axis and average across it
ensemble_mean_2031_2060 = np.array(days_below_threshold_2031_2060_results).mean(axis=0)


# Create Xarray DataArrays for ensemble results and means


In [None]:
# Grid coordinates are read from the first 1991-2020 result and reused for
# every array below (assumes all results share that grid).
lat = days_below_threshold_1991_2020_results[0].lat.values
lon = days_below_threshold_1991_2020_results[0].lon.values

# Per-file results, stacked along a leading "ensemble" axis indexed 0..N-1
days_below_threshold_1991_2020_da = xr.DataArray(
    data=np.stack(days_below_threshold_1991_2020_results),
    name='days_below_0_ensemble_1991_2020',
    dims=("ensemble", "lat", "lon"),
    coords=dict(
        ensemble=np.arange(len(days_below_threshold_1991_2020_results)),
        lat=lat,
        lon=lon,
    ),
)

days_below_threshold_2031_2060_da = xr.DataArray(
    data=np.stack(days_below_threshold_2031_2060_results),
    name='days_below_0_ensemble_2031_2060',
    dims=("ensemble", "lat", "lon"),
    coords=dict(
        ensemble=np.arange(len(days_below_threshold_2031_2060_results)),
        lat=lat,
        lon=lon,
    ),
)

# Ensemble means on the (lat, lon) grid
ensemble_mean_1991_2020_da = xr.DataArray(
    data=ensemble_mean_1991_2020,
    name='days_below_0_ensemble_mean_1991_2020',
    dims=("lat", "lon"),
    coords=dict(lat=lat, lon=lon),
)

ensemble_mean_2031_2060_da = xr.DataArray(
    data=ensemble_mean_2031_2060,
    name='days_below_0_ensemble_mean_2031_2060',
    dims=("lat", "lon"),
    coords=dict(lat=lat, lon=lon),
)


# Create Xarray Datasets with descriptive variable names


In [None]:
# One dataset per period, holding the per-file array and its ensemble mean
ds_1991_2020 = xr.Dataset(
    data_vars=dict(
        days_below_0_ensemble_1991_2020=days_below_threshold_1991_2020_da,
        days_below_0_ensemble_mean_1991_2020=ensemble_mean_1991_2020_da,
    )
)

ds_2031_2060 = xr.Dataset(
    data_vars=dict(
        days_below_0_ensemble_2031_2060=days_below_threshold_2031_2060_da,
        days_below_0_ensemble_mean_2031_2060=ensemble_mean_2031_2060_da,
    )
)


# Save the Datasets to NetCDF files


In [None]:
# Destination files, one per period
output_path_1991_2020 = "/home/shawn_preston/daysbelow0ensemble/tavgdaysbelow0_1991_2020.nc"
output_path_2031_2060 = "/home/shawn_preston/daysbelow0ensemble/tavgdaysbelow0_2031_2060.nc"

# Write each period's dataset to its own NetCDF file
ds_1991_2020.to_netcdf(path=output_path_1991_2020)
ds_2031_2060.to_netcdf(path=output_path_2031_2060)
