# Import necessary libraries


In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import glob
import cftime


# Define functions to process data


In [None]:
def calculate_gdd(data, baseline=6, max_threshold=28):
    """Accumulate Growing Degree Days (GDD) along the leading axis.

    For each element, the daily contribution is the temperature capped at
    ``max_threshold`` minus ``baseline`` when the temperature is strictly
    above ``baseline``; otherwise the contribution is 0.

    Args:
        data: Array-like of temperatures. Axis 0 is the axis that gets
            summed (the callers in this file pass time-leading data --
            assumed, not checked here).
        baseline: Temperature at or below which no GDD accrue.
        max_threshold: Upper cap applied to temperatures before the
            baseline is subtracted.

    Returns:
        The per-element daily GDD summed over axis 0.
    """
    # Cap hot days first so they contribute at most (max_threshold - baseline).
    capped = np.minimum(data, max_threshold)
    # Strict comparison: a day exactly at the baseline contributes nothing.
    is_growing_day = data > baseline
    daily_gdd = np.where(is_growing_day, capped - baseline, 0)
    return daily_gdd.sum(axis=0)


In [None]:
def filter_months(data, start_month=1, end_month=4):
    """Keep only the time steps whose month lies in [start_month, end_month].

    With the defaults this retains January through April.

    Args:
        data: Object with a ``time`` coordinate exposing ``.dt.month`` and
            supporting ``.sel(time=...)`` (an xarray object in this file).
        start_month: First month to keep (inclusive).
        end_month: Last month to keep (inclusive).

    Returns:
        ``data`` restricted to the matching time steps.
    """
    # end_month is inclusive, hence the + 1 on the range bound.
    wanted_months = range(start_month, end_month + 1)
    in_season = data.time.dt.month.isin(wanted_months)
    return data.sel(time=in_season)


In [None]:
def process_file(file_path, period):
    """Compute accumulated GDD over CONUS for one file and climatology period.

    The file's ``TREFHT`` variable is restricted to the CONUS box
    (lon 235-295, lat 25-50), to the months kept by ``filter_months`` and to
    the requested period, converted from Kelvin to Celsius, and passed to
    ``calculate_gdd``.

    Args:
        file_path: Path to a NetCDF file containing ``TREFHT`` with ``time``,
            ``lat`` and ``lon`` coordinates.
        period: Climatology label, either ``'1991-2020'`` or ``'2031-2060'``.

    Returns:
        Whatever ``calculate_gdd`` returns for the selected data (the GDD
        summed over the leading axis).

    Raises:
        ValueError: If ``period`` is not one of the supported labels.
    """
    # Supported climatology labels mapped to their (first, last) year.
    period_years = {
        '1991-2020': (1991, 2020),
        '2031-2060': (2031, 2060),
    }
    # Validate before touching the file so a bad label fails fast.
    if period not in period_years:
        raise ValueError(f"Invalid period specified: {period}")
    first_year, last_year = period_years[period]
    # NOTE(review): DatetimeNoLeap assumes the files use a no-leap calendar.
    period_window = slice(cftime.DatetimeNoLeap(first_year, 1, 1),
                          cftime.DatetimeNoLeap(last_year, 4, 30))

    # The context manager closes the file handle once the GDD is computed;
    # the original left every opened dataset dangling.
    with xr.open_dataset(file_path, use_cftime=True) as ds:
        # Subset first (region, season, period) so the unit conversion below
        # only touches the values that are actually needed.
        # NOTE(review): slice(25, 50) presumes latitude is stored ascending.
        conus_data = ds['TREFHT'].sel(lon=slice(235, 295), lat=slice(25, 50))
        conus_data = filter_months(conus_data)
        period_data = conus_data.sel(time=period_window)

        # Convert TREFHT from Kelvin to Celsius.
        period_celsius = period_data - 273.15

        # Compute inside the `with` block so the data is read while the
        # file is still open.
        gdd = calculate_gdd(period_celsius)

    return gdd


# Define the input path pattern and collect the matching .nc file paths


In [None]:
# Glob pattern matching every ensemble-member .nc file
nc_files_directory = '/data/project/agaid/singh_cesm_ts/cesm_ts_concatenated/*.nc'
# Sort the matches so the ensemble order is reproducible between runs
nc_files = sorted(glob.glob(nc_files_directory))


# Process files for the first climatology period (1991-2020)


In [None]:
# One accumulated-GDD grid per ensemble member for 1991-2020
gdd_1991_2020_results = [
    process_file(member_path, '1991-2020') for member_path in nc_files
]
# Average across members, then divide by 30 (the number of years in the
# period) to turn the multi-year total into a per-year mean
ensemble_mean = np.mean(gdd_1991_2020_results, axis=0) / 30


# Process files for the second climatology period (2031-2060)


In [None]:
# One accumulated-GDD grid per ensemble member for 2031-2060
gdd_2031_2060_results = [
    process_file(member_path, '2031-2060') for member_path in nc_files
]
# Average across members, then divide by 30 (the number of years in the
# period) to turn the multi-year total into a per-year mean
ensemble_mean_2031_2060 = np.mean(gdd_2031_2060_results, axis=0) / 30


# Convert results to Xarray DataArrays and Datasets


In [None]:
# The per-member GDD results are bare NumPy grids, so the latitude/longitude
# coordinates have to be recovered separately. Read them from the first input
# file using the same CONUS box that process_file() applies.
# NOTE(review): assumes every ensemble file shares this grid -- confirm.
with xr.open_dataset(nc_files[0], decode_times=False) as grid_ds:
    # Time decoding is skipped because only the spatial coordinates are read.
    conus_grid = grid_ds['TREFHT'].sel(lon=slice(235, 295), lat=slice(25, 50))
    lat_1d = conus_grid['lat'].values
    lon_1d = conus_grid['lon'].values

# Spatial coordinates shared by every DataArray built below
grid_coords = {"lat": lat_1d, "lon": lon_1d}

# Stack the list of numpy arrays along a new 'ensemble' dimension
gdd_1991_2020_da = xr.DataArray(
    data=np.stack(gdd_1991_2020_results),
    dims=["ensemble", "lat", "lon"],
    coords={"ensemble": np.arange(len(gdd_1991_2020_results)), **grid_coords},
    name='gdd_ensemble_1991_2020',
)

gdd_2031_2060_da = xr.DataArray(
    data=np.stack(gdd_2031_2060_results),
    dims=["ensemble", "lat", "lon"],
    coords={"ensemble": np.arange(len(gdd_2031_2060_results)), **grid_coords},
    name='gdd_ensemble_2031_2060',
)

# Convert ensemble means to DataArrays
ensemble_mean_1991_2020_da = xr.DataArray(
    data=ensemble_mean,
    dims=["lat", "lon"],
    coords=dict(grid_coords),
    name='gdd_ensemble_mean_1991_2020',
)

ensemble_mean_2031_2060_da = xr.DataArray(
    data=ensemble_mean_2031_2060,
    dims=["lat", "lon"],
    coords=dict(grid_coords),
    name='gdd_ensemble_mean_2031_2060',
)


# Create Xarray Datasets with descriptive variable names


In [None]:
# Bundle the per-member grids and the ensemble mean of each period into one
# Dataset per period
ds_1991_2020 = xr.Dataset(
    data_vars=dict(
        gdd_ensemble_1991_2020=gdd_1991_2020_da,
        gdd_ensemble_mean_1991_2020=ensemble_mean_1991_2020_da,
    )
)

ds_2031_2060 = xr.Dataset(
    data_vars=dict(
        gdd_ensemble_2031_2060=gdd_2031_2060_da,
        gdd_ensemble_mean_2031_2060=ensemble_mean_2031_2060_da,
    )
)


# Save the Datasets to NetCDF files


In [None]:
# Historical climatology (1991-2020): destination file, then write
output_path_1991_2020 = "/home/shawn_preston/gddcesm2janapr/GDD_1991_2020.nc"
ds_1991_2020.to_netcdf(path=output_path_1991_2020)

# Future climatology (2031-2060): destination file, then write
output_path_2031_2060 = "/home/shawn_preston/gddcesm2janapr/GDD_2031_2060.nc"
ds_2031_2060.to_netcdf(path=output_path_2031_2060)
