# Import necessary libraries


In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import glob
import cftime


# Define functions to process data


In [None]:
def count_days_above_threshold(data, threshold=15):
    """Return, for each position along the other dimensions, how many time steps exceed ``threshold``.

    Parameters
    ----------
    data : xarray object with a ``time`` dimension
        Values to compare against the threshold.
    threshold : number, default 15
        A time step is counted only when its value is strictly greater
        than this.

    Returns
    -------
    The count of qualifying entries, reduced over the ``time`` dimension.
    """
    # Boolean mask of the entries that are strictly above the threshold
    exceeds = data > threshold
    # Everything that fails the mask becomes NaN ...
    masked = data.where(exceeds)
    # ... so counting the non-NaN entries along time yields the tally
    n_days = masked.count(dim='time')
    return n_days


In [None]:
def filter_months(data, start_month=8, end_month=9):
    """Keep only the time steps whose calendar month lies in an inclusive window.

    With the default arguments the window is August-September (months 8
    and 9). When ``start_month`` is greater than ``end_month`` the window
    wraps across the year boundary, e.g. ``start_month=11, end_month=2``
    keeps November, December, January and February.

    Parameters
    ----------
    data : xarray object with a datetime-like ``time`` coordinate
        The data to subset.
    start_month : int, default 8
        First month of the window (1 = January), inclusive.
    end_month : int, default 9
        Last month of the window, inclusive.

    Returns
    -------
    ``data`` restricted to the time steps that fall inside the window.
    """
    if start_month <= end_month:
        months = list(range(start_month, end_month + 1))
    else:
        # Window crosses the year boundary: start..December plus January..end
        months = list(range(start_month, 13)) + list(range(1, end_month + 1))

    # Boolean mask over the time coordinate, True where the month is wanted
    in_window = data.time.dt.month.isin(months)
    return data.sel(time=in_window)


In [None]:
def process_file(file_path, period):
    """Count days above the temperature threshold in one NetCDF file for a climatology period.

    The ``TREFHTMN`` variable is cut to the lon 235-295 / lat 25-50 box,
    limited to the requested period, converted to Celsius, restricted to the
    months selected by ``filter_months`` (called with its defaults), and
    reduced over time by ``count_days_above_threshold`` (called with its
    default threshold).

    Parameters
    ----------
    file_path : str
        Path of the NetCDF file to read.
    period : str
        Either ``'1991-2020'`` or ``'2031-2060'``.

    Returns
    -------
    The per-grid-cell count, held in memory so it remains usable after the
    file has been closed.

    Raises
    ------
    ValueError
        If ``period`` is not one of the supported labels.
    """
    # Inclusive (first_year, last_year) bounds of each supported period
    period_years = {
        '1991-2020': (1991, 2020),
        '2031-2060': (2031, 2060),
    }
    # Validate up front so a bad label fails before any file is opened
    if period not in period_years:
        raise ValueError(f"Invalid period specified: {period}")
    first_year, last_year = period_years[period]
    # NOTE(review): DatetimeNoLeap assumes the files use a no-leap calendar -- confirm
    period_window = slice(cftime.DatetimeNoLeap(first_year, 1, 1),
                          cftime.DatetimeNoLeap(last_year, 12, 31))

    # The context manager closes the file handle even if processing fails;
    # this function is called once per ensemble file, so unclosed handles
    # would otherwise pile up.
    with xr.open_dataset(file_path, use_cftime=True) as ds:
        # Subset first so the unit conversion only touches the cells we keep.
        # NOTE(review): the slices assume 0-360 longitudes and ascending
        # latitudes in the file -- confirm.
        conus_raw = ds['TREFHTMN'].sel(lon=slice(235, 295), lat=slice(25, 50))
        period_raw = conus_raw.sel(time=period_window)

        # Convert to Celsius (values are assumed to be stored in Kelvin)
        period_celsius = period_raw - 273.15

        # Keep only the months selected by filter_months' default window
        period_data = filter_months(period_celsius)

        # Per-grid-cell number of days above the threshold
        night = count_days_above_threshold(period_data)

        # Make sure the values are in memory before the file is closed
        return night.load()


# Define file paths and load .nc files


In [None]:
# Glob pattern matching every concatenated .nc file of the ensemble
nc_files_directory = '/data/project/agaid/singh_cesm_ts/cesm_tmin_concatenated/*.nc'
# Collect the matching paths in sorted order so the file order is reproducible
nc_files = sorted(glob.glob(nc_files_directory))


# Process files for the first climatology period (1991-2020)


In [None]:
# One per-grid-cell count per ensemble file, for the 1991-2020 period
night_1991_2020_results = [
    process_file(member_path, '1991-2020') for member_path in nc_files
]
print("Number of Files Processed:", len(nc_files))
# Show the stacked shape of night_1991_2020_results before averaging
print("Shape of night_1991_2020_results before calculating ensemble mean:",
      np.shape(night_1991_2020_results))
# Average over the ensemble members (axis 0), then divide by 30
# (presumably the number of years in the period, giving days per year)
ensemble_mean = np.mean(night_1991_2020_results, axis=0) / 30

print("Ensemble Mean Array:\n", ensemble_mean)
print("Ensemble Mean Array Shape:", ensemble_mean.shape)


# Process files for the second climatology period (2031-2060)


In [None]:
# One per-grid-cell count per ensemble file, for the 2031-2060 period
night_2031_2060_results = [
    process_file(member_path, '2031-2060') for member_path in nc_files
]
# Average over the ensemble members (axis 0), then divide by 30
# (presumably the number of years in the period, giving days per year)
ensemble_mean_2031_2060 = np.mean(night_2031_2060_results, axis=0) / 30


# Convert results to Xarray DataArrays and Datasets


In [None]:
# Take the grid coordinates from the first per-member result, so `lat_1d` and
# `lon_1d` are always defined and always match the arrays wrapped below.
# NOTE(review): assumes each result has dims ("lat", "lon") in that order -- confirm.
lat_1d = night_1991_2020_results[0]["lat"].values
lon_1d = night_1991_2020_results[0]["lon"].values

# Stack the per-member results along a new leading 'ensemble' dimension
night_1991_2020_da = xr.DataArray(
    data=np.stack(night_1991_2020_results),
    dims=["ensemble", "lat", "lon"],
    coords={
        "ensemble": np.arange(len(night_1991_2020_results)),
        "lat": lat_1d,
        "lon": lon_1d,
    },
    name='night_ensemble_1991_2020',
)

night_2031_2060_da = xr.DataArray(
    data=np.stack(night_2031_2060_results),
    dims=["ensemble", "lat", "lon"],
    coords={
        "ensemble": np.arange(len(night_2031_2060_results)),
        "lat": lat_1d,
        "lon": lon_1d,
    },
    name='night_ensemble_2031_2060',
)

# Wrap the ensemble means (plain arrays) as DataArrays on the same grid
ensemble_mean_1991_2020_da = xr.DataArray(
    data=ensemble_mean,
    dims=["lat", "lon"],
    coords={"lat": lat_1d, "lon": lon_1d},
    name='night_ensemble_mean_1991_2020',
)

ensemble_mean_2031_2060_da = xr.DataArray(
    data=ensemble_mean_2031_2060,
    dims=["lat", "lon"],
    coords={"lat": lat_1d, "lon": lon_1d},
    name='night_ensemble_mean_2031_2060',
)


# Create Xarray Datasets with descriptive variable names


In [None]:
# Bundle the per-member counts and their ensemble mean for 1991-2020
ds_1991_2020 = xr.Dataset(
    data_vars={
        "night_ensemble_1991_2020": night_1991_2020_da,
        "night_ensemble_mean_1991_2020": ensemble_mean_1991_2020_da,
    }
)

# Same layout for 2031-2060
ds_2031_2060 = xr.Dataset(
    data_vars={
        "night_ensemble_2031_2060": night_2031_2060_da,
        "night_ensemble_mean_2031_2060": ensemble_mean_2031_2060_da,
    }
)


# Save the Datasets to NetCDF files
