# Import necessary libraries


In [None]:
import xarray as xr
import numpy as np
import glob
import cftime
from joblib import Parallel, delayed


# Define helper functions for data processing


In [None]:
def filter_months(data, start_month=1, end_month=7):
    """
    Filter data to include only the specified range of months.

    If ``start_month`` is greater than ``end_month`` the range is taken to
    wrap around the end of the year (e.g. ``start_month=11, end_month=2``
    keeps November, December, January and February) instead of silently
    selecting nothing.

    Parameters:
    data (xarray.DataArray): The input data array with a time dimension.
    start_month (int): The starting month (inclusive).
    end_month (int): The ending month (inclusive).

    Returns:
    xarray.DataArray: Data array filtered to include only the specified months.
    """
    if start_month <= end_month:
        months = list(range(start_month, end_month + 1))
    else:
        # Range crosses the year boundary: start..12 followed by 1..end.
        months = list(range(start_month, 13)) + list(range(1, end_month + 1))
    # Boolean mask along time, True where the timestamp's month is wanted.
    return data.sel(time=data.time.dt.month.isin(months))


In [None]:
def calculate_last_frost_date(data):
    """
    Calculate the last frost day of year for each calendar year.

    A frost day is a time step whose temperature, after subtracting 273.15
    (i.e. converting from Kelvin to Celsius), is at or below 0.

    Parameters:
    data (xarray.DataArray): Temperature data in Kelvin with a ``time``
        coordinate.

    Returns:
    xarray.DataArray: For every year, the largest day-of-year on which a
        frost day occurred. The value is 0 where no time step in that year
        qualifies as a frost day (missing temperatures never qualify, since
        a comparison with NaN is False).
    """
    # True wherever the temperature in Celsius is at or below freezing
    is_frost = (data - 273.15) <= 0
    day_of_year = data['time'].dt.dayofyear
    # Day-of-year on frost days, 0 everywhere else
    frost_doy = is_frost.where(is_frost, other=0) * day_of_year
    # The latest frost day within each year is the per-year maximum
    return frost_doy.groupby('time.year').max('time')


In [None]:
def process_file(file_path, start_year, end_year):
    """
    Process a single NetCDF file to calculate the last frost dates for a specified period.

    The dataset is opened in a ``with`` block so that the file is closed
    even if one of the selection or calculation steps raises.

    Parameters:
    file_path (str): The path to the NetCDF file.
    start_year (int): The starting year of the period (inclusive).
    end_year (int): The ending year of the period (inclusive).

    Returns:
    xarray.DataArray: The last frost dates for the specified period, as
        returned by ``calculate_last_frost_date``.
    """
    # Inclusive period bounds as no-leap-calendar dates.
    # NOTE(review): assumes the file's time axis uses the noleap calendar — confirm.
    period_start = cftime.DatetimeNoLeap(start_year, 1, 1)
    period_end = cftime.DatetimeNoLeap(end_year, 12, 31)

    with xr.open_dataset(file_path, use_cftime=True) as ds:
        # Select temperature data and geographic region (lon 235-295, lat 25-50)
        temperature_data = ds['TREFHTMN'].sel(lon=slice(235, 295), lat=slice(25, 50))
        # Filter data by months (filter_months defaults)
        filtered_data = filter_months(temperature_data)
        # Select data for the specified period
        period_data = filtered_data.sel(time=slice(period_start, period_end))
        # Calculate last frost dates while the file is still open
        last_frost_dates = calculate_last_frost_date(period_data)
    return last_frost_dates


# Define file paths and periods for analysis


In [None]:
# Glob pattern matching the input .nc files
nc_files_directory = '/data/project/agaid/singh_cesm_ts/cesm_tmin_concatenated/*.nc'
# Matching paths, in ascending sorted order
nc_files = sorted(glob.glob(nc_files_directory))

# Analysis periods: label -> (start year, end year)
periods = {
    '1991_2020': (1991, 2020),
    '2031_2060': (2031, 2060),
}


# Initialize results dictionary to store data for each period


In [None]:
results = {label: list() for label in periods}


# Process all files for each period using parallel processing


In [None]:
# Fail fast with a clear message: with no input files the failure would
# otherwise only surface later, when concatenating an empty result list.
if not nc_files:
    raise FileNotFoundError(f"No NetCDF files matched {nc_files_directory!r}")

for period, (start_year, end_year) in periods.items():
    # Use parallel processing to handle multiple files simultaneously;
    # each file yields one result for this period
    results[period] = Parallel(n_jobs=-1)(
        delayed(process_file)(file, start_year, end_year) for file in nc_files
    )

    # Combine the per-file results along a new 'ensemble' dimension
    ensemble_results = xr.concat(results[period], dim='ensemble')
    ensemble_mean = ensemble_results.mean(dim='ensemble')

    # Calculate the climatology for the average last day of frost
    # (mean of the ensemble mean over all years of the period)
    climatology = ensemble_mean.mean(dim='year')

    # Save the data to NetCDF, one file per period
    ds_ensemble = xr.Dataset({
        'last_frost_dates_ensemble': ensemble_results,
        'last_frost_dates_ensemble_mean': ensemble_mean,
        'average_last_frost_climatology': climatology
    })
    ds_ensemble.to_netcdf(f"/home/shawn_preston/lastdayfrostensemble/3mean_last_frost_{period}.nc")

print("Data for both periods processed and saved.")
