# In [218]:
import os
import xarray as xr
import pandas as pd
import numpy as np
from IPython.display import clear_output

# location of this script: /glade/u/home/sglanvil/analysis/python/analysis/concatenate_data_S2SforESP.ipynb
# contact: sglanvil@ucar.edu

# -------------------------------------- USER SPECIFIES --------------------------------------
varname = "tas_2m"  # ["tas_2m", "pr_sfc"]
method = "allLeads"  # ["allLeads", "weeks12lead", "weeks34lead", "weeks56lead"]
calculate_ensemble_mean = False  # [True, False]
start_date = "1999-01-01"  # [eg, 1999-01-01] Note: doesn't need to be a monday
end_date = "1999-12-31"  # [eg, 2020-12-31] Note: doesn't need to be a monday
destDir = "/glade/campaign/cesm/development/cross-wg/S2S/sglanvil/forJudith/"
# ---------------------------------------------------------------------------------------------

# When every ensemble member is kept, the date range is capped at 370 days
# (presumably to bound the size of the concatenated array -- confirm with the author).
if not calculate_ensemble_mean:
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)
    time_range = (end - start).days
    if time_range > 370:
        print(f"Warning: Your time range is {time_range} days, which exceeds the 370-day limit.")
        print("When choosing to keep all members, you need to shorten your date range to less than 370 days.")
        raise ValueError("Time range exceeds the allowed limit.")  # This will stop execution

# Input files are looked up under <base_dir>/<year>/<month>/ for each Monday in the range.
base_dir = f"/glade/campaign/cesm/development/cross-wg/S2S/CESM2/S2SHINDCASTS/p1/{varname}"
mondays = pd.date_range(start_date, end_date, freq="W-MON")

# method name -> (positional slice along "time" to average over, replacement "time" coords).
# A None slice keeps every lead time; None coords leaves the coordinates untouched.
methods = {
    "allLeads": (None, np.arange(1, 47)),  # No time slicing, relabel time as 1..46
    "weeks12lead": (slice(0, 14), None),  # first 14 time steps
    "weeks34lead": (slice(14, 28), None),  # time steps 15-28
    "weeks56lead": (slice(28, 42), None),  # time steps 29-42
}
# Fail loudly on a typo instead of silently falling back to "no slicing, no coords",
# which would write unsliced data under the wrong assumption.
if method not in methods:
    raise ValueError(f"Unknown method {method!r}; expected one of {sorted(methods)}.")
time_slice, time_coords = methods[method]

# Build one DataArray per Monday initialization: all matching files are stacked
# along a new "member" dimension, optionally averaged over the selected lead
# window and/or over members, then tagged with the init date.
data_results, init_array = [], []  # init_array is never filled in this section
for monday in mondays:
    short_date = monday.strftime("%d%b%Y").lower()  # e.g. "04jan1999"; matched against file names
    init_date = pd.Timestamp(monday.strftime("%Y%m%d"))
    dir_path = os.path.join(base_dir, str(monday.year), f"{monday.month:02d}")
    clear_output(wait=True)  # Clears the current output
    print(short_date)
    if not os.path.exists(dir_path):
        continue
    # os.listdir() returns entries in arbitrary order; sort so that the position
    # along "member" is reproducible across init dates and across runs.
    files = sorted(
        os.path.join(dir_path, f)
        for f in os.listdir(dir_path)
        if f.endswith(".nc") and short_date in f
    )
    if not files:
        continue

    # Keep the handles returned by open_dataset so they can be closed explicitly;
    # the result is loaded into memory first so nothing reads from a closed file.
    opened = []
    try:
        for fp in files:
            opened.append(xr.open_dataset(fp))
        combined = xr.concat([ds[[varname]] for ds in opened], dim="member").load()
    finally:
        for ds in opened:
            ds.close()

    # Apply time slicing if specified (average over the selected lead window)
    if time_slice is not None:
        combined = combined.isel(time=time_slice).mean(dim="time")

    # Calculate the ensemble mean or keep all ensemble members
    if calculate_ensemble_mean:
        combined = combined.mean(dim="member")
    data = combined.expand_dims(init=[init_date])

    # Assign time coordinates if available
    if time_coords is not None:
        data = data.assign_coords({"time": time_coords})
    data_results.append(data[varname])

# Combine results across all dates
# Every Monday whose directory or files were missing was skipped, so the list can
# be empty; report that directly rather than failing inside xr.concat.
if not data_results:
    raise ValueError(
        f"No {varname} files found under {base_dir} between {start_date} and {end_date}."
    )
data_combined = xr.concat(data_results, dim="init")
dataset = data_combined.to_dataset(name='data')
dataset = dataset.convert_calendar(calendar="noleap", dim="init")
# Human-readable init date (YYYYMMDD) stored alongside the data
date = dataset.init.dt.strftime("%Y%m%d")
dataset['date'] = date

# Save raw concatenated DATA
# NOTE(review): the file name always says "allLeads_EM", even when `method` is not
# "allLeads" or calculate_ensemble_mean is False, so runs with different settings
# overwrite each other. Left unchanged in case downstream readers rely on this path.
dataset.to_netcdf(f"{destDir}/{varname}_cesm2cam6v2_allLeads_EM.nc")

# Calculate and save CLIM (mean over all inits sharing the same day of year)
climatology = dataset.groupby('init.dayofyear').mean()
climatology.to_netcdf(f"{destDir}/{varname}_clim_cesm2cam6v2_allLeads_EM.nc")

# Calculate and save SMOOTH CLIM
# `init` was converted to the noleap calendar earlier in the script, so day of year
# runs 1..365. The climatology only has entries for days that actually had an
# init; put it on the full 365-day axis so the 31-entry window is a 31-day window
# and the middle-copy selection below is valid for short date ranges too.
days_per_year = 365
climFull = climatology.reindex(dayofyear=np.arange(1, days_per_year + 1))
# Three back-to-back copies make the rolling mean wrap around the year boundary.
climCyclical = xr.concat([climFull, climFull, climFull], dim="dayofyear")
# Two passes of a centered 31-day running mean. min_periods=1 averages whatever
# days are present in the window; with all 365 days present and no NaNs the
# middle copy is identical to the default (full-window) result.
climSmooth0 = climCyclical.rolling(dayofyear=31, center=True, min_periods=1).mean()
climSmooth0 = climSmooth0.rolling(dayofyear=31, center=True, min_periods=1).mean()
# Keep only the middle copy, which is free of edge effects.
climSmooth = climSmooth0.isel(dayofyear=slice(days_per_year, 2 * days_per_year))
climSmooth.to_netcdf(f"{destDir}/{varname}_climSmooth_cesm2cam6v2_allLeads_EM.nc")

# Calculate and save ANOMALIES (each init minus the smoothed climatology for its day of year)
anomalies = dataset.groupby('init.dayofyear')-climSmooth
anomalies['date'] = date
anomalies.to_netcdf(f"{destDir}/{varname}_anom_cesm2cam6v2_allLeads_EM.nc")


# Last cell output: 27dec1999
