# Import necessary libraries


In [None]:
import xarray as xr
import cftime


# Define the file paths explicitly
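# The individual .nc files of each ensemble member are concatenated into a single file covering 1980-2064.
# This gives one concatenated .nc file per ensemble member: 1011, 1021, 1041, 1061, 1081, 1231, 1251, 1281, 1301.
# (Note: the original text says there are 10 such files, but only 9 ensemble members are listed here; confirm which is correct.)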


In [None]:
# Input files for ensemble member 1301 (simulation 005), listed in
# chronological order. The first group carries the BHISTcmip6 prefix
# (1980-2014), the second the BSSP370cmip6 prefix (2015-2064).
hist_files = [
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BHISTcmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.19800101-19891231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BHISTcmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.19900101-19991231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BHISTcmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20000101-20091231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BHISTcmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20100101-20141231.nc",
]
ssp370_files = [
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BSSP370cmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20150101-20241231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BSSP370cmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20250101-20341231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BSSP370cmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20350101-20441231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BSSP370cmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20450101-20541231.nc",
    "/data/project/agaid/singh_cesm_ts/cesm_tmax/b.e21.BSSP370cmip6.f09_g17.LE2-1301.005.cam.h1.TREFHTMX.20550101-20641231.nc",
]

# Full ordered list consumed by the loading step.
files = hist_files + ssp370_files


# Load each file as a dataset and append to the list


In [None]:
# Opened datasets are collected here, one per entry of `files`, in order.
datasets = []

# Timestamps on or after this date are removed from the 2010-2014 file
# (the original author notes that file may contain January 1, 2015).
cutoff_2015 = cftime.DatetimeNoLeap(2015, 1, 1)
# The file to trim is recognised by its name, not by inspecting its contents.
trimmed_file_tag = '20100101-20141231'

for file in files:
    # use_cftime=True decodes the time axis into cftime objects.
    ds = xr.open_dataset(file, use_cftime=True)

    if trimmed_file_tag in file:
        before_cutoff = ds.time < cutoff_2015
        ds = ds.sel(time=before_cutoff)

    datasets.append(ds)
    # Report the first and last timestamp actually kept for this file.
    print(f"Loaded {file} covering {ds.time.values[0]} to {ds.time.values[-1]}")


# Concatenate the datasets along the time dimension


In [None]:
# Concatenate the per-file datasets into a single dataset along the time dimension.
ds_concat = xr.concat(datasets, dim="time")

# Validate the combined time axis against the expected daily no-leap calendar
# covering 1980-01-01 through 2064-12-31.
all_days = xr.cftime_range(start='1980-01-01', end='2064-12-31', freq='D', calendar='noleap')
time_values = ds_concat.time.values
present_days = set(time_values)

# Gaps: expected days that never appear in the concatenated time axis.
# NOTE(review): the comparison is an exact match against the generated
# timestamps, so data stamped at a different time of day would show up as
# missing - confirm the files use the same time-of-day convention.
missing_days = set(all_days.values) - present_days
if missing_days:
    print(f"Missing days detected: {sorted(missing_days)[:10]}")
    # Only the first ten are listed above, so also report how many there are.
    print(f"Total missing days: {len(missing_days)}")
else:
    print("No missing days in concatenated dataset.")

# Duplicates: overlapping input files would repeat timestamps, which the
# set-difference check above cannot see. Compare the raw length of the time
# axis with the number of distinct timestamps to catch that case.
duplicate_count = len(time_values) - len(present_days)
if duplicate_count:
    print(f"Duplicate timestamps detected: {duplicate_count}")


# Save the concatenated dataset to the specified file


In [None]:
# Destination of the concatenated dataset for ensemble member 1301, simulation 5.
output_file = "/data/project/agaid/singh_cesm_ts/cesm_tmax_concatenated/ensemble_1301_sim5_concatenated.nc"

# Write the whole concatenated dataset to a single netCDF file, then confirm.
ds_concat.to_netcdf(path=output_file)
print(f"Concatenated dataset saved to {output_file}")


# Close the datasets to free up resources


In [None]:
# Close every dataset collected in `datasets` now that the output has been written.
for opened_dataset in datasets:
    opened_dataset.close()
