In [5]:
# Read .nc files that have daily temperature at 0.5 degree resolution saved in individual years, attach data from continuous years together (1961-1990), calculate 15 day moving average temperatures for each day, and save the final data to one new .nc file instead of individual years. 

import os
import xarray as xr
import numpy as np

# --- Configuration ---------------------------------------------------------
# Folder that holds one ERA5 2 m temperature NetCDF file per year.
input_dir = '/Volumes/easystore/ERA5_single_level_2mTemp/output/'
# NetCDF file that will receive the combined record and its moving average.
output_file = '/Users/shuangma/RESEARCH/WORKFLOW/A1_EXTREME_DATA_ANALYSIS/S1.2_output/combined_moving_avg_1961_1990.nc'

# First and last year to process; the loading loop treats both as inclusive.
start_year, end_year = 1961, 1990

# Accumulator for the per-year datasets, filled by the loading loop.
datasets = list()

# Open the land-sea mask file (a 0.5-degree CARDAMOM grid, going by the file name).
land_sea_mask = xr.open_dataset(
    '/Users/shuangma/RESEARCH/WORKFLOW/A1_ToolBox/A1_Regridding/CARDAMOM_regrid/CARDAMOM-MAPS_05deg_LAND_SEA_MASK.nc'
)

# The mask values are read from the variable named 'data'. Land is assumed to
# be flagged with the value 1 (TODO confirm against the file). Use a different
# key here if the variable in your mask file has another name.
mask = land_sea_mask['data']

# The mask is assumed to already be on the same (lat, lon) grid as the
# temperature data; nothing in this cell verifies that. Interpolate or regrid
# the mask first if the grids differ.
# NOTE(review): in the cells visible here, `mask` is referenced only from
# commented-out code and is not applied to the data that gets saved.

In [6]:
# Loop over each year and load the .nc file into an xarray Dataset.
# The list is rebuilt from scratch every time this cell runs: appending to a
# list created in an earlier cell would add every year a second time on a
# re-run, and the concatenated record would then contain repeated time steps.
datasets = []
missing_years = []  # years whose file was not found; reported after the loop
for year in range(start_year, end_year + 1):
    file_path = os.path.join(input_dir, f'processed_era5_2m_temperature_{year}.nc')  # Assuming the filenames follow this pattern
    if os.path.exists(file_path):
        print(f"Processing {file_path}")
        ds = xr.open_dataset(file_path)
        datasets.append(ds)
    else:
        print(f"File for {year} not found at {file_path}")
        missing_years.append(year)

# A skipped year leaves a hole in the time axis. The moving average later in
# the notebook uses a fixed number of consecutive time steps, so windows next
# to a hole would mix days from non-adjacent years. Make that visible here.
if missing_years:
    print(
        f"WARNING: {len(missing_years)} year(s) missing ({missing_years}); "
        "the combined time axis will have gaps."
    )



Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1961.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1962.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1963.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1964.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1965.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1966.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1967.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1968.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1969.nc
Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1970.nc


In [None]:
#     year=1961
#     file_path = os.path.join(input_dir, f'processed_era5_2m_temperature_{year}.nc')  # Assuming the filenames follow this pattern
#     print(f"Processing {file_path}")
#     ds = xr.open_dataset(file_path)
#     t2m = ds['t2m']
#     masked_temp_data = t2m.where(mask == 1, np.nan)
#     masked_temp_data
#     #datasets.append(masked_temp_data)

Processing /Volumes/easystore/ERA5_single_level_2mTemp/output/processed_era5_2m_temperature_1961.nc


In [7]:
# Stitch the per-year datasets end to end into a single record along 'time'.
# Slow step: this has been observed to take more than 15 minutes.
combined_ds = xr.concat(objs=datasets, dim='time')

In [None]:
# # Assuming 'datasets' is your list of xarray Datasets
# # Apply Dask chunking to each dataset in the list to improve performance
# dask_datasets = [ds.chunk({'time': 365}) for ds in datasets]  # You can adjust chunk size based on available memory

# # Concatenate the datasets along the time dimension using Dask
# combined_ds = xr.concat(dask_datasets, dim='time')

# # Optionally trigger computation by calling load()
# combined_ds.load()  # This will load the data, but it's done in chunks

In [None]:
# Calculate the 15-day moving average of the temperature variable.
# The temperature variable is read from the combined dataset under the name 't2m'.
temperature = combined_ds['t2m']

# Centered rolling mean over 15 consecutive time steps (15 days for the daily
# data described in the notebook header). With the default min_periods, a
# window must hold all 15 values, so the first 7 and last 7 time steps of the
# record come out as NaN.
moving_avg_temperature = temperature.rolling(time=15, center=True).mean()

# Attach the new rolling average as a variable to the dataset
combined_ds['temperature_15day_avg'] = moving_avg_temperature

# Create the destination folder if needed, so the write cannot fail on a
# missing directory after the expensive steps above have already run.
output_parent_dir = os.path.dirname(output_file)
if output_parent_dir:  # empty when output_file has no directory component
    os.makedirs(output_parent_dir, exist_ok=True)

# Save the final dataset to a new .nc file
combined_ds.to_netcdf(output_file)

print(f"Combined and moving averaged data saved to {output_file}")