### Convert the hourly ERA5 fields to daily and save as netCDF files

A bit of data processing... Now we read in each of the 1-hour ERA5 data files and create daily means. We do this because the storm statistics use the daily mean variables, so we don't care too much about sub-daily variability. This also makes the data much, much easier to work with. We also cut the latitudes at 40S because we don't use data north of that, due to the strong subtropical and western boundary current influence.

In [1]:
# load the modules needed for the data processing

import sys
sys.path.append('../functions/')

import numpy as np
import xarray as xr

from glob import glob
from tqdm.notebook import tqdm 

from storm_interstorm_id import storm_interstorm_id

from adjust_lon_xr_dataset import *



In [3]:
# Convert hourly ERA5 files into one daily-mean netCDF file per variable and
# per DJF season, restricted to latitudes from 40S southwards.

data_directory_out = '/Volumes/LaCie/Work/data/era5/DJF_1981_2023_daily_means/'

# Variables to process in this run; uncomment an entry to include it.
# NOTE(review): `vars` shadows the Python builtin of the same name. The name is
# kept so that any other cell referring to it keeps working.
vars = [
    # 'winds',
    # 'latent_heat_flux',
    # 'sensible_heat_flux',
    # '2m_dewpoint_temperature',
    # '2m_temperature',
    # 'net_solar_radiation',
    # 'net_thermal_radiation',
    'sea_ice_cover',
    # 'sea_surface_temperature'
]

# Radiation inputs that are converted to W m-2, mapped to the name of the
# field inside their files.
ACCUMULATED_FLUX_NAMES = {
    'net_solar_radiation': 'ssr',
    'net_thermal_radiation': 'str',
}

# Divisor used for the W m-2 conversion.
# presumably the inputs are hourly accumulations in J m-2 - TODO confirm
SECONDS_PER_HOUR = 3600

# Dimension order used when a derived field is written back into a dataset.
GRID_DIMS = ('time', 'latitude', 'longitude')

for var in vars:

    data_directory_in = '/Volumes/LaCie/Work/data/era5/DJF_1981_2023_hourly_means/DJF_1981_2023_' + var + '/*.nc'

    file_list = sorted(glob(data_directory_in))

    print(var)

    # each entry is the year of the December that opens a DJF season
    years = np.arange(1981, 1992)

    for year in tqdm(years):

        print(year)

        # file-name endings for Dec of this year and Jan/Feb of the next one
        djf_suffixes = (f'{year}12.nc', f'{year + 1}01.nc', f'{year + 1}02.nc')

        # choose only the files that are dec, jan or feb
        filtered_files = [file for file in file_list if file.endswith(djf_suffixes)]

        # fail with a message that names the variable and season, instead of
        # the generic error raised when an empty list is opened
        if not filtered_files:
            raise FileNotFoundError(
                f'no hourly files ending in {djf_suffixes} found for {var!r} '
                f'using pattern {data_directory_in!r}'
            )

        # Open lazily, keep only our definition of the southern ocean, and only
        # then read into memory, so the discarded latitudes are never loaded.
        # The context manager closes the source files once the subset is in
        # memory.
        # NOTE(review): the slice bounds run north-to-south, which presumes
        # latitude is stored in descending order - confirm against the files.
        with xr.open_mfdataset(filtered_files, engine='netcdf4') as ds_lazy:
            ds = ds_lazy.sel(latitude=slice(-40, -90)).load()

        # convert the fluxes to W m-2
        flux_name = ACCUMULATED_FLUX_NAMES.get(var)
        if flux_name is not None:
            ds[flux_name] = (GRID_DIMS, (ds[flux_name] / SECONDS_PER_HOUR).data)

        # at this stage - resample the data to daily resolution, then do the
        # calculations and adjust longitudes
        ds_1D = ds.resample(time='1D').mean()

        if var == 'winds':

            # calculate the wind speed
            # NOTE(review): this is the magnitude of the daily-mean wind
            # components, which is not the same quantity as the daily mean of
            # the hourly wind speed - confirm this is the intended definition.
            ds_1D['ws'] = (GRID_DIMS, np.sqrt(ds_1D['u10'].data**2 + ds_1D['v10'].data**2))

        # Adjust the longitudes to be -180 to 180
        ds_1D = adjust_lon_xr_dataset(ds_1D)

        print('saving to file...')

        ds_1D.to_netcdf(f'{data_directory_out}{var}_{year}_DJF.nc')

sea_ice_cover


  0%|          | 0/11 [00:00<?, ?it/s]

1981
saving to file...
1982
saving to file...
1983
saving to file...
1984
saving to file...
1985
saving to file...
1986
saving to file...
1987
saving to file...
1988
saving to file...
1989
saving to file...
1990
saving to file...
1991
saving to file...
