In [2]:
import xarray as xr
import pandas as pd
import numpy as np
import dask
import glob
import os
import warnings
import time as timer

def extract_heatwaves(above_threshold):
    """Flag days that belong to a run of at least three consecutive hot days.

    Parameters
    ----------
    above_threshold : boolean array with a ``time`` dimension
        True where the daily value exceeds its threshold. It must support
        ``.shift(time=..., fill_value=...)`` and the ``&`` / ``|`` operators
        (presumably an xarray DataArray -- confirm against callers).

    Returns
    -------
    Same kind of object as ``above_threshold``
        True only where the day itself is above threshold and it is the
        first, middle or last day of three consecutive above-threshold
        days along ``time``.
    """
    # Neighbouring days lined up with the current one. Positions that would
    # come from beyond either end of the series are filled with False.
    prev_2 = above_threshold.shift(time=2, fill_value=False)
    prev_1 = above_threshold.shift(time=1, fill_value=False)
    next_1 = above_threshold.shift(time=-1, fill_value=False)
    next_2 = above_threshold.shift(time=-2, fill_value=False)

    # The three possible positions of "today" inside a three-day run.
    middle_of_run = next_1 & prev_1
    start_of_run = next_1 & next_2
    end_of_run = prev_1 & prev_2

    in_run = middle_of_run | start_of_run | end_of_run
    return above_threshold & in_run

# HEATWAVE DAYS
def count_days(heatwaves):
    """Total the heatwave-day flags within each calendar year.

    ``heatwaves`` is grouped by ``'time.year'`` and each group is summed
    over ``time``, giving one total per year.
    """
    by_year = heatwaves.groupby('time.year')
    yearly_total = by_year.sum(dim='time')
    return yearly_total

def crop_JJA(data):
    """Keep June-August plus 1 September, with the 1 September entries blanked.

    Parameters
    ----------
    data : object with a ``time`` coordinate
        Must support ``data['time.month']`` / ``data['time.day']`` lookups,
        ``.sel(time=<boolean mask>)`` and ``.where(cond, other=...)``
        (presumably an xarray DataArray or Dataset -- confirm against callers).

    Returns
    -------
    The June, July, August and 1 September entries of ``data``, where every
    1 September entry has been replaced by ``False``.

    Notes
    -----
    NOTE(review): 1 September is presumably kept as a blank separator so
    that runs of hot days cannot join one summer to the next -- confirm.
    """
    month = data['time.month']
    day = data['time.day']
    keep = month.isin([6, 7, 8]) | ((month == 9) & (day == 1))
    cropped = data.sel(time=keep)

    # Build the 1 September mask from the cropped object so that it matches
    # the cropped time axis exactly.
    is_sept_first = (cropped['time.month'] == 9) & (cropped['time.day'] == 1)
    return cropped.where(~is_sept_first, other=False)  # set 1 Sept to False (0)

In [None]:
# Compute the number of summer heatwave days per year over Europe and save
# the result to 'ERA5_HWF.nc'.
daysperyear = 365
dayspersummer = 92
window_days = 15          # calendar days pooled per threshold (7 either side of the day)
threshold_quantile = 0.9  # quantile of the pooled values used as the threshold

# --- Load the data and rename coordinates/variables to the names used below ---
data = xr.open_mfdataset('/gws/nopw/j04/aopp/tildah/tasmax_data/ERA5/ERA5_dmtas_*.nc', combine='by_coords')
data = data.rename({
    "longitude": "lon",
    "latitude": "lat",
    "valid_time": "time",
    "t2m": "tasmax"
})

data = data.sel(time = slice('1940-01-01', '2024-12-30'))
# lat is sliced from 72 down to 36, i.e. in descending order.
europe_data = data.sel(lon=slice(-25,45), lat=slice(72, 36))
europe_data = europe_data.chunk({'time': daysperyear, 'lat': len(europe_data.lat), 'lon': len(europe_data.lon)})
tasmax = europe_data['tasmax']

# --- Leap-year-adjusted day of year ---
# In leap years every day after day 59 (28 Feb) is moved back by one, so
# 29 Feb shares number 59 with 28 Feb and each later calendar date has the
# same number in every year.
time = tasmax['time']
dayofyear = time.dt.dayofyear - (
    (time.dt.is_leap_year) & (time.dt.dayofyear > 59)
)
tasmax = tasmax.assign_coords(dayofyear=('time', dayofyear.data))

# --- Threshold from the 1940-2014 reference period ---
threshold_years = tasmax.sel(time = slice('1940-01-01', '2014-12-30'))

# Day numbers are taken from a non-leap year; this needs altering if 29 Feb
# is ever given its own day number.
may25 = pd.Timestamp('2001-05-25').dayofyear
sep7  = pd.Timestamp('2001-09-07').dayofyear
# Summer plus 7 days either side. The mask is built from threshold_years
# itself rather than from the full-period tasmax, so it does not rely on
# implicit alignment between arrays of different length.
in_extended_summer = (threshold_years.dayofyear >= may25) & (threshold_years.dayofyear <= sep7)
tasmax_JJA_plus = threshold_years.where(in_extended_summer, drop=True)
# Reorder time so that all years of one calendar day sit next to each other.
tasmax_sorted = tasmax_JJA_plus.sortby('dayofyear')

n_years = len(np.unique(tasmax_JJA_plus['time'].dt.year.data)) # 75 for 1940-2014
window_size = window_days * n_years
tasmax_rolled = tasmax_sorted.rolling(time=window_size, center=True, min_periods=1).construct('window')
# Year found n_years // 2 entries into the sorted series.
# NOTE(review): this assumes the entries of each calendar day are in
# chronological order after the sort, so that the window centred on this
# year's entry covers every year of the surrounding window_days days -- confirm.
middle_year = tasmax_sorted['time'].dt.year.values[n_years // 2]

middles = tasmax_rolled.sel(time=tasmax_rolled.time.dt.year == middle_year)
threshold = middles.quantile(threshold_quantile, dim='window')

# --- Exceedance, heatwave detection and yearly counts ---
JJA_threshold = crop_JJA(threshold)
JJA_tasmax = crop_JJA(tasmax)
threshold_by_doy = JJA_threshold.groupby('dayofyear').mean('time')
# Look up the threshold of each day's calendar day and compare.
above_threshold = JJA_tasmax > threshold_by_doy.sel(dayofyear=JJA_tasmax['dayofyear'])
# One chunk per summer: dayspersummer days plus the 1 September entry kept by crop_JJA.
above_threshold = above_threshold.chunk({'time': dayspersummer + 1, 'lat': len(above_threshold.lat), 'lon': len(above_threshold.lon)})
heatwaves = extract_heatwaves(above_threshold)
print(heatwaves)
heatwave_days = count_days(heatwaves)

stats = xr.Dataset({
    'days': heatwave_days,
})

stats.to_netcdf('ERA5_HWF.nc')

print("Processed and saved")


<xarray.DataArray 'tasmax' (time: 7905, lat: 145, lon: 281)> Size: 322MB
dask.array<and_, shape=(7905, 145, 281), dtype=bool, chunksize=(93, 145, 281), chunktype=numpy.ndarray>
Coordinates:
    number     int64 8B 0
  * lat        (lat) float64 1kB 72.0 71.75 71.5 71.25 ... 36.75 36.5 36.25 36.0
  * lon        (lon) float64 2kB -25.0 -24.75 -24.5 -24.25 ... 44.5 44.75 45.0
  * time       (time) datetime64[ns] 63kB 1940-06-01 1940-06-02 ... 2024-09-01
    dayofyear  (time) int64 63kB dask.array<chunksize=(93,), meta=np.ndarray>
    quantile   float64 8B 0.9


In [4]:
# Open the earlier results file and the newly written heatwave file so the
# two can be compared.
old_path = '~/ERA5_variables.nc'
new_path = '/home/users/tildah/Internship/ERA5/ERA5_HWF.nc'
old, new = xr.open_dataset(old_path), xr.open_dataset(new_path)

In [12]:
# Print the 'days' values at the grid point nearest lat 51, lon 0 from each
# file, old first. The old file names its coordinates latitude/longitude,
# the new one lat/lon.
for dataset, point in ((old, dict(latitude=51, longitude=0)),
                       (new, dict(lat=51, lon=0))):
    print(dataset['days'].sel(method='nearest', **point).values)

[ 6 10  7  4  0  3  0 41  8 19  5  0  4  0  0 10  0 13  0  3  3  6  0  3
  0  0  3  3  0  0  6  0  0  4  0 13 25  4  0  0  0  0  8 12  6  0  5  0
  0 11  7  0  0  5  0 23  6  8  0  8  0  8  0 13  0 11 21  0  3  4  0  0
  3  6  3  0  5 14 23  7 12 11 21  9  4]
[ 6 10  7  4  0  3  0 44  8 23  5  0  6  0  0 10  0 14  0  6  3  6  0  0
  0  0  3  0  0  7  9  0  0  4  3 16 28  0  0  0  0  3  6 14  6  0  5  0
  0 15 11  0  0  4  5 23  6 12  0  8  3  8  0 17  3 11 24  0  0  4  3  0
  6 10  6  0  9 12 21  7 12  9 23  9  4]


In [6]:
# Compare the heatwave file with the version that also contains 2025.
# NOTE(review): this rebinds `old` and `new`, which an earlier cell also
# uses for a different pair of files -- run the cells in order.
old = xr.open_dataset('/home/users/tildah/Internship/ERA5/ERA5_HWF.nc')
new = xr.open_dataset('/home/users/tildah/Internship/ERA5/ERA5_HWF_with2025.nc')

# 'days' values at the grid point nearest lat 51, lon 0, old file first.
for dataset in (old, new):
    print(dataset['days'].sel(lat = 51, lon = 0, method = 'nearest').values)

print(new)

[ 6 10  7  4  0  3  0 44  8 23  5  0  6  0  0 10  0 14  0  6  3  6  0  0
  0  0  3  0  0  7  9  0  0  4  3 16 28  0  0  0  0  3  6 14  6  0  5  0
  0 15 11  0  0  4  5 23  6 12  0  8  3  8  0 17  3 11 24  0  0  4  3  0
  6 10  6  0  9 12 21  7 12  9 23  9  4]
[ 6 10  7  4  0  3  0 44  8 23  5  0  6  0  0 10  0 14  0  6  3  6  0  0
  0  0  3  0  0  7  9  0  0  4  3 16 28  0  0  0  0  3  6 14  6  0  5  0
  0 15 11  0  0  4  5 23  6 12  0  8  3  8  0 17  3 11 24  0  0  4  3  0
  6 10  6  0  9 12 21  7 12  9 23  9  4 14]
<xarray.Dataset> Size: 28MB
Dimensions:   (lat: 145, lon: 281, year: 86)
Coordinates:
    number    int64 8B ...
  * lat       (lat) float64 1kB 72.0 71.75 71.5 71.25 ... 36.75 36.5 36.25 36.0
  * lon       (lon) float64 2kB -25.0 -24.75 -24.5 -24.25 ... 44.5 44.75 45.0
    quantile  float64 8B ...
  * year      (year) int64 688B 1940 1941 1942 1943 1944 ... 2022 2023 2024 2025
Data variables:
    days      (year, lat, lon) int64 28MB ...
