In [14]:
# import all necessary libraries

import functions

from functions import mask_data
from functions import plot_aus

import geopandas as gpd
import xarray
from shapely.geometry import mapping

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt

import cartopy.crs as ccrs

import pandas as pd
from datetime import datetime

import os

from sklearn.metrics import mean_squared_error

import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.ticker as mticker

from shapely.geometry import Point

ERA5-Land Data

In [2]:
# Open the monthly ERA5-Land file
ERA5_path = 'ERA5-Land/all_monthly_data_ERA5.nc'
ERA5_data = xarray.open_dataset(ERA5_path)

# Merge the two experiment versions into a single record: values from expver=1
# take priority and expver=5 only fills the positions where expver=1 is missing.
# A file without an `expver` dimension is left untouched instead of raising.
if 'expver' in ERA5_data.dims:
    ERA5_data = ERA5_data.sel(expver=1).combine_first(ERA5_data.sel(expver=5))

# Timestamps of the original record
time_values = ERA5_data['time'].values

# New time axis: the midpoint between each pair of consecutive timestamps
# (one element shorter than the original axis)
mid_times = time_values[:-1] + (time_values[1:] - time_values[:-1]) / 2

# Average every pair of consecutive time steps for each data variable
data_vars = {}
for var in ERA5_data.data_vars:
    field = ERA5_data[var]
    if 'time' not in field.dims:
        continue  # no time axis, so there is nothing to average

    # Work on the raw arrays (.data): adding the two DataArrays directly would
    # align them on their time labels instead of pairing step i with step i+1.
    avg_data = (field.isel(time=slice(0, -1)).data + field.isel(time=slice(1, None)).data) / 2

    # Reuse the variable's own dimension order rather than assuming one
    data_vars[var] = (field.dims, avg_data)

# Rebuild the dataset on the midpoint time axis
averaged_data = xarray.Dataset(
    data_vars,
    coords={
        'time': mid_times,
        'latitude': ERA5_data['latitude'].data,  # raw coordinate values
        'longitude': ERA5_data['longitude'].data  # raw coordinate values
    }
)

ERA5_data = averaged_data

In [3]:
# Study period: first and last date to keep (label-based slice, both ends inclusive)
start_date, end_date = '2002-06-01', '2024-04-30'

# Restrict the ERA5 dataset to the study period
ERA5_data = ERA5_data.sel({'time': slice(start_date, end_date)})

In [4]:
# Pull the precipitation (tp) and evaporation (e) fields out of the ERA5 dataset,
# each scaled by a constant factor of 30.
# NOTE(review): the 30 looks like a days-per-month conversion from a daily value
# to a monthly total — confirm against the units of the source file.
precip_data_ERA5 = ERA5_data['tp'] * 30
evap_data_ERA5 = ERA5_data['e'] * 30

AWRA-L Data

In [5]:
# Open the AWRA-L precipitation file
awra_p_path = 'AWRA-L/rain_day.nc'
awra_p_data = xarray.open_dataset(awra_p_path)

# Timestamps of the original record
time_values = awra_p_data['time'].values

# New time axis: the midpoint between each pair of consecutive timestamps
# (one element shorter than the original axis)
mid_times = time_values[:-1] + (time_values[1:] - time_values[:-1]) / 2

# Averaged fields, keyed by variable name
data_vars = {}

for var in awra_p_data.data_vars:
    field = awra_p_data[var]

    # Only numerical fields are averaged; datetime-valued variables are skipped
    if np.issubdtype(field.dtype, np.datetime64):
        continue

    # Variables without a time axis cannot be averaged along time and would
    # make isel(time=...) raise, so they are skipped as well
    if 'time' not in field.dims:
        continue

    # Mean of step i and step i+1, computed on the raw arrays so the two
    # slices are paired by position rather than aligned on their time labels
    avg_data = (field.isel(time=slice(0, -1)).data + field.isel(time=slice(1, None)).data) / 2

    # Reuse the variable's own dimension order rather than assuming one
    data_vars[var] = (field.dims, avg_data)

# Rebuild the dataset on the midpoint time axis
averaged_data = xarray.Dataset(
    data_vars,
    coords={
        'time': mid_times,
        'latitude': awra_p_data['latitude'].data,
        'longitude': awra_p_data['longitude'].data
    }
)

awra_p_data = averaged_data

In [6]:
# Study period: first and last date to keep (label-based slice, both ends inclusive)
start_date, end_date = '2002-06-01', '2024-04-30'

# Restrict the AWRA-L precipitation dataset to the study period
awra_p_data = awra_p_data.sel({'time': slice(start_date, end_date)})

In [7]:
# Open the AWRA-L evapotranspiration file
awra_et_path = 'AWRA-L/AWRA-L-8524-etot.nc'
awra_et_data = xarray.open_dataset(awra_et_path)

# Timestamps of the original record
time_values = awra_et_data['time'].values

# New time axis: the midpoint between each pair of consecutive timestamps
# (one element shorter than the original axis)
mid_times = time_values[:-1] + (time_values[1:] - time_values[:-1]) / 2

# Averaged fields, keyed by variable name
data_vars = {}

for var in awra_et_data.data_vars:
    field = awra_et_data[var]

    # Only numerical fields are averaged; datetime-valued variables are skipped
    if np.issubdtype(field.dtype, np.datetime64):
        continue

    # Variables without a time axis cannot be averaged along time and would
    # make isel(time=...) raise, so they are skipped as well
    if 'time' not in field.dims:
        continue

    # Mean of step i and step i+1, computed on the raw arrays so the two
    # slices are paired by position rather than aligned on their time labels
    avg_data = (field.isel(time=slice(0, -1)).data + field.isel(time=slice(1, None)).data) / 2

    # Reuse the variable's own dimension order rather than assuming one
    data_vars[var] = (field.dims, avg_data)

# Rebuild the dataset on the midpoint time axis
averaged_data = xarray.Dataset(
    data_vars,
    coords={
        'time': mid_times,
        'latitude': awra_et_data['latitude'].data,
        'longitude': awra_et_data['longitude'].data
    }
)

awra_et_data = averaged_data

In [8]:
# Study period: first and last date to keep (label-based slice, both ends inclusive)
start_date, end_date = '2002-06-01', '2024-04-30'

# Restrict the AWRA-L evapotranspiration dataset to the study period
awra_et_data = awra_et_data.sel({'time': slice(start_date, end_date)})

In [9]:
# AWRA-L precipitation (rain_day) and evapotranspiration (etot), each divided
# by 1000 — a conversion to metres according to the original author's note
precip_data_awra = awra_p_data['rain_day'] / 1000
et_data_awra = awra_et_data['etot'] / 1000

CMRSET Data

In [10]:
# Load the CMRSET dataset from disk
cmrset_path = 'AWAP_and_CMRSET/CMRSET_data.nc'
cmrset_data = xarray.open_dataset(filename_or_obj=cmrset_path)

In [11]:
# Trim the CMRSET record so it runs from May 2002 to April 2024
# (label-based slice, both ends inclusive)
start_date, end_date = '2002-05-01', '2024-04-30'

cmrset_data = cmrset_data.sel({'time': slice(start_date, end_date)})

In [12]:
# CMRSET evapotranspiration (aET) divided by 1000 — a conversion to metres
# according to the original author's note
et_data_cmrset = cmrset_data['aET'] / 1000

Save Data

In [13]:
# Write each processed field to its own NetCDF file.
# The output folder is created first (no-op if it already exists) so that a
# fresh checkout does not fail on the first write.
output_dir = 'Processed Data/Models'
os.makedirs(output_dir, exist_ok=True)

precip_data_ERA5.to_netcdf(f'{output_dir}/ERA5-p.nc')
evap_data_ERA5.to_netcdf(f'{output_dir}/ERA5-et.nc')
precip_data_awra.to_netcdf(f'{output_dir}/AWRA-p.nc')
et_data_awra.to_netcdf(f'{output_dir}/AWRA-et.nc')
et_data_cmrset.to_netcdf(f'{output_dir}/CMRSET.nc')