#### Script that writes one NetCDF (.nc) output file per aggregation level (yearly, seasonal, quarterly)

In [7]:
import xarray as xr
import numpy as np
import pandas as pd
from pathlib import Path

def calculate_aggregations_and_save_nc(input_nc_file, pressure_level=1000, min_year=None, max_year=None):
    """
    Calculate yearly, seasonal, and quarterly wind speed and direction averages at a specified pressure level,
    and save the results into new NetCDF files with unique filenames for each timescale (yearly, seasonal, and quarterly).

    The averaged wind direction written to the outputs is the direction of the averaged (u, v) wind vector.
    Averaging the per-time-step angles directly is not meaningful across the 0/360 degree wrap
    (e.g. 350 and 10 degrees would average to 180 instead of 0).

    Parameters:
    input_nc_file (str): Path to NetCDF file with wind data.
    pressure_level (int): Pressure level to analyze (default is 1000 mb).
    min_year (int): Optional, minimum year for analysis. Time steps before this year are excluded.
    max_year (int): Optional, maximum year for analysis. Time steps after this year are excluded.

    Raises:
    ValueError: If min_year/max_year leave no time steps to aggregate.

    Output:
    Separate NetCDF files containing aggregated results, named based on the input data range, pressure level, and
    timescale (e.g., yearly, seasonal DJF, quarterly Q1, etc.). Files are written next to the input file.
    """

    input_nc_path = Path(input_nc_file)
    output_dir = input_nc_path.parent  # Output in the same directory as the input

    # The context manager closes the input file handle once every output has been written.
    with xr.open_dataset(input_nc_file, engine='netcdf4') as source:
        ds = source

        # Ensure date is in the correct format
        if not np.issubdtype(ds['date'].dtype, np.datetime64):
            ds['date'] = pd.to_datetime(ds['date'].values, format='%Y%m%d')

        # Select specified pressure level
        ds = ds.sel(pressure_level=pressure_level)

        # Calculate wind speed and direction
        ds['wind_speed'] = np.sqrt(ds['u']**2 + ds['v']**2)
        ds['wind_dir'] = (270 - np.rad2deg(np.arctan2(ds['v'], ds['u']))) % 360

        # Determine the available years and set start/end years
        years = ds['date'].dt.year.values
        start_year = min_year if min_year else years.min()
        end_year = max_year if max_year else years.max()

        # Restrict the data to the requested years so the content of each file matches
        # the year range that appears in its name.
        if min_year or max_year:
            year_of_step = ds['date'].dt.year
            ds = ds.sel(date=(year_of_step >= start_year) & (year_of_step <= end_year))
            if ds['date'].size == 0:
                raise ValueError(f"No data available for the requested year range ({start_year}-{end_year}).")

        base_output_file_name = f"{input_nc_path.stem}_{pressure_level}mb_{start_year}_{end_year}"

        # Yearly aggregation
        yearly = _set_vector_mean_direction(ds.resample(date='Y').mean())
        process_aggregation_and_save(yearly, output_dir / f"{base_output_file_name}_yearly.nc")

        # Seasonal and quarterly aggregation: one output file per season / quarter.
        month_groups = {
            "seasonal": {
                "DJF": ['12', '01', '02'],
                "MAM": ['03', '04', '05'],
                "JJA": ['06', '07', '08'],
                "SON": ['09', '10', '11'],
            },
            "quarterly": {
                "Q1": ['01', '02', '03'],
                "Q2": ['04', '05', '06'],
                "Q3": ['07', '08', '09'],
                "Q4": ['10', '11', '12'],
            },
        }
        for timescale, groups in month_groups.items():
            for label, months in groups.items():
                # Aggregate one group at a time so only one result is held in memory.
                data = _set_vector_mean_direction(resample_by_months(ds, months))
                process_aggregation_and_save(data, output_dir / f"{base_output_file_name}_{timescale}_{label}.nc")


def _set_vector_mean_direction(aggregated):
    """
    Replace 'wind_dir' in an aggregated dataset with the direction of its averaged (u, v) wind vector.

    The dataset is returned unchanged when it does not carry both 'u' and 'v'.
    """
    if 'u' in aggregated and 'v' in aggregated:
        aggregated['wind_dir'] = (270 - np.rad2deg(np.arctan2(aggregated['v'], aggregated['u']))) % 360
    return aggregated

def resample_by_months(ds, months):
    """
    Keep only the time steps whose calendar month is listed in `months`, then average them per calendar year.

    `months` may hold month numbers as strings (e.g. '01') or ints; each entry is converted with int().

    NOTE: the yearly resampling groups by calendar year, so for ['12', '01', '02'] the December of a year
    is averaged together with January and February of that same year.
    """
    wanted_months = list(map(int, months))
    in_wanted_months = ds['date'].dt.month.isin(wanted_months)
    subset = ds.sel(date=in_wanted_months)
    return subset.resample(date='Y').mean()

def process_aggregation_and_save(aggregated, output_nc_file):
    """
    Repackage aggregated wind speed and direction into a new dataset and write it to a NetCDF file.

    Parameters:
    aggregated (xarray.Dataset): Aggregated dataset (yearly, seasonal, or quarterly) providing 'date',
        'latitude', 'longitude', 'wind_speed' and 'wind_dir'.
    output_nc_file (Path): Output NetCDF file path.

    Output:
    A NetCDF file with the variables 'timescale' (a "YYYY-MM" label per time step), 'avg_wind_speed'
    and 'avg_wind_dir'. The two wind variables keep latitude and longitude as dimensions.
    """

    stamps = aggregated['date']
    time_values = stamps.values

    # One "YYYY-MM" label per aggregated time step
    timescale_label = [
        f"{year}-{month:02d}"
        for year, month in zip(stamps.dt.year.values, stamps.dt.month.values)
    ]

    # Coordinates shared by both gridded output variables (no spatial averaging is done here)
    grid_dims = ['time', 'latitude', 'longitude']
    grid_coords = {
        'time': time_values,
        'latitude': aggregated['latitude'].values,
        'longitude': aggregated['longitude'].values,
    }

    # Assemble the output variables: the label first, then the two gridded fields
    fields = {
        'timescale': xr.DataArray(timescale_label, dims='time', coords={'time': time_values}),
    }
    for out_name, source_name in (('avg_wind_speed', 'wind_speed'), ('avg_wind_dir', 'wind_dir')):
        fields[out_name] = xr.DataArray(
            aggregated[source_name].values, dims=grid_dims, coords=dict(grid_coords)
        )
    new_ds = xr.Dataset(fields)

    # Add metadata attributes to the spatial coordinates
    new_ds['latitude'].attrs = {'standard_name': 'latitude', 'units': 'degrees_north'}
    new_ds['longitude'].attrs = {'standard_name': 'longitude', 'units': 'degrees_east'}

    new_ds.to_netcdf(output_nc_file, mode='w')
    print(f"Aggregated data saved to {output_nc_file}")


# Example usage:
# Path to the input NetCDF file (raw string so the Windows backslashes are kept literally)
input_nc_file = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0.nc"

# Process the input file with the defaults (pressure_level=1000, no year limits);
# the output files are written next to the input file
calculate_aggregations_and_save_nc(input_nc_file)


Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_yearly.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_seasonal_DJF.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_seasonal_MAM.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_seasonal_JJA.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_seasonal_SON.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_quarterly_Q1.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_quarterly_Q2.nc
Aggregated data saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_

In [4]:
# Open the yearly NetCDF output and print its summary (dimensions, coordinates, variables)
ds = xr.open_dataset(
    r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_1000mb_1940_2024_yearly.nc",
    engine='netcdf4',
)

print(ds)

<xarray.Dataset>
Dimensions:         (date: 85)
Coordinates:
  * date            (date) datetime64[ns] 1940-12-31 1941-12-31 ... 2024-12-31
Data variables:
    timescale       (date) object ...
    avg_wind_speed  (date) float32 ...
    avg_wind_dir    (date) float32 ...


#### Script that writes mean wind speed and wind direction GeoTIFF (.tif) files per year range

In [9]:
import xarray as xr
import numpy as np
import pandas as pd
import rasterio
from rasterio.transform import from_origin
from pathlib import Path

def calculate_aggregations_and_save_tif(input_nc_file, pressure_level=1000, min_year=None, max_year=None, year_interval=None):
    """
    Calculate wind speed and direction averages at a specified pressure level, and save the results into new GeoTIFF files.

    For the full-range case, the averaged wind direction is the direction of the averaged (u, v) wind vector.
    Averaging the per-time-step angles directly is not meaningful across the 0/360 degree wrap
    (e.g. 350 and 10 degrees would average to 180 instead of 0).

    Parameters:
    input_nc_file (str): Path to NetCDF file with wind data.
    pressure_level (int): Pressure level to analyze (default is 1000 mb).
    min_year (int): Optional, minimum year for analysis.
    max_year (int): Optional, maximum year for analysis.
    year_interval (int): Optional, number of years for grouping (e.g., 5 for 5-year intervals, 10 for 10-year intervals).
                         If None, aggregate over the full available year range.

    Raises:
    ValueError: If the pressure level is missing, the year range is inverted or outside the data,
                or year_interval is not a positive number.
    KeyError: If the dataset lacks the 'u' or 'v' wind component.

    Output:
    GeoTIFF files containing aggregated results for wind speed and wind direction, named based on the time period.
    """

    # Reject a non-positive interval up front; it would otherwise fail inside range() or silently produce no files.
    if year_interval is not None and year_interval < 1:
        raise ValueError(f"year_interval ({year_interval}) must be a positive number of years.")

    input_nc_path = Path(input_nc_file)
    output_dir = input_nc_path.parent  # Output in the same directory as the input
    base_output_file_name = f"{input_nc_path.stem}_{pressure_level}mb"

    # Load the dataset; the context manager closes the file handle once all rasters are written.
    with xr.open_dataset(input_nc_file, engine='netcdf4') as source:
        ds = source

        # Ensure the 'date' coordinate is in the correct format
        if not np.issubdtype(ds['date'].dtype, np.datetime64):
            ds['date'] = pd.to_datetime(ds['date'].values, format='%Y%m%d')

        # Check if the specified pressure level exists in the dataset
        if 'pressure_level' in ds.coords and pressure_level in ds['pressure_level'].values:
            # Select the specified pressure level
            ds = ds.sel(pressure_level=pressure_level)
        else:
            raise ValueError(f"Pressure level {pressure_level} mb not found in the dataset.")

        # Check if 'u' and 'v' components exist in the dataset
        if 'u' not in ds or 'v' not in ds:
            raise KeyError(f"The dataset does not contain the required 'u' and 'v' wind components.")

        # Calculate wind speed (m/s) from u and v components
        ds['wind_speed'] = np.sqrt(ds['u']**2 + ds['v']**2)

        # Calculate wind direction in degrees, following meteorological convention (0° = North)
        ds['wind_dir'] = (270 - np.rad2deg(np.arctan2(ds['v'], ds['u']))) % 360

        # Determine the available years in the dataset
        years = ds['date'].dt.year.values
        available_min_year = years.min()
        available_max_year = years.max()

        # Set default min_year and max_year if not provided
        start_year = min_year if min_year else available_min_year
        end_year = max_year if max_year else available_max_year

        # Check for conflicts in year range
        if start_year > end_year:
            raise ValueError(f"min_year ({start_year}) cannot be greater than max_year ({end_year}).")

        if start_year < available_min_year or end_year > available_max_year:
            raise ValueError(f"The selected year range ({start_year}-{end_year}) is outside the available data range ({available_min_year}-{available_max_year}).")

        # Check if year_interval is larger than the available range
        if year_interval and year_interval > (end_year - start_year + 1):
            print(f"Year interval ({year_interval}) is larger than the available year range ({start_year}-{end_year}). Processing the entire range as a single block.")
            year_interval = None  # Set to None to process as a single block

        # Handle year intervals
        if year_interval is None:
            # Aggregate over the full available year range
            data_interval = ds.sel(date=slice(f"{start_year}-01-01", f"{end_year}-12-31"))
            interval_mean = data_interval.mean(dim='date', skipna=True)  # Handle NaN values
            # Use the direction of the mean wind vector instead of the arithmetic mean of the angles.
            interval_mean['wind_dir'] = (270 - np.rad2deg(np.arctan2(interval_mean['v'], interval_mean['u']))) % 360
            output_file = output_dir / f"{base_output_file_name}_{start_year}_{end_year}"
            process_aggregation_and_save_as_tif(interval_mean, output_file)
        else:
            # Subdivide into year intervals and process each group
            process_by_year_interval(ds, start_year, end_year, year_interval, output_dir, base_output_file_name)

def process_by_year_interval(ds, start_year, end_year, year_interval, output_dir, base_output_file_name):
    """
    Process data in blocks based on the specified year interval.

    Each block covers `year_interval` calendar years (the last block is cut at `end_year`), is averaged
    over its time steps, and is handed to process_aggregation_and_save_as_tif under a name ending in
    "<first year>_<last year>". Blocks without any time step are reported and skipped.

    When the block mean contains 'u' and 'v', the averaged wind direction is recomputed from the averaged
    wind vector, because an arithmetic mean of angles is not meaningful across the 0/360 degree wrap.
    """
    for year_start in range(start_year, end_year + 1, year_interval):
        year_end = min(year_start + year_interval - 1, end_year)
        year_label = f"{year_start}_{year_end}"

        # Select the data within the year range
        data_interval = ds.sel(date=slice(f"{year_start}-01-01", f"{year_end}-12-31"))

        # Check if the selected dataset is empty
        if data_interval.date.size == 0:
            print(f"No data available for the interval {year_label}. Skipping...")
            continue

        # Aggregate over the time dimension within the interval
        interval_mean = data_interval.mean(dim='date', skipna=True)

        # Direction of the mean wind vector replaces the arithmetic mean of the per-step angles.
        if 'u' in interval_mean and 'v' in interval_mean:
            interval_mean['wind_dir'] = (270 - np.rad2deg(np.arctan2(interval_mean['v'], interval_mean['u']))) % 360

        # Generate output file name and save as GeoTIFF
        output_file = output_dir / f"{base_output_file_name}_{year_label}"
        process_aggregation_and_save_as_tif(interval_mean, output_file)

def process_aggregation_and_save_as_tif(aggregated, output_file):
    """
    Process aggregated wind speed and direction, then save as GeoTIFF files using rasterio.

    Two single-band float32 rasters in EPSG:4326 are written: "<output_file>_wind_speed.tif" and
    "<output_file>_wind_dir.tif". The rasters are stored north-up and west-to-east, and the
    latitude/longitude values are treated as cell centres of a regular grid.

    Parameters:
    aggregated (xarray.Dataset): Aggregated dataset (e.g., for a year interval or full range).
    output_file (Path): Output file name without extension.
    """

    # Extract necessary data; cast to float32 so the arrays match the dtype declared in the metadata
    avg_wind_speed = np.asarray(aggregated['wind_speed'].squeeze().values, dtype='float32')  # 2D (lat, lon) expected
    avg_wind_dir = np.asarray(aggregated['wind_dir'].squeeze().values, dtype='float32')  # 2D (lat, lon) expected
    latitudes = np.asarray(aggregated['latitude'].values)
    longitudes = np.asarray(aggregated['longitude'].values)

    # Check if the dataset is valid and has the correct shape
    # (at least two coordinates per axis are needed to derive the grid spacing)
    if avg_wind_speed.ndim != 2 or avg_wind_speed.size == 0 or latitudes.size < 2 or longitudes.size < 2:
        print(f"Invalid dataset for {output_file}. Skipping...")
        return

    # The transform below places row 0 at the northern edge and column 0 at the western edge,
    # so reorder the data if the coordinates run south-to-north or east-to-west.
    if latitudes[0] < latitudes[-1]:
        avg_wind_speed = avg_wind_speed[::-1, :]
        avg_wind_dir = avg_wind_dir[::-1, :]
    if longitudes[0] > longitudes[-1]:
        avg_wind_speed = avg_wind_speed[:, ::-1]
        avg_wind_dir = avg_wind_dir[:, ::-1]
    avg_wind_speed = np.ascontiguousarray(avg_wind_speed)
    avg_wind_dir = np.ascontiguousarray(avg_wind_dir)

    # Define the raster transformation (assuming regular grid with constant spacing).
    # from_origin expects the outer (west, north) edge of the upper-left pixel, while the coordinates
    # are cell centres, so shift the origin outwards by half a pixel.
    x_res = float(np.abs(longitudes[1] - longitudes[0]))
    y_res = float(np.abs(latitudes[1] - latitudes[0]))
    transform = from_origin(float(np.min(longitudes)) - x_res / 2, float(np.max(latitudes)) + y_res / 2, x_res, y_res)

    # Define metadata for GeoTIFF files
    meta = {
        'driver': 'GTiff',
        'height': avg_wind_speed.shape[0],  # Number of latitudes
        'width': avg_wind_speed.shape[1],   # Number of longitudes
        'count': 1,                         # Single band (wind speed or direction)
        'dtype': 'float32',
        'crs': 'EPSG:4326',                 # Coordinate reference system (WGS84)
        'transform': transform
    }

    # Save the wind speed raster
    wind_speed_tif = f"{output_file}_wind_speed.tif"
    with rasterio.open(wind_speed_tif, 'w', **meta) as dst:
        dst.write(avg_wind_speed, 1)  # Writing the aggregated wind speed

    print(f"Wind speed GeoTIFF saved to {wind_speed_tif}")

    # Save the wind direction raster
    wind_dir_tif = f"{output_file}_wind_dir.tif"
    with rasterio.open(wind_dir_tif, 'w', **meta) as dst:
        dst.write(avg_wind_dir, 1)  # Writing the averaged wind direction

    print(f"Wind direction GeoTIFF saved to {wind_dir_tif}")

# Example usage: wind speed and direction GeoTIFFs at 250 mb, one pair per 10-year block
input_nc_file = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0.nc"
calculate_aggregations_and_save_tif(input_nc_file, pressure_level=250, year_interval=10)


Wind speed GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1940_1949_wind_speed.tif
Wind direction GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1940_1949_wind_dir.tif
Wind speed GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1950_1959_wind_speed.tif
Wind direction GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1950_1959_wind_dir.tif
Wind speed GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1960_1969_wind_speed.tif
Wind direction GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1960_1969_wind_dir.tif
Wind speed GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0_250mb_1970_1979_wind_speed.tif
Wind direction GeoTIFF saved to D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly

#### Script that writes mean wind direction GeoTIFF (.tif) files per aggregation level (yearly, seasonal, quarterly)

In [8]:
'''
This script processes ERA5 data to generate wind direction averages for yearly, seasonal, or quarterly periods. 
It allows the user to specify a range of years and intervals, and supports processing exact seasons (e.g., DJF for winter) 
or quarters (e.g., Q1 for January-March). Output is saved as GeoTIFF files.

Parameters:
- input_nc_file: Path to the input NetCDF file containing wind data.
- aggregation: Defines the type of aggregation (yearly, seasonal, or quarterly).
- pressure_level: The pressure level to analyze (default is 1000 mb).
- min_year, max_year: The start and end year for analysis.
- year_interval: The interval of years to group data (e.g., 5-year blocks).
- period: Defines the season (e.g., 'DJF') or quarter (e.g., 'Q1') for aggregation.

Output:
GeoTIFF files containing aggregated wind direction data for the specified periods and intervals.
'''

import xarray as xr
import numpy as np
import rasterio
from rasterio.transform import from_origin
from pathlib import Path

def calculate_aggregations_and_save_tif(input_nc_file, aggregation="yearly", pressure_level=1000, min_year=None, max_year=None, year_interval=None, period=None):
    """
    Average wind direction at one pressure level and save the result as GeoTIFF files.

    Parameters:
    input_nc_file (str): Path to the input NetCDF file containing the 'u' and 'v' wind components.
    aggregation (str): "yearly", "seasonal" or "quarterly".
    pressure_level (int): Pressure level to analyze (default is 1000 mb).
    min_year (int): Optional, first year to include (defaults to the first year in the data).
    max_year (int): Optional, last year to include (defaults to the last year in the data).
    year_interval (int): Optional, number of years per output block. If None, or larger than the
                         selected year range, the whole range is processed as a single block.
    period (str): Season ('DJF', 'MAM', 'JJA', 'SON') or quarter ('Q1'..'Q4'). Required for
                  "seasonal" and "quarterly" aggregation; not used for "yearly".

    Raises:
    ValueError: If aggregation, period, year_interval, the pressure level or the year range is invalid.
    KeyError: If the dataset lacks the 'u' or 'v' wind component.

    Output:
    GeoTIFF files written next to the input file, one per processed block of years.
    """
    # pandas is needed for the date conversion below, but this cell's import block does not import it;
    # import it locally so the function does not depend on an earlier cell having been run.
    import pandas as pd

    # Define seasons and quarters: period code -> (months, label used in the output file name)
    periods = {
        'DJF': ([12, 1, 2], 'winter'),
        'MAM': ([3, 4, 5], 'spring'),
        'JJA': ([6, 7, 8], 'summer'),
        'SON': ([9, 10, 11], 'fall'),
        'Q1': ([1, 2, 3], 'Q1'),
        'Q2': ([4, 5, 6], 'Q2'),
        'Q3': ([7, 8, 9], 'Q3'),
        'Q4': ([10, 11, 12], 'Q4')
    }

    # An unknown aggregation would otherwise fall through every branch and silently write nothing.
    if aggregation not in ("yearly", "seasonal", "quarterly"):
        raise ValueError(f"Invalid aggregation '{aggregation}'. Valid options: ['yearly', 'seasonal', 'quarterly'].")

    # Sanity check for a valid period declaration
    if period and period not in periods:
        raise ValueError(f"Invalid period '{period}'. Valid options: {list(periods.keys())}.")

    # Seasonal/quarterly processing looks the period up in `periods`, so it must be given.
    if aggregation in ("seasonal", "quarterly") and not period:
        raise ValueError(f"A period is required for '{aggregation}' aggregation. Valid options: {list(periods.keys())}.")

    # A non-positive interval would fail inside range() or silently produce no files.
    if year_interval is not None and year_interval < 1:
        raise ValueError(f"year_interval ({year_interval}) must be a positive number of years.")

    # Define output directory and base filename
    input_nc_path = Path(input_nc_file)
    output_dir = input_nc_path.parent
    base_output_file_name = f"{input_nc_path.stem}_{pressure_level}mb"

    # The context manager closes the input file handle once all rasters are written.
    with xr.open_dataset(input_nc_file, engine='netcdf4') as source:
        ds = source

        # Ensure date format is correct
        if not np.issubdtype(ds['date'].dtype, np.datetime64):
            ds['date'] = np.array(pd.to_datetime(ds['date'].values, format='%Y%m%d'))

        # Select the specified pressure level
        if 'pressure_level' in ds.coords and pressure_level in ds['pressure_level'].values:
            ds = ds.sel(pressure_level=pressure_level)
        else:
            raise ValueError(f"Pressure level {pressure_level} mb not found in the dataset.")

        # Ensure required wind components are present
        if 'u' not in ds or 'v' not in ds:
            raise KeyError("The dataset does not contain the required 'u' and 'v' wind components.")

        # Calculate wind direction in degrees (meteorological convention)
        ds['wind_dir'] = (270 - np.rad2deg(np.arctan2(ds['v'], ds['u']))) % 360

        # Validate time range and year interval
        years = ds['date'].dt.year.values
        available_min_year = years.min()
        available_max_year = years.max()

        start_year = min_year if min_year else available_min_year
        end_year = max_year if max_year else available_max_year

        if start_year > end_year:
            raise ValueError(f"min_year ({start_year}) cannot be greater than max_year ({end_year}).")

        if start_year < available_min_year or end_year > available_max_year:
            raise ValueError(f"The selected year range ({start_year}-{end_year}) is outside the available data range ({available_min_year}-{available_max_year}).")

        # An interval longer than the selected range means "process everything as one block".
        if year_interval and year_interval > (end_year - start_year + 1):
            year_interval = None

        # Build the (first year, last year) blocks to process; the last block is cut at end_year.
        if year_interval is None:
            year_blocks = [(start_year, end_year)]
        else:
            year_blocks = [
                (block_start, min(block_start + year_interval - 1, end_year))
                for block_start in range(start_year, end_year + 1, year_interval)
            ]

        # Process data based on the aggregation type
        for block_start, block_end in year_blocks:
            if aggregation == "yearly":
                process_yearly_aggregation(ds, block_start, block_end, output_dir, base_output_file_name)
            else:
                process_period_aggregation(ds, block_start, block_end, periods, output_dir, base_output_file_name, period)

def process_yearly_aggregation(ds, start_year, end_year, output_dir, base_output_file_name):
    """
    Average the data over all time steps from start_year to end_year (inclusive) and save it as a GeoTIFF.

    The output name is "<base_output_file_name>_<start_year>_<end_year>_yearly" inside output_dir;
    process_aggregation_and_save_as_tif appends the final suffix.

    When the mean contains 'u' and 'v', the averaged wind direction is recomputed from the averaged
    wind vector, because an arithmetic mean of angles is not meaningful across the 0/360 degree wrap
    (e.g. 350 and 10 degrees would average to 180 instead of 0).
    """
    data_interval = ds.sel(date=slice(f"{start_year}-01-01", f"{end_year}-12-31"))
    yearly_mean = data_interval.mean(dim='date', skipna=True)

    # Direction of the mean wind vector replaces the arithmetic mean of the per-step angles.
    if 'u' in yearly_mean and 'v' in yearly_mean:
        yearly_mean['wind_dir'] = (270 - np.rad2deg(np.arctan2(yearly_mean['v'], yearly_mean['u']))) % 360

    output_file = output_dir / f"{base_output_file_name}_{start_year}_{end_year}_yearly"
    process_aggregation_and_save_as_tif(yearly_mean, output_file)

def process_period_aggregation(ds, start_year, end_year, periods, output_dir, base_output_file_name, period):
    """
    Average one season or quarter over start_year..end_year (inclusive) and save it as a GeoTIFF.

    `periods` maps a period code to (list of month numbers, label); `period` is the code to process.
    The output name is "<base_output_file_name>_<start_year>_<end_year>_<label>" inside output_dir.

    When the mean contains 'u' and 'v', the averaged wind direction is recomputed from the averaged
    wind vector, because an arithmetic mean of angles is not meaningful across the 0/360 degree wrap.

    NOTE: months are selected within the calendar years start_year..end_year, so a period such as
    [12, 1, 2] uses the December of each selected year, not the December preceding its January.

    Raises:
    ValueError: If no time step falls into the requested period, or no wind direction is available.
    """
    months, label = periods[period]
    data_interval = ds.sel(date=slice(f"{start_year}-01-01", f"{end_year}-12-31"))

    # Select data for the specified period (season/quarter)
    selected = data_interval.sel(date=data_interval['date'].dt.month.isin(months))

    # The emptiness check must happen before averaging: the time-mean of an empty selection
    # still has the full latitude/longitude shape, so it cannot be detected afterwards.
    if selected['date'].size == 0:
        raise ValueError(f"No valid data found for the period: {label} in {start_year}-{end_year}.")

    period_data = selected.mean(dim='date', skipna=True)

    # Check if valid wind direction data exists
    if 'wind_dir' not in period_data or period_data['wind_dir'].size == 0:
        raise ValueError(f"No valid data found for the period: {label} in {start_year}-{end_year}.")

    # Direction of the mean wind vector replaces the arithmetic mean of the per-step angles.
    if 'u' in period_data and 'v' in period_data:
        period_data['wind_dir'] = (270 - np.rad2deg(np.arctan2(period_data['v'], period_data['u']))) % 360

    # Save the result to a GeoTIFF
    output_file = output_dir / f"{base_output_file_name}_{start_year}_{end_year}_{label}"
    process_aggregation_and_save_as_tif(period_data, output_file)

def process_aggregation_and_save_as_tif(aggregated, output_file):
    """
    Save the aggregated wind direction as a single-band float32 GeoTIFF in EPSG:4326.

    The raster is written to "<output_file>_wind_dir.tif", stored north-up and west-to-east. The
    latitude/longitude values are treated as cell centres of a regular grid.

    Parameters:
    aggregated (xarray.Dataset): Aggregated dataset providing 'wind_dir', 'latitude' and 'longitude'.
    output_file (Path): Output file name without extension.

    Raises:
    ValueError: If the wind direction is not a non-empty 2D grid with at least two coordinates per axis.
    """
    # Cast to float32 so the array matches the dtype declared in the metadata
    avg_wind_dir = np.asarray(aggregated['wind_dir'].squeeze().values, dtype='float32')
    latitudes = np.asarray(aggregated['latitude'].values)
    longitudes = np.asarray(aggregated['longitude'].values)

    # Check if the data is valid before saving
    # (at least two coordinates per axis are needed to derive the grid spacing)
    if avg_wind_dir.ndim != 2 or avg_wind_dir.size == 0 or latitudes.size < 2 or longitudes.size < 2:
        raise ValueError(f"Invalid dataset for {output_file}. Skipping...")

    # The transform below places row 0 at the northern edge and column 0 at the western edge,
    # so reorder the data if the coordinates run south-to-north or east-to-west.
    if latitudes[0] < latitudes[-1]:
        avg_wind_dir = avg_wind_dir[::-1, :]
    if longitudes[0] > longitudes[-1]:
        avg_wind_dir = avg_wind_dir[:, ::-1]
    avg_wind_dir = np.ascontiguousarray(avg_wind_dir)

    # from_origin expects the outer (west, north) edge of the upper-left pixel, while the coordinates
    # are cell centres, so shift the origin outwards by half a pixel.
    x_res = float(np.abs(longitudes[1] - longitudes[0]))
    y_res = float(np.abs(latitudes[1] - latitudes[0]))
    transform = from_origin(float(np.min(longitudes)) - x_res / 2, float(np.max(latitudes)) + y_res / 2, x_res, y_res)

    meta = {
        'driver': 'GTiff',
        'height': avg_wind_dir.shape[0],
        'width': avg_wind_dir.shape[1],
        'count': 1,
        'dtype': 'float32',
        'crs': 'EPSG:4326',
        'transform': transform
    }

    wind_dir_tif = f"{output_file}_wind_dir.tif"
    with rasterio.open(wind_dir_tif, 'w', **meta) as dst:
        dst.write(avg_wind_dir, 1)

# Example usage: winter (DJF) wind direction GeoTIFFs at 500 mb, one per 5-year block
input_nc_file = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_multilevel\data_0.nc"
calculate_aggregations_and_save_tif(input_nc_file, aggregation="seasonal",pressure_level=500,  year_interval=5, period='DJF')
