In [2]:
import xarray as xr
import numpy as np
import pandas as pd
from scipy.stats import theilslopes
import rasterio
from rasterio.transform import from_origin
from pathlib import Path

def _theil_sen_slope_grid(cube, years):
    """Return the per-pixel Theil-Sen slope of a (time, row, col) array.

    Pixels whose time series contains any NaN stay NaN in the result so they
    can be written as nodata instead of being mistaken for a zero trend.
    """
    slopes = np.full(cube.shape[1:], np.nan, dtype=np.float32)
    complete = ~np.isnan(cube).any(axis=0)
    for row, col in zip(*np.nonzero(complete)):
        # theilslopes returns (slope, intercept, low_slope, high_slope).
        slopes[row, col] = theilslopes(cube[:, row, col], years)[0]
    return slopes


def _north_up_grid(grid, lat, lon):
    """Orient ``grid`` north-up / west-left and build its affine transform.

    The coordinate values are treated as cell centres, so the raster origin
    (outer corner of the top-left cell) sits half a cell outside the
    outermost coordinates.
    """
    if len(lat) < 2 or len(lon) < 2:
        raise ValueError(
            "At least two latitude and two longitude values are required "
            "to infer the cell size."
        )

    # GeoTIFF rows run north -> south and columns west -> east; flip the
    # array if the coordinate axes are stored the other way round.
    if lat[0] < lat[-1]:
        grid = grid[::-1, :]
    if lon[0] > lon[-1]:
        grid = grid[:, ::-1]

    x_res = float(np.abs(lon[1] - lon[0]))
    y_res = float(np.abs(lat[1] - lat[0]))
    transform = from_origin(
        float(np.min(lon)) - x_res / 2,
        float(np.max(lat)) + y_res / 2,
        x_res,
        y_res,
    )
    return np.ascontiguousarray(grid), transform


def calculate_theil_sen_and_save_tif(input_nc_file, min_year=None, max_year=None):
    """
    Performs Theil-Sen regression for each pixel in the input NetCDF file (temperature data),
    averages by calendar year, converts temperature from Kelvin to Fahrenheit, multiplies the
    slope by the span of years actually analysed, and saves the result as a GeoTIFF file.

    The GeoTIFF is written next to the input file as
    ``<input stem>_theilsen_yearly_<start_year>_<end_year>.tif``. Pixels whose yearly series
    contains missing values are written as NaN, which is declared as the nodata value.

    Parameters:
    input_nc_file (str): Path to the input NetCDF file. It must contain a 't2m' variable with
        'date', 'latitude' and 'longitude' dimensions; 'date' is parsed with format '%Y%m%d'.
    min_year (int, optional): The minimum year for the analysis. Default is the earliest in the dataset.
    max_year (int, optional): The maximum year for the analysis. Default is the latest in the dataset.

    Raises:
    ValueError: If fewer than two years fall inside the requested range, or if the grid has
        fewer than two latitude or longitude values.
    """

    # Load everything that is needed inside the context manager so the file
    # handle is released as soon as the arrays are in memory.
    with xr.open_dataset(input_nc_file, engine='netcdf4') as ds:
        # Convert the 'date' coordinate to datetime format
        ds['date'] = pd.to_datetime(ds['date'].values, format='%Y%m%d')

        # Yearly mean via groupby: independent of pandas' frequency aliases
        # and yields the calendar years directly as the 'year' coordinate.
        yearly_t2m = ds['t2m'].groupby('date.year').mean()
        yearly_t2m = yearly_t2m.transpose('year', 'latitude', 'longitude')

        years = yearly_t2m['year'].values
        lat = yearly_t2m['latitude'].values
        lon = yearly_t2m['longitude'].values

        # Convert temperature from Kelvin to Fahrenheit
        t2m_data = (yearly_t2m.values - 273.15) * 9 / 5 + 32

    # Apply min_year and max_year if provided, otherwise use dataset bounds
    start_year = min_year if min_year is not None else years.min()
    end_year = max_year if max_year is not None else years.max()

    print(f"Calculating Theil-Sen regression for the years {start_year} to {end_year}")

    # Filter the data based on the selected years
    year_mask = (years >= start_year) & (years <= end_year)
    years = years[year_mask]
    if years.size < 2:
        raise ValueError(
            f"Need at least two years of data between {start_year} and {end_year} "
            f"to estimate a trend; found {years.size}."
        )
    t2m_data = t2m_data[year_mask, :, :]

    # Slope in degrees Fahrenheit per year for every pixel
    slope_array = _theil_sen_slope_grid(t2m_data, years)

    # Scale to the total change over the years that are really present, so a
    # requested range wider than the dataset does not inflate the result.
    total_years = int(years.max() - years.min())
    slope_array *= total_years

    slope_array, transform = _north_up_grid(slope_array, lat, lon)

    # Define metadata for the GeoTIFF
    meta = {
        'driver': 'GTiff',
        'height': slope_array.shape[0],
        'width': slope_array.shape[1],
        'count': 1,
        'dtype': 'float32',
        'crs': 'EPSG:4326',
        'transform': transform,
        'nodata': np.nan,
    }

    # Create output file name based on the input file and time span
    input_file_path = Path(input_nc_file)
    output_file_name = f"{input_file_path.stem}_theilsen_yearly_{start_year}_{end_year}.tif"
    output_tif = input_file_path.with_name(output_file_name)

    # Save the slope result as a GeoTIFF
    with rasterio.open(output_tif, 'w', **meta) as dst:
        dst.write(slope_array, 1)

    print(f"GeoTIFF saved as {output_tif}")

# Example usage:
# calculate_theil_sen_and_save_tif('yearly_mean_temperature_fahrenheit.nc', min_year=1980)

# Run the trend analysis on the local NetCDF file for the 1959-2022 period;
# the GeoTIFF is written into the same folder as the input.
ds = r"D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_single_l\data_0.nc"
calculate_theil_sen_and_save_tif(
    input_nc_file=ds,
    min_year=1959,
    max_year=2022,
)


Calculating Theil-Sen regression for the years 1959 to 2022...
GeoTIFF saved as D:\UCalgary_Lectures\GEOG_683\Data_workspace\Monthly_single_l\data_0_theilsen_yearly_1959_2022.tif
