<a href="https://colab.research.google.com/github/melkatewabe10/Machine-learning_LST-Estimation-/blob/main/Trend_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Author: Tewabe Melkamu**

Date: 24/3/2025

Trend analysis: per-pixel linear trend (slope per year) of a yearly NDVI GeoTIFF time series

In [None]:
!pip install rasterio
!pip install scipy
!pip install tqdm

**Script**

In [None]:
# Import necessary libraries for numerical operations, file handling, and geospatial data processing.
import numpy as np
import rasterio
import glob
import os
import re
from tqdm import tqdm  # For displaying progress in loops

# ---------------------------
# Initial File Reading and Setup
# ---------------------------

# Directory that holds the yearly NDVI rasters.
folder_path = 'taiwan_ndvi_evolution'  # Update this to your NDVI files folder

# Collect every file in that directory whose name matches "NDVI_*.tif";
# stop immediately if nothing matches, since there is nothing to analyse.
ndvi_pattern = os.path.join(folder_path, 'NDVI_*.tif')
file_list = glob.glob(ndvi_pattern)
if len(file_list) == 0:
    raise FileNotFoundError(f"No TIFF files found in folder: {folder_path}")

def extract_year(filename):
    """
    Pull the first run of four consecutive digits out of a file's base name.

    Only the final path component is searched, so digits in parent
    directory names are ignored.

    Parameters:
        filename (str): Path or file name (e.g., 'NDVI_2000.tif').

    Returns:
        int or None: The four digits as an integer, or None when the base
        name contains no four-digit run.
    """
    base_name = os.path.basename(filename)
    match = re.search(r'(\d{4})', base_name)
    if match is None:
        return None
    return int(match.group(1))

# Pair every file with the year parsed from its name and drop files that carry
# no year. Filtering the files and the years together keeps file_list and
# time_vector aligned one-to-one, and avoids comparing None with int while
# sorting (which raises TypeError).
dated_files = [(extract_year(path), path) for path in file_list]
dated_files = [(year, path) for year, path in dated_files if year is not None]
if not dated_files:
    raise ValueError("No valid year information found in file names.")

# Sort chronologically; sorting on the year alone keeps the original relative
# order of files that share a year.
dated_files.sort(key=lambda pair: pair[0])
file_list = [path for _, path in dated_files]

# Years, in the same order as file_list, form the time vector for regression.
years = [year for year, _ in dated_files]
time_vector = np.array(years, dtype=np.float32)

# ---------------------------
# Function Definitions
# ---------------------------

def read_data_stack(file_list):
    """
    Load band 1 of every raster in file_list and stack the bands along a new leading axis.

    The first file acts as the reference: its metadata and nodata value are
    returned, and every later file must have the same band shape.

    Parameters:
        file_list (list): Raster file paths, in the order they should appear in the stack.

    Returns:
        tuple: (data_stack, meta, nodata_value)
            - data_stack (np.ndarray): 3D array with dimensions (time, rows, cols).
            - meta (dict): Copy of the first file's metadata.
            - nodata_value (float or None): The nodata value reported by the first file.

    Raises:
        ValueError: If a file's band shape differs from the first file's.
    """
    reference_path = file_list[0]
    with rasterio.open(reference_path) as reference:
        meta = reference.meta.copy()
        nodata_value = reference.nodata
        reference_band = reference.read(1)
    expected_shape = reference_band.shape
    layers = [reference_band]

    for path in file_list[1:]:
        with rasterio.open(path) as dataset:
            layer = dataset.read(1)
        # Every layer must align pixel-for-pixel with the reference raster.
        if layer.shape != expected_shape:
            raise ValueError(f"Dimension mismatch detected in file: {path}")
        layers.append(layer)

    # Stack along axis 0 so the result is indexed as (time, rows, cols).
    return np.stack(layers, axis=0), meta, nodata_value

def compute_pixel_slope(time_vector, pixel_series, nodata_value):
    """
    Compute the linear trend (slope) for a pixel's NDVI time series using linear regression.

    The series is rejected (np.nan is returned) when it is empty, contains
    the nodata value, contains any NaN/inf sample, or is constant. Otherwise
    a first-degree polynomial is fitted in float64 and its slope returned.

    Parameters:
        time_vector (array-like): 1D sequence of time points (years).
        pixel_series (array-like): 1D sequence of NDVI values for one pixel over time.
        nodata_value (float or None): Value representing missing data.

    Returns:
        float: The slope of the NDVI time series, or np.nan if invalid.
    """
    series = np.asarray(pixel_series)
    if series.size == 0:
        return np.nan

    # Compare against nodata on the raw values, before any dtype conversion.
    if nodata_value is not None and np.any(series == nodata_value):
        return np.nan

    values = series.astype(np.float64)

    # A single NaN/inf sample makes the least-squares fit fail or return
    # garbage, so any non-finite value invalidates the pixel. This also covers
    # a NaN nodata value, which the equality test above can never match.
    if not np.all(np.isfinite(values)):
        return np.nan

    # Constant series: exact min/max comparison avoids depending on the
    # floating-point rounding of a standard-deviation computation.
    if values.min() == values.max():
        return np.nan

    # Fit in float64 regardless of the input dtypes; the slope is the first
    # coefficient returned by polyfit for a degree-1 fit.
    times = np.asarray(time_vector, dtype=np.float64)
    slope, _intercept = np.polyfit(times, values, 1)
    return slope

# ---------------------------
# Main Processing Function
# ---------------------------

def main():
    """
    Compute the per-pixel NDVI trend and write it to 'taiwan_ndvi_trend.tif'.

    Reads the rasters listed in the module-level ``file_list``, fits a slope
    for every pixel against the module-level ``time_vector`` using
    ``compute_pixel_slope``, and writes the result as a single-band float32
    GeoTIFF. Pixels without a valid slope are stored as NaN, and NaN is
    declared as the output's nodata value.

    Raises:
        ValueError: If the number of rasters read differs from the length of
            ``time_vector``.
    """
    # Read and stack all NDVI TIFF files into a 3D numpy array.
    data, meta, nodata_value = read_data_stack(file_list)
    num_years, rows, cols = data.shape

    # Fail early with a clear message instead of an obscure per-pixel
    # regression error when rasters and time points are out of step.
    if len(time_vector) != num_years:
        raise ValueError(
            f"Time vector has {len(time_vector)} entries but {num_years} rasters were read."
        )

    # Initialize an output array for the NDVI trend (slope) per pixel.
    slope_array = np.full((rows, cols), np.nan, dtype=np.float32)

    # Process each pixel in the spatial domain.
    for i in tqdm(range(rows), desc="Processing rows"):
        for j in range(cols):
            slope_array[i, j] = compute_pixel_slope(time_vector, data[:, i, j], nodata_value)

    # Single-band float32 output. Invalid pixels are NaN in slope_array, so
    # NaN (not the input rasters' nodata value) must be declared as nodata.
    meta.update(count=1, dtype=rasterio.float32, nodata=np.nan)

    # Define the output filename for the NDVI evolution (trend) GeoTIFF.
    output_trend_tif = 'taiwan_ndvi_trend.tif'

    # Write the slope (trend) array to a new GeoTIFF file.
    with rasterio.open(output_trend_tif, 'w', **meta) as dst:
        dst.write(slope_array, 1)

    print("The NDVI evolution (trend) GeoTIFF file has been generated successfully.")

# Entry point: call main() only when this file is executed directly
# (i.e. __name__ is '__main__'), not when it is imported as a module.
if __name__ == '__main__':
    main()
