# Variable time outputs (in progress)

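Notebook that creates moving-window rasters and per-window area tables (CSV) for fire, beetle, drought, and combined disturbances, in order to investigate different year gaps (window lengths of three, four, and five years).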

In [1]:
import os
from glob import glob
import earthpy as et
from os import path
import pandas as pd
import rasterio as rio
import gc

# Repository root, located under et.io.HOME
home = os.path.join(et.io.HOME, "GitHub", "visualize-forest-disturbance")

# Input and output data folders inside the repository
input_data = os.path.join(home, "data", "input")
output_data = os.path.join(home, "data", "output")

# Fire rasters (sorted by file name) and the folder that receives their
# moving-window outputs
fire_tif_list = sorted(
    glob(os.path.join(input_data, "forest-disturbance-stack", "fire-western-conus", "*.tif"))
)
fire_output_dir = os.path.join(output_data, "output_tifs", "var_time_fire")

In [2]:
def area_calc(array):
    """
    Convert the sum of an array into an area, treating each unit of value
    as one 30 m x 30 m pixel.

    Parameters
    ----------
    array : numpy.array
        Array whose values are summed (e.g. a 0/1 raster mask).

    Returns
    -------
    float
        Sum of the array multiplied by 900 (square meters per pixel) and
        divided by 1,000,000, i.e. the area in square kilometers.
    """
    pixel_total = array.sum()
    square_meters = pixel_total * 30 * 30
    return square_meters / 1000000

def moving_window(tif_list, num_years, output_folder, file_name, area_dict, overwrite=False, make_1=False):
    """
    Slide a window of ``num_years`` consecutive rasters over ``tif_list``,
    multiply the rasters of each window together, write the product to a
    GeoTIFF, and record the area of the product.

    Because the rasters are combined by multiplication, a cell of the output
    is non-zero only where every raster in the window is non-zero.

    Parameters
    ----------
    tif_list : list
        Ordered list of raster file paths. The start and end year of each
        window are taken from characters ``[-8:-4]`` of the first and last
        path in the window, so each path is assumed to end in ``YYYY.tif``.
    num_years : int
        Number of rasters in each window (e.g. 5 for a 5-year window).
        Must be at least 1.
    output_folder : str
        Existing folder where the output rasters are written.
    file_name : str
        Prefix of the output raster names; ``<start>-<end>.tif`` is appended.
    area_dict : dict
        Dictionary that is filled in place with ``end_year -> area`` (the end
        year is the 4-character string sliced from the path).
    overwrite : bool, optional
        If False (default) and the output folder already holds any file
        matching ``file_name + "*.tif"``, nothing is processed.
    make_1 : bool, optional
        If True, every value >= 1 in each raster is set to 1 before the
        rasters are multiplied (default is False).

    Returns
    -------
    dict or None
        ``area_dict`` after it has been filled, or None when existing
        outputs were found and ``overwrite`` is False.

    Raises
    ------
    ValueError
        If ``num_years`` is smaller than 1.
    """
    # A window shorter than one raster would yield an empty slice and an
    # opaque IndexError further down, so reject it up front.
    if num_years < 1:
        raise ValueError("num_years must be at least 1, got {}".format(num_years))

    # Refuse to clobber earlier results unless explicitly asked to
    existing_outputs = glob(path.join(output_folder, file_name + "*.tif"))
    if existing_outputs and not overwrite:
        print("Output folder already contains files with that name. Set overwrite=True to overwrite.")
        return None

    # Distance from the last raster of a window back to its first raster
    span = num_years - 1

    # `last` indexes the final raster of each window; the first complete
    # window ends at index `span`.
    for last in range(span, len(tif_list)):
        window = tif_list[last - span:last + 1]
        start_year = window[0][-8:-4]
        end_year = window[-1][-8:-4]

        # Report progress only for windows whose end year is a multiple of 5
        if int(end_year) % 5 == 0:
            print("Processing years {} to {}".format(start_year, end_year))

        # The first raster seeds the running product and supplies the
        # profile used for the output file
        with rio.open(window[0]) as src:
            years_array = src.read(1)
            meta = src.profile

        if make_1:
            years_array[years_array >= 1] = 1

        # Fold the remaining rasters into the running product
        for year in window[1:]:
            with rio.open(year) as src:
                array = src.read(1)
            if make_1:
                array[array >= 1] = 1
            # NOTE(review): in-place multiply keeps the dtype of the first
            # raster; confirm inputs are 0/1 (or use make_1) so the product
            # cannot overflow.
            years_array *= array
            # Release each raster before the next one is read
            del array
            gc.collect()

        # Save the combined raster as <file_name><start>-<end>.tif
        newfilename = file_name + "{}-{}.tif".format(start_year, end_year)
        with rio.open(path.join(output_folder, newfilename), 'w', **meta) as dst:
            dst.write(years_array, 1)

        # Record the affected area under the window's end year
        area_dict[end_year] = area_calc(years_array)

        # Release the product before the next window is built
        del years_array
        gc.collect()

    return area_dict


def dict_to_csv(year_dict, folder, filename, overwrite=False):
    """
    Save a year-to-area dictionary as a two-column CSV table.

    The keys are written to an ``End Year`` column and the values to a
    ``Total Area`` column; the DataFrame's default integer index is written
    as the leading, unnamed column.

    Parameters
    ----------
    year_dict : dict
        Mapping of end year to total area.
    folder : str
        Folder in which the CSV file is written.
    filename : str
        Name of the CSV file.
    overwrite : bool, optional
        If False (default) and the file already exists, a message is printed
        and the existing file is left untouched.
    """
    destination = path.join(folder, filename)

    # Leave an existing file alone unless the caller opted in to overwriting
    if not overwrite and os.path.isfile(destination):
        print("Output folder already has a csv with that name, if you want to overwrite the file set overwrite=True")
        return

    print("Creating csv")
    table = pd.DataFrame({
        'End Year': list(year_dict.keys()),
        'Total Area': list(year_dict.values()),
    })
    table.to_csv(destination)

def run_multiple_windows(tif_list, output_folder, var_name, num_names, window_range, overwrite=False, make_1=False):
    """
    Run the moving-window analysis once per window length and save each
    run's area table as a CSV file.

    Parameters
    ----------
    tif_list : list
        List of raster file paths passed on to ``moving_window``.
    output_folder : str
        Folder that receives both the output rasters and the CSV files.
    var_name : str
        Variable name used in the output file names (e.g. "fire", "beetle").
    num_names : list
        Label for each window length, matched to ``window_range`` by position
        (e.g. ['three', 'four', 'five']). Must be at least as long as
        ``window_range``.
    window_range : list
        Window lengths in years (e.g. [3, 4, 5]).
    overwrite : bool, optional
        Forwarded to ``moving_window`` and ``dict_to_csv`` (default is False).
    make_1 : bool, optional
        Forwarded to ``moving_window`` (default is False).
    """
    for position, window_length in enumerate(window_range):
        label = num_names[position]
        print("Working on combination {}-{}".format(var_name, label))

        # Rasters are named <var>-<label>-year-<start>-<end>.tif and the
        # matching area table <var>-<label>-year.csv
        raster_prefix = "{}-{}-year-".format(var_name, label)
        table_name = "{}-{}-year.csv".format(var_name, label)

        # Each window length collects its areas in a fresh dictionary
        areas = moving_window(tif_list, window_length, output_folder, raster_prefix, {}, overwrite, make_1)

        # Nothing to save when the run was skipped (None) or produced no windows
        if not areas:
            continue
        dict_to_csv(areas, output_folder, table_name, overwrite)

In [3]:
# Calculate 3-, 4-, and 5-year moving windows for fire
fire_tif_list = sorted(glob(path.join(input_data, "forest-disturbance-stack", "fire-western-conus", "*.tif")))
fire_output_dir = path.join(output_data, "output_tifs", "var_time_fire")
# makedirs also creates the intermediate "output_tifs" folder, which a bare
# os.mkdir would fail on when it is missing
os.makedirs(fire_output_dir, exist_ok=True)
# Window lengths and the labels used for them in the output file names
years_range = list(range(3, 6))
range_names = ['three', 'four', 'five']
run_multiple_windows(fire_tif_list, fire_output_dir, "fire", range_names, years_range, overwrite=True)

Working on combination fire-three
Processing years 2003 and 2005
Processing years 2008 and 2010
Processing years 2013 and 2015
Processing years 2018 and 2020
Creating csv
Working on combination fire-four
Processing years 2002 and 2005
Processing years 2007 and 2010
Processing years 2012 and 2015
Processing years 2017 and 2020
Creating csv
Working on combination fire-five
Processing years 2001 and 2005
Processing years 2006 and 2010
Processing years 2011 and 2015
Processing years 2016 and 2020
Creating csv


In [4]:
# Calculate 3-, 4-, and 5-year moving windows for beetle
beetle_tif_list = sorted(glob(path.join(input_data, "forest-disturbance-stack", "beetle-western-conus", "*.tif")))
beetle_output_dir = path.join(output_data, "output_tifs", "var_time_beetle")
# makedirs also creates the intermediate "output_tifs" folder, which a bare
# os.mkdir would fail on when it is missing
os.makedirs(beetle_output_dir, exist_ok=True)
# Window lengths and the labels used for them in the output file names
years_range = list(range(3, 6))
range_names = ['three', 'four', 'five']
run_multiple_windows(beetle_tif_list, beetle_output_dir, "beetle", range_names, years_range, overwrite=True)

Working on combination beetle-three
Processing years 2003 and 2005
Processing years 2008 and 2010
Processing years 2013 and 2015
Processing years 2018 and 2020
Creating csv
Working on combination beetle-four
Processing years 2002 and 2005
Processing years 2007 and 2010
Processing years 2012 and 2015
Processing years 2017 and 2020
Creating csv
Working on combination beetle-five
Processing years 2001 and 2005
Processing years 2006 and 2010
Processing years 2011 and 2015
Processing years 2016 and 2020
Creating csv


In [5]:
# Calculate 3-, 4-, and 5-year moving windows for drought
drought_tif_list = sorted(glob(path.join(input_data, "forest-disturbance-stack", "drought-western-conus", "*.tif")))
drought_output_dir = path.join(output_data, "output_tifs", "var_time_drought")
# makedirs also creates the intermediate "output_tifs" folder, which a bare
# os.mkdir would fail on when it is missing
os.makedirs(drought_output_dir, exist_ok=True)
# Window lengths and the labels used for them in the output file names
years_range = list(range(3, 6))
range_names = ['three', 'four', 'five']
run_multiple_windows(drought_tif_list, drought_output_dir, "drought", range_names, years_range, overwrite=True)

Working on combination drought-three
Processing years 2003 and 2005
Processing years 2008 and 2010
Processing years 2013 and 2015
Processing years 2018 and 2020
Creating csv
Working on combination drought-four
Processing years 2002 and 2005
Processing years 2007 and 2010
Processing years 2012 and 2015
Processing years 2017 and 2020
Creating csv
Working on combination drought-five
Processing years 2001 and 2005
Processing years 2006 and 2010
Processing years 2011 and 2015
Processing years 2016 and 2020
Creating csv


In [6]:
# Calculate 3-, 4-, and 5-year moving windows for all disturbances
forest_tif_list = sorted(glob(path.join(input_data, "forest-disturbance-stack", "western-conus", "forest-disturbance-stack_western-conus_*.tif")))
all_output_dir = path.join(output_data, "output_tifs", "var_time_all")
# makedirs also creates the intermediate "output_tifs" folder, which a bare
# os.mkdir would fail on when it is missing
os.makedirs(all_output_dir, exist_ok=True)
# Window lengths and the labels used for them in the output file names
years_range = list(range(3, 6))
range_names = ['three', 'four', 'five']
# make_1=True sets every raster value >= 1 to 1 before the rasters are combined
run_multiple_windows(forest_tif_list, all_output_dir, "all", range_names, years_range, overwrite=True, make_1=True)

Working on combination all-three
Processing years 2003 and 2005
Processing years 2008 and 2010
Processing years 2013 and 2015
Processing years 2018 and 2020
Creating csv
Working on combination all-four
Processing years 2002 and 2005
Processing years 2007 and 2010
Processing years 2012 and 2015
Processing years 2017 and 2020
Creating csv
Working on combination all-five
Processing years 2001 and 2005
Processing years 2006 and 2010
Processing years 2011 and 2015
Processing years 2016 and 2020
Creating csv
