# Landsat NDVI


In [None]:
import sys

sys.path.insert(0, "../../src")
from imports import *

init_notebook()

In [None]:
# ESA binary forest raster (GeoTIFF); opened with rasterio and multiplied with
# every NDVI raster, and also used as input for the forest zonal extraction
path_raster_esa = "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/esa_v200_10m_merged_binary_2154-30m_ndvi-extent-twice.tif"

# Output CSV of the zonal extraction run on the ESA binary forest raster
path_zonal_mean_forest = "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/binary_maps/forest_zonal_mean.csv"

# NDVI glob patterns, one per processing stage:
# reprojected rasters (input) -> multiplied rasters -> zonal-mean CSVs
files_ndvi_reprojected = "/Volumes/SAMSUNG 1TB/ndvi/reprojected/*.tif"
files_ndvi_multiplied = "/Volumes/SAMSUNG 1TB/ndvi/multiplied/*.tif"
files_ndvi_zonalmean = "/Volumes/SAMSUNG 1TB/ndvi/zonal_mean/*.csv"

# Site buffer geometries (GeoJSON) passed to the zonal extraction
path_buffer = "../../data/final/nfi/700m_buffer_epsg2154.geojson"
# path_buffer = "../../data/final/nfi/700m_buffer_epsg2154_100-random-sites.geojson"  # ! DEBUG OPTION

# Multiply NDVI with Forest Pixel Map


In [None]:
# Load the ESA binary raster once. Its band, CRS and transform are the same
# for every NDVI raster, so they are read up front instead of inside the loop,
# and the dataset handle is closed as soon as the data is in memory.
with rasterio.open(path_raster_esa) as raster_esa:
    esa_band = raster_esa.read(1)
    esa_crs = raster_esa.crs
    esa_transform = raster_esa.transform

# Collect all reprojected NDVI rasters
files = glob.glob(files_ndvi_reprojected)

# Loop through all rasters, multiply them with the ESA band, and save the product
for f in tqdm(files):

    # Output path mirrors the input path with "reprojected" swapped for "multiplied"
    newfile = f.replace("reprojected", "multiplied")
    if os.path.exists(newfile):
        # Already written by an earlier run, skip it
        continue

    # The context manager closes the NDVI dataset handle right after reading,
    # so handles do not accumulate over the loop
    with rasterio.open(f) as raster_b:
        raster_c = esa_band * raster_b.read(1)

    # Write the product as a single-band GeoTIFF on the ESA raster's grid
    with rasterio.open(
        newfile,
        "w",
        driver="GTiff",
        height=raster_c.shape[0],
        width=raster_c.shape[1],
        count=1,
        dtype=raster_c.dtype,
        crs=esa_crs,
        transform=esa_transform,
    ) as dst:
        dst.write(raster_c, 1)

# Extract Zonal Mean from Forest-NDVI Map


In [None]:
# Build the input/output table for the zonal extraction: every multiplied
# GeoTIFF is paired with a CSV of the same name in the zonal_mean/ folder
input_files = glob.glob(files_ndvi_multiplied)
output_files = [
    tif_path.replace("multiplied/", "zonal_mean/").replace(".tif", ".csv")
    for tif_path in input_files
]
path_files = pd.DataFrame(dict(input_file=input_files, output_file=output_files))

# Run the extraction over all file pairs; results are not returned here
ndvi_extract_zonal_mean(
    path_files, path_buffer, force_run=True, return_df=False, verbose=True
)

# Audible notification once the extraction has finished
chime.success()

In [None]:
# Forest pixel count per site: run the same zonal extraction on the ESA binary
# raster, so the NDVI sum can later be divided by the number of forest pixels
# (i.e. the mean NDVI over forest pixels only).
input_files, output_files = path_raster_esa, path_zonal_mean_forest
path_files = pd.DataFrame(
    [[input_files, output_files]], columns=["input_file", "output_file"]
)

# > Note, here the function is expected to simply return the binary forest data.
df_forest_pixel_count = ndvi_extract_zonal_mean(path_files, path_buffer, force_run=False)

# Sanity check: more than one distinct nodata value is treated as missing data
if df_forest_pixel_count is not None:
    nodata_values = df_forest_pixel_count.nodata.unique()
    if len(nodata_values) > 1:
        raise ValueError(
            "❌❌❌ More than one unique value in nodata, indicating missing data!: ",
            nodata_values,
        )

df_forest_pixel_count

In [None]:
# Load all extracted NDVI files into one wide dataframe (one column per year).
# glob returns files in arbitrary order; sorting keeps the column order stable.
output_files = sorted(glob.glob(files_ndvi_zonalmean))
if not output_files:
    # Fail early with a clear message instead of a NameError on df_out below
    raise FileNotFoundError(
        f"No zonal-mean CSV files found for pattern: {files_ndvi_zonalmean}"
    )

key_cols = ["idp", "first_year"]
df_out = None

for f in output_files:

    # Extract the year from the file name ("...multiplied_<year>_NDVI...").
    # Only the base name is parsed so directory names cannot interfere.
    year = os.path.basename(f).split("multiplied_")[1].split("_NDVI")[0]

    # Load the file
    df = pd.read_csv(f)

    # A value of -999 in the nodata column flags missing data in this file
    if -999 in df["nodata"].unique():
        print("❌❌❌ NA data found in the file ❌❌❌")
        print(f)

    # Divide NDVI sum by the sum of forest pixels, else the signal is diluted in scarce forest areas!
    # NOTE(review): the division aligns rows by index, so it assumes both tables
    # list the sites in the same order - confirm against ndvi_extract_zonal_mean.
    df[year] = df["sum"] / df_forest_pixel_count["sum"]

    df_year = df[key_cols + [year]]
    if df_out is None:
        # First file defines the set of sites that later years are joined onto
        df_out = df_year
    else:
        df_out = pd.merge(df_out, df_year, on=key_cols, how="left")

df_out

# Extract NDVI metrics


In [None]:
# Reshape the wide table to long format: one row per site and year, with the
# former year column names in "variable" and the NDVI values in "value"
df_long = pd.melt(df_out, id_vars=["idp", "first_year"])
# The year labels come from column names, so cast them to integers
df_long = df_long.astype({"variable": int})
df_long.head()

In [None]:
def ndvi_extract_metrics(
    df_idp_level, years_before_first_visit=5, years_after_first_visit=5
):
    """Compute NDVI summary metrics for a single site (idp).

    The time window runs from ``first_year - years_before_first_visit`` to
    ``first_year + years_after_first_visit`` (both inclusive), where
    ``first_year`` is the first unique value of that column in the input.

    Args:
        df_idp_level: Long-format dataframe for one site with the columns
            ``idp``, ``first_year``, ``variable`` (year) and ``value`` (NDVI).
        years_before_first_visit: Number of years before ``first_year`` to
            include in the window.
        years_after_first_visit: Number of years after ``first_year`` to
            include in the window.

    Returns:
        Dataframe with one row per unique ``idp`` in the window and the columns
        ``ndvi_mean`` (mean NDVI), ``ndvi_min_anomaly`` (most negative
        deviation from that mean) and ``ndvi_trend`` (slope of a linear
        regression of NDVI on year). ``ndvi_trend`` is NaN when fewer than two
        distinct years with a non-missing NDVI value are available.
    """
    # Get timewindow
    visit_year = df_idp_level["first_year"].unique()[0]
    first_year = visit_year - years_before_first_visit
    last_year = visit_year + years_after_first_visit

    # Filter for timewindow first, then copy only the rows that are kept
    in_window = df_idp_level["variable"].between(first_year, last_year)
    df_window = df_idp_level.loc[in_window].copy()

    # Define output df
    df_metrics = pd.DataFrame()
    df_metrics["idp"] = df_window["idp"].unique()

    # Mean NDVI over the window (pandas skips NaN by default)
    ndvi_mean = df_window["value"].mean()
    df_metrics["ndvi_mean"] = ndvi_mean

    # Most negative anomaly, i.e. the largest drop below the window mean
    df_metrics["ndvi_min_anomaly"] = (df_window["value"] - ndvi_mean).min()

    # Trend in NDVI (linear regression). Rows with missing data are dropped so
    # one NaN year does not turn the slope into NaN, consistent with the
    # NaN-skipping mean/min above. A regression needs at least two distinct
    # years, otherwise the trend is reported as NaN.
    valid = df_window[["variable", "value"]].dropna()
    if valid["variable"].nunique() < 2:
        slope = float("nan")
    else:
        slope = stats.linregress(valid["variable"], valid["value"]).slope
    df_metrics["ndvi_trend"] = slope

    return df_metrics


# Compute the metrics per site. groupby splits the long table once (instead of
# one boolean filter per site); sort=False keeps the sites in order of first
# appearance. The per-site frames are collected in a list and concatenated once,
# because calling pd.concat inside the loop re-copies the growing result.
metrics_per_site = [
    ndvi_extract_metrics(df_idp_level, years_before_first_visit=5)
    for _, df_idp_level in tqdm(df_long.groupby("idp", sort=False))
]

# ignore_index gives a clean 0..n-1 index; without it every row keeps index 0
df_ndvi_metrics = (
    pd.concat(metrics_per_site, ignore_index=True)
    if metrics_per_site
    else pd.DataFrame()
)

df_ndvi_metrics

# Save final NDVI predictor set


In [None]:
# Feather expects a default RangeIndex (older pandas versions raise otherwise),
# so the index is reset before writing; the index carries no information here.
df_ndvi_metrics.reset_index(drop=True).to_feather(
    "../../data/final/predictor_datasets/ndvi.feather"
)

---


In [None]:
import sys

sys.path.insert(0, "../../src")
from imports import *

init_notebook()