# Landsat NDVI


In [1]:
import sys

sys.path.insert(0, "../../src")
from imports import *

init_notebook()

In [2]:
# ESA binary forest file
path_raster_esa = "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/esa_v200_10m_merged_binary_2154-30m_ndvi-extent-twice.tif"

# ESA zonal mean forest file
path_zonal_mean_forest = "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/binary_maps/forest_zonal_mean.csv"

# NDVI
files_ndvi_reprojected = "/Volumes/SAMSUNG 1TB/ndvi/reprojected/*.tif"
files_ndvi_multiplied = "/Volumes/SAMSUNG 1TB/ndvi/multiplied/*.tif"
files_ndvi_zonalmean = "/Volumes/SAMSUNG 1TB/ndvi/zonal_mean/*.csv"

# Coordinates
path_buffer = "../../data/final/nfi/700m_buffer_epsg2154.geojson"
# path_buffer = "../../data/final/nfi/700m_buffer_epsg2154_100-random-sites.geojson"  # ! DEBUG OPTION

# Multiply NDVI with Forest Pixel Map


In [3]:
# Load ESA binary raster
raster_esa = rasterio.open(path_raster_esa)

# Load all reprojected NDVI rasters
files = glob.glob(files_ndvi_reprojected)
files

# Loop through all rasters, multiply them, and save them again
for f in tqdm(files):

    newfile = f.replace("reprojected", "multiplied")
    if os.path.exists(newfile):
        # print(f"File {newfile} already exists, skipping it")
        continue

    raster_b = rasterio.open(f)
    raster_c = raster_esa.read(1) * raster_b.read(1)
    with rasterio.open(
        newfile,
        "w",
        driver="GTiff",
        height=raster_c.shape[0],
        width=raster_c.shape[1],
        count=1,
        dtype=raster_c.dtype,
        crs=raster_esa.crs,
        transform=raster_esa.transform,
    ) as dst:
        dst.write(raster_c, 1)

100%|██████████| 24/24 [00:00<00:00, 6474.36it/s]


# Extract Zonal Mean from Forest-NDVI Map


In [4]:
# Extract zonal means from all rasters
# Get all input and output files
input_files = glob.glob(files_ndvi_multiplied)
output_files = [f.replace("multiplied/", "zonal_mean/") for f in input_files]
output_files = [f.replace(".tif", ".csv") for f in output_files]
path_files = pd.DataFrame({"input_file": input_files, "output_file": output_files})

# Run it
ndvi_extract_zonal_mean(
    path_files,
    path_buffer,
    force_run=True,
    return_df=False,
    verbose=True,
)

# Chime when done
chime.success()

100%|██████████| 24/24 [2:07:15<00:00, 318.15s/it]


In [5]:
# Get count of forest pixels per site to actually calculate the mean NDVI over all forest pixels!
input_files = path_raster_esa
output_files = path_zonal_mean_forest
path_files = pd.DataFrame({"input_file": [input_files], "output_file": [output_files]})

# Run it
# > Note, here I want the function to simply return the binary forest data.
df_forest_pixel_count = ndvi_extract_zonal_mean(
    path_files, path_buffer, force_run=False
)
if df_forest_pixel_count is not None:
    if df_forest_pixel_count.nodata.unique().shape[0] > 1:
        raise ValueError(
            "❌❌❌ More than one unique value in nodata, indicating missing data!: ",
            df_forest_pixel_count.nodata.unique(),
        )

df_forest_pixel_count

100%|██████████| 1/1 [04:07<00:00, 247.62s/it]

 - 1 extractions done, returning last df from file: /Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/binary_maps/forest_zonal_mean.csv...





Unnamed: 0,idp,first_year,mean,count,sum,std,nodata
0,500008,2010,0.984579,1686,1660.0,0.123221,0.0
1,500013,2010,0.531826,1681,894.0,0.498986,0.0
2,500098,2010,0.455628,1679,765.0,0.498027,0.0
3,500103,2010,0.600238,1681,1009.0,0.489849,0.0
4,500137,2010,0.900595,1680,1513.0,0.299205,0.0
...,...,...,...,...,...,...,...
51408,1354883,2018,0.843824,1684,1421.0,0.363022,0.0
51409,1354893,2018,0.244801,1683,412.0,0.429969,0.0
51410,1354907,2018,0.760973,1686,1283.0,0.426489,0.0
51411,1354911,2018,0.525283,1681,883.0,0.499360,0.0


In [6]:
# Load all extracted NDVI files into one dataframe
output_files = glob.glob(files_ndvi_zonalmean)

first_file = True

for f in output_files:

    # Extract the year
    year = f.split("multiplied_")[1].split("_NDVI")[0]
    # print(f)

    # Load the file
    df = pd.read_csv(f)

    # Check if there other values than 0 in the nodata column
    if -999 in df["nodata"].unique():
        print("❌❌❌ NA data found in the file ❌❌❌")
        print(f)

    # Divide NDVI sum by the sum of forest pixels, else the signal is diluted in scarce forest areas!
    df[f"{year}"] = df["sum"] / df_forest_pixel_count["sum"]

    if first_file:
        df_out = df[["idp", "first_year", f"{year}"]]
        first_file = False
    else:
        df_out = pd.merge(
            df_out,
            df[["idp", "first_year", f"{year}"]],
            on=["idp", "first_year"],
            how="left",
        )

df_out

Unnamed: 0,idp,first_year,2008,2001,2016,2017,2015,2022,2021,2004,...,2012,2014,2013,2002,2023,2007,2006,2009,2019,2018
0,500008,2010,0.865537,0.885584,0.898175,0.878389,0.898636,0.882161,0.860663,0.851606,...,0.882708,0.897993,0.875002,0.877369,0.887104,0.763791,0.848612,0.825448,0.839048,0.869512
1,500013,2010,0.802119,0.867327,0.855683,0.871854,0.821777,0.874448,0.850984,0.774646,...,0.776725,0.872303,0.858599,0.820889,0.845955,0.830170,0.846983,0.824471,0.860931,0.859417
2,500098,2010,0.665674,0.637249,0.673185,0.621036,0.647688,0.565046,0.629250,0.596858,...,0.601842,0.643948,0.680242,0.647973,0.604431,0.616746,0.587699,0.633591,0.600178,0.619843
3,500103,2010,0.870753,0.769367,0.854973,0.868409,0.880337,0.790162,0.848721,0.850237,...,0.861683,0.851787,0.859611,0.857809,0.874711,0.793789,0.832877,0.867265,0.859881,0.832784
4,500137,2010,0.887364,0.838849,0.912473,0.832411,0.898307,0.871537,0.873715,0.861812,...,0.878058,0.893112,0.880284,0.848342,0.867392,0.793438,0.871544,0.843071,0.873554,0.837067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51408,1354883,2018,0.818435,0.865629,0.876407,0.855076,0.880791,0.853608,0.853608,0.832468,...,0.883246,0.890984,0.888424,0.815469,0.886538,0.848968,0.833185,0.850264,0.902843,0.872849
51409,1354893,2018,0.778202,0.850680,0.831629,0.853453,0.839223,0.847142,0.826695,0.812824,...,0.801513,0.809340,0.874003,0.796655,0.862533,0.746781,0.820141,0.795156,0.772939,0.837776
51410,1354907,2018,0.871206,0.863907,0.866218,0.882685,0.874751,0.868109,0.856548,0.823078,...,0.843247,0.892128,0.845950,0.886674,0.897900,0.833711,0.837351,0.850862,0.890281,0.868610
51411,1354911,2018,0.840512,0.840380,0.825218,0.836551,0.830540,0.896792,0.873750,0.836751,...,0.855570,0.867483,0.837311,0.849683,0.863864,0.873612,0.844596,0.810455,0.864168,0.833952


# Extract NDVI metrics


In [7]:
# Melt df into long format
df_long = df_out.melt(id_vars=["idp", "first_year"])
df_long["variable"] = df_long["variable"].astype(int)
df_long.head()

Unnamed: 0,idp,first_year,variable,value
0,500008,2010,2008,0.865537
1,500013,2010,2008,0.802119
2,500098,2010,2008,0.665674
3,500103,2010,2008,0.870753
4,500137,2010,2008,0.887364


In [8]:
def ndvi_extract_metrics(df_idp_level, years_before_first_visit=5):
    # For each idp, for first to last year, calculate:
    # - Average anomaly
    # - Most negative anomaly
    # - Trend in NDVI (linear regression)

    # Get timewindow
    first_year = df_idp_level.first_year.unique()[0] - years_before_first_visit
    last_year = df_idp_level.first_year.unique()[0] + 5

    # Filter for timewindow
    df_idp_level = df_idp_level.copy()[
        (df_idp_level.variable >= first_year) & (df_idp_level.variable <= last_year)
    ]

    # Define output df
    df_metrics = pd.DataFrame()
    df_metrics["idp"] = df_idp_level.idp.unique()

    # Calculate mean NDVI
    df_metrics["ndvi_mean"] = df_idp_level["value"].mean()

    # Calculate anomalies
    df_idp_level["anomaly"] = df_idp_level["value"] - df_idp_level["value"].mean()

    # Calculate the most negative anomaly
    df_metrics["ndvi_min_anomaly"] = df_idp_level["anomaly"].min()

    # Calculate the trend in NDVI (linear regression)
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        df_idp_level["variable"], df_idp_level["value"]
    )
    df_metrics["ndvi_trend"] = slope

    return df_metrics


# Run loop
df_ndvi_metrics = pd.DataFrame()

for idp in tqdm(df_long.idp.unique()):
    df_idp_level = df_long[df_long.idp == idp].copy()
    df_metrics = ndvi_extract_metrics(df_idp_level, years_before_first_visit=5)
    df_ndvi_metrics = pd.concat([df_ndvi_metrics, df_metrics])

df_ndvi_metrics

100%|██████████| 51413/51413 [02:04<00:00, 414.16it/s]


Unnamed: 0,idp,ndvi_mean,ndvi_min_anomaly,ndvi_trend
0,500008,0.857037,-0.093246,0.007070
0,500013,0.828092,-0.051756,-0.000602
0,500098,0.629311,-0.046175,0.005527
0,500103,0.847696,-0.053907,0.004621
0,500137,0.866085,-0.072647,0.004303
...,...,...,...,...
0,1354883,0.876330,-0.022723,-0.001714
0,1354893,0.835896,-0.062958,-0.000066
0,1354907,0.867752,-0.065659,-0.000105
0,1354911,0.852578,-0.027360,0.004130


# Save final NDVI predictor set


In [9]:
df_ndvi_metrics.to_feather("../../data/final/predictor_datasets/ndvi.feather")

---


In [2]:
import sys

sys.path.insert(0, "../../src")
from imports import *

init_notebook()