In [None]:
import os, sys
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio as rio
import seaborn as sns
import matplotlib.pyplot as plt
import rasterio.plot
from rasterio.transform import rowcol
from sklearn.metrics import confusion_matrix
from typing import Optional, List, Callable

In [None]:
# Put the parent directory (resolved against the current working directory)
# on the import path so the `src` package imported below can be found.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from src.area_utils import (
    binarize,
    load_raster,
    compute_area_estimate,
    create_area_estimate_summary,
    compute_area_error_matrix,
    create_confusion_matrix_summary,
)

In [None]:
# Alpha-3 country code; look it up at https://www.iso.org/obp/ui/#search
country_iso_code = 'ETH'
adm1_of_interest = ['Tigray']

# Read the GADM 4.1 level-2 boundaries and keep only the selected admin1 region(s)
gadm2_path = f'https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_{country_iso_code}_2.json.zip'
roi = gpd.read_file(gadm2_path)
roi = roi[roi['NAME_1'].isin(adm1_of_interest)]
roi.head()

In [None]:
# Optionally restrict ROI to an admin2 boundary.
# Leave the list empty (or holding only blank names) to keep the whole admin1
# ROI; filtering on the blank placeholder would otherwise drop every row.
adm2_of_interest = [""]
adm2_selected = [name for name in adm2_of_interest if name]
if adm2_selected:
    roi = roi.query('NAME_2 in @adm2_selected')

In [None]:
# Merge selected region(s) into a single polygon.
# dissolve() is called without a `by` column, so all rows are combined together.
roi = roi.dissolve()

In [None]:
# Optionally define a shapefile to use instead.
# Leave `shape_fn` empty to keep the ROI already held in `roi`; reading an
# empty path would otherwise fail and stop a top-to-bottom run.
shape_fn = ""
if shape_fn:
    roi = gpd.read_file(shape_fn)

In [None]:
# Quick visual check of the region of interest before loading rasters
roi.plot()

In [None]:
def fn(y):
    """Return the crop-map raster filename for year `y`."""
    return f"Ethiopia_Tigray_{y}_threshold-3-5.tif"

In [None]:
# Load the crop map of each year, passing the ROI to load_raster.
# Each call yields the raster array together with its metadata.
# NOTE(review): presumably the raster is clipped/masked to `roi` - confirm in src.area_utils.
cropmap_y1, y1_meta = load_raster(fn(2020), roi)
cropmap_y2, y2_meta = load_raster(fn(2021), roi)

In [None]:
# Turn each crop map into a crop mask (plotted on a 0-1 scale below).
# NOTE(review): the thresholding rule lives in src.area_utils.binarize and is not visible here.
cropmask_y1 = binarize(cropmap_y1, y1_meta)
cropmask_y2 = binarize(cropmap_y2, y2_meta)

In [None]:
# Plot each crop-mask side by side, one panel per year
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (18, 9))
panels = [(cropmask_y1, "2020"), (cropmask_y2, "2021")]
for ax, (mask, year) in zip(axes, panels):
    ax.imshow(mask, cmap = 'YlGn', vmin = 0, vmax = 1)
    ax.set_title(year, fontsize = 16, fontweight = "semibold")
    ax.set_axis_off()
plt.tight_layout()

In [None]:
# Calculate change map
# Encoding is 2 * (first-year mask) + (second-year mask); with 0/1 masks this gives
#   0 = stable not planted, 1 = planted gain, 2 = planted loss, 3 = stable planted
change_map = cropmask_y1 * 2 + cropmask_y2
# The change map reuses a copy of the first-year raster metadata
change_meta = y1_meta.copy()

In [None]:
# Alternatively, load change map in from file.
# The load only happens when the file already exists; otherwise the change map
# already in memory is kept. The file is written by the save cell, so an
# unconditional load would fail on a fresh top-to-bottom run.

change_map_fn = "tigray-change-threshold-3-5.tif"
if os.path.exists(change_map_fn):
    change_map, change_meta = load_raster(change_map_fn)

In [None]:
# Check that the unique values are 0, 1, 2, 3, 255/nodata
# (the four change-class codes plus the nodata value)
np.unique(change_map.flatten())

In [None]:
# Visualize the change map with the 'Accent' colormap
# -> Green (0) : Stable NP
# -> Gray (3) : Stable P
# Classes 1 (PGain) and 2 (PLoss) take the colormap's intermediate colors.
plt.imshow(change_map, cmap='Accent')
plt.axis("off")
plt.title("Change Map", fontsize = 16, fontweight = "semibold")
plt.tight_layout()

In [None]:
# Save change-map as a GeoTIFF using the change-map metadata

with rio.open("tigray-change-threshold-3-5.tif", "w", **change_meta) as dst:
    # A leading axis is added so the 2-D map is written as a (1, rows, cols) array
    dst.write(np.expand_dims(change_map, 0))

In [None]:
# The first coefficient of the raster transform is used as the pixel size
pixel_size = change_meta["transform"][0]
print(f"Pixel Size - {pixel_size:.2f} meters")

In [None]:
# Count pixels per change class using only the unmasked pixels. Reading
# `change_map.data` directly would also count masked (nodata) cells whose
# underlying stored value happens to be 0-3, inflating the mapped class areas.
valid_px = change_map.compressed()
stable_np_px = np.count_nonzero(valid_px == 0)
p_gain_px = np.count_nonzero(valid_px == 1)
p_loss_px = np.count_nonzero(valid_px == 2)
stable_p_px = np.count_nonzero(valid_px == 3)
total_px = np.count_nonzero(valid_px != change_map.fill_value) # Do not count masked or fill values

In [None]:
# Load in labeled reference samples
ref_sample_path = '../data/shapefiles/change_2020-2021_strat_ref_samples_labeled.zip'
# Declare the sample coordinates as EPSG:4326, then reproject to the change-map CRS
gdf = gpd.read_file(ref_sample_path).set_crs("EPSG:4326")
gdf = gdf.to_crs(change_meta["crs"])
gdf.head()

In [None]:
# Load additional reference samples
set1 = pd.read_csv('../data/shapefiles/ceo-Tigray-2020-2021-Feb-to-Dec---Additional-change-reference-samples-(set-1)-sample-data-2023-11-16.csv')
set2 = pd.read_csv('../data/shapefiles/ceo-Tigray-2020-2021-Feb-to-Dec---Additional-change-reference-samples-(set-2)-sample-data-2023-11-16.csv')

# make sure they agree: compare the last two columns of set 1 against the same
# columns of set 2 (comparing set 1 with itself is always True).
# NOTE(review): the last two columns are presumably the 2020 / 2021 labels - confirm.
print(set1.iloc[:, -2].equals(set2.iloc[:, -2]))
print(set1.iloc[:, -1].equals(set2.iloc[:, -1]))

# convert to gdf: lon/lat are declared as EPSG:4326 and then reprojected to the
# CRS of the existing reference samples so both sets can be concatenated
newpts = gpd.GeoDataFrame(set1, geometry=gpd.points_from_xy(x=set1.lon, y=set1.lat), crs='EPSG:4326')
newpts = newpts.to_crs(gdf.crs)

# add change class column (vectorised): 2 * crop-in-2020 + crop-in-2021
#   '0' stable not planted, '1' planted gain, '2' planted loss, '3' stable planted
crop_2020 = newpts['Do you see *active* cropland at this point in 2020? '] == 'Crop'
crop_2021 = newpts['Do you see *active* cropland at this point in 2021? '] == 'Crop'
newpts['Change class'] = (2 * crop_2020.astype(int) + crop_2021.astype(int)).astype(str)

print(newpts.columns)
print(newpts['Change class'].value_counts())

# Rename to the column names of the existing reference samples.
# The list is positional, so it relies on the CSV column order staying the same.
newpts.columns = ['plotid', 'sampleid', 'lon', 'lat', 'email', 'flagged', 'collection',
                    'analysis_d', 'imagery_ti', 'imagery_at', 'sample_geo', 'pl_samplei',
                    'Was this a', 'Was this_1', 'geometry', 'Change cla']
newgdf = pd.concat([gdf, newpts]).reset_index()
newgdf

In [None]:
# Overlay labeled reference sample onto change map

fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize = (12, 8))
# Open the raster in a context manager so the file handle is released once
# the map has been drawn (the original handle was never closed).
with rio.open("tigray-change-threshold-3-5.tif") as raster:
    rasterio.plot.show(raster, cmap = "Accent", ax = ax)
newgdf.plot(ax = ax)
ax.set_axis_off()
plt.title("Reference Sample", fontsize = 16, fontweight = "semibold")
plt.tight_layout()

In [None]:
# Extract predictions from map for comparison with reference sample.
# rowcol() returns (row, col). The indices are bounds-checked explicitly because
# a negative index does not raise IndexError: it silently wraps around to the
# opposite edge of the array and would assign a wrong class to points lying
# above or to the left of the raster.
n_rows, n_cols = change_map.shape
mapped_classes = []
for point in newgdf["geometry"]:
    row_idx, col_idx = rowcol(change_meta["transform"], point.x, point.y)
    if 0 <= row_idx < n_rows and 0 <= col_idx < n_cols:
        value = change_map[row_idx, col_idx]
        # Masked (nodata) pixels carry no mapped class
        mapped_classes.append(np.nan if value is np.ma.masked else float(value))
    else:
        mapped_classes.append(np.nan)
# Missing values mark samples without a mapped class; they are counted and dropped later
newgdf["Mapped class"] = mapped_classes

In [None]:
# How many out of bounds reference-sample points
# (samples whose "Mapped class" is missing)
print(f"Number of out of bound samples: {newgdf['Mapped class'].isna().sum()}")

In [None]:
# Remove out of bound points.
# .copy() makes the filtered frame independent of the original, so the dtype
# assignment below does not trigger pandas' SettingWithCopyWarning.
newgdf = newgdf[~newgdf["Mapped class"].isna()].copy()
newgdf["Mapped class"] = newgdf["Mapped class"].astype(np.uint8)

In [None]:
# Compute confusion matrix
y_true = np.array(newgdf['Change cla']).astype(np.uint8)
y_pred = np.array(newgdf['Mapped class']).astype(np.uint8)
# Pin the class order so the matrix is always 4x4 and lines up with the class
# labels used below, even when a class has no samples; values outside 0-3
# (e.g. a nodata code) are left out of the matrix.
cm = confusion_matrix(y_true, y_pred, labels = [0, 1, 2, 3])
print(cm) # Columns - predictions, Rows - actual
# Stable NP, PGain, PLoss, Stable P

In [None]:
def plot_confusion_matrix(cm, labels, datatype="d") -> None:
    """Draw a confusion matrix as an annotated heatmap.

    The matrix must be ordered with the reference classes on the rows and the
    map/prediction classes on the columns.

    Args:
        cm:
            Confusion matrix of reference and map samples, n[i,j], with
            reference on the rows and map on the columns.
        labels:
            List-like of class labels in the same order as the confusion
            matrix, used for both axes. For example:

            ["Stable NP", "PGain", "PLoss", "Stable P"]

            ["Non-Crop", "Crop"]
        datatype:
            Format string handed to seaborn as `fmt` for the cell annotations.
            Defaults to "d"; "0.2f" can be used for fractional values.

    """

    heatmap_options = {
        "cmap": "crest",
        "annot": True,
        "fmt": datatype,
        "cbar": False,
        "square": True,
        "annot_kws": {"size": 20},
    }
    tick_style = {"labels": labels, "fontsize": 16}

    _, ax = plt.subplots(nrows=1, ncols=1)
    sns.heatmap(cm, ax=ax, **heatmap_options)

    # Map classes are labelled along the top edge, reference classes on the left
    ax.xaxis.tick_top()
    ax.xaxis.set_label_coords(0.50, 1.2)
    ax.yaxis.set_label_coords(-0.125, 0.50)
    ax.set_xticklabels(**tick_style)
    ax.set_yticklabels(**tick_style)
    ax.set_xlabel("Map", fontsize=20)
    ax.set_ylabel("Reference", fontsize=20)
    plt.tight_layout()

In [None]:
# Pretty print confusion matrix (sample counts), labels in class-code order 0-3
plot_confusion_matrix(cm, labels = ["Stable NP", "PGain", "PLoss", "Stable P"])

In [None]:
# Summarise the confusion matrix with the class names as column labels
cm_summary = create_confusion_matrix_summary(cm, columns = ["Stable NP", "PGain", "PLoss", "Stable P"])

In [None]:
# total_px_area = change_map.flatten().shape[0] # <- Overcounts!
# Marginal pixel total of mapped classes
# Order matches the class codes 0-3: Stable NP, PGain, PLoss, Stable P
a_j = np.array([stable_np_px, p_gain_px, p_loss_px, stable_p_px], dtype = np.int64)

In [None]:
# Plot the error matrix expressed in terms of proportion of area
# as suggested by good practice recommendations
# total_px is recomputed here as the sum of the four mapped-class pixel counts
total_px = a_j.sum()
# w_j: share of the total pixel count taken by each mapped class
w_j = a_j / total_px
am = compute_area_error_matrix(cm, w_j)
plot_confusion_matrix(am, labels = ["Stable NP", "PGain", "PLoss", "Stable P"], datatype="0.2f")

In [None]:
# Compute the estimates from the confusion matrix, mapped pixel counts and pixel size.
# The result is indexed below with the keys "user", "producer", "accuracy" and "area".
estimates = compute_area_estimate(cm, a_j, px_size = pixel_size)

In [None]:
# User's accuracy per class and its error term, rounded to two decimals for display
u_j, err_u_j = estimates["user"]
print(f"User's accuracy and 95% CI\n{u_j.round(2)}\n{(err_u_j).round(2)}")

In [None]:
# Producer's accuracy per class and its error term, rounded to two decimals for display
p_i, err_p_i = estimates["producer"]
print(f"Producer's accuracy and 95% CI\n{p_i.round(2)}\n{(err_p_i).round(2)}")

In [None]:
# Overall accuracy and its error term, printed as "value ± error"
acc, err_acc = estimates["accuracy"]
print(f"Overall accuracy and 95% CI\n{acc.round(2)} \u00B1 {(err_acc).round(2)}")

In [None]:
# Area estimate in hectares and its error term, stacked as two rows for printing
a_ha, err_ha = estimates["area"]["ha"]
print(f"Estimated area [ha] and 95% CI of area [ha] \n{np.stack([a_ha, err_ha]).round(2)}")

In [None]:
# Collect the area estimates and the user's/producer's accuracies into one summary per class
summary = create_area_estimate_summary(a_ha, err_ha, u_j, err_u_j, p_i, err_p_i, columns = ["Stable NP", "PGain", "PLoss", "Stable P"])