# Intercomparison

**Author:** Adebowale Adebayo

**Last updated:** May 2, 2024

**Description:** Compares crop/non-crop land-cover maps against the Rwanda 2019 test points and reports accuracy metrics for each map.

## 1. Setup

In [None]:
# Optional command-line Earth Engine authentication, left disabled.
# NOTE(review): looks redundant with the ee.Authenticate() call in the imports cell — confirm and remove.
# !earthengine authenticate

In [1]:
# Fetch the crop-mask repository; the next cell changes into this checkout.
!git clone https://github.com/nasaharvest/crop-mask.git

Cloning into 'crop-mask'...
remote: Enumerating objects: 12278, done.
remote: Counting objects: 100% (1689/1689), done.
remote: Compressing objects: 100% (506/506), done.
remote: Total 12278 (delta 1238), reused 1439 (delta 1155), pack-reused 10589
Receiving objects: 100% (12278/12278), 127.61 MiB | 12.31 MiB/s, done.
Resolving deltas: 100% (7960/7960), done.
Updating files: 100% (215/215), done.


In [1]:
# Make the cloned repository the working directory; later cells import from
# `src` and pull `data/datasets`, presumably both relative to this directory.
%cd crop-mask/

/content/crop-mask


In [None]:
!pip install cartopy -qq
!pip install rasterio -qq
!pip install dvc[gs] -qq

In [2]:
# Switch the checkout to the branch this notebook is run against.
!git checkout intercomparison-year

M	src/compare_covermaps.py
Already on 'intercomparison-year'
Your branch is up to date with 'origin/intercomparison-year'.


In [97]:
import ee
import geemap
import sys
import pandas as pd
import numpy as np

import geopandas as gpd
from pathlib import Path

# Authenticate with Earth Engine, then initialise the client against a Cloud project.
# NOTE(review): the project ID is hard-coded; other users presumably need to
# substitute their own — confirm.
ee.Authenticate()
ee.Initialize(project="bsos-geog-harvest1")

# Extend the import path before the `src` imports that follow.
# NOTE(review): after `%cd crop-mask/` this relative path points two levels
# above the repository root; looks like a leftover from running the notebook
# from a subdirectory — confirm it is still needed.
sys.path.append("../..")

from src.compare_covermaps import TARGETS, filter_by_bounds, generate_report, CLASS_COL, COUNTRY_COL
from src.compare_covermaps import TEST_COUNTRIES, TEST_CODE

## 2. Read in evaluation set

In [None]:
# Download the DVC-tracked datasets; the evaluation CSV read below is expected
# to exist once this has run.
!dvc pull data/datasets

In [98]:
# Country and year under evaluation.
country = "Rwanda"
test_year = 2019

# Both lookup tables are imported from src/compare_covermaps.py and must
# contain an entry for `country`.
missing_tables = [
    table_name
    for table_name, table in (("TEST_CODE", TEST_CODE), ("TEST_COUNTRIES", TEST_COUNTRIES))
    if country not in table
]
for table_name in missing_tables:
    print(f"WARNING: {country} not found in {table_name} in src/compare_covermaps.py")
if missing_tables:
    # Fail fast: later cells need `country_code` and `dataset_path`, and only
    # printing here would let them run with undefined or stale values.
    raise ValueError("Please update src/compare_covermaps.py and restart the notebook.")

country_code = TEST_CODE[country]
dataset_path = TEST_COUNTRIES[country]

In [99]:
# Build the evaluation GeoDataFrame (`gdf`) from the country's labelled points.
# Fail fast when the CSV is missing: only printing a warning would leave `gdf`
# undefined (or stale from an earlier run) for every cell below.
if not Path(dataset_path).exists():
    raise FileNotFoundError(
        f"Dataset: {dataset_path} not found, run `dvc pull data/datasets` from root."
    )

df = pd.read_csv(dataset_path)[["lat", "lon", "class_probability", "subset"]]

# Drop points whose class probability is exactly 0.5 (no majority label) and
# use only test data because validation points used for harvest-dev map.
keep_rows = (df["class_probability"] != 0.5) & (df["subset"] == "testing")
df = df[keep_rows].copy()

# Binary label: 1 when the class probability is above 0.5, else 0.
df[CLASS_COL] = (df["class_probability"] > 0.5).astype(int)
df[COUNTRY_COL] = country

gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs="epsg:4326")
gdf = filter_by_bounds(country_code=country_code, gdf=gdf)

In [100]:
# NOTE(review): the cell that builds `gdf` already ends with this exact call,
# so this presumably re-applies the same filter — confirm and drop one of the two.
gdf = filter_by_bounds(country_code=country_code, gdf=gdf)

## 3. Run intercomparison

In [101]:
# Preview the first rows of the evaluation points before sampling the maps.
gdf.head()

Unnamed: 0,lat,lon,class_probability,subset,binary,country,geometry
12,-2.393914,30.165281,0.333333,testing,0,Rwanda,POINT (30.16528 -2.39391)
18,-2.286221,30.592638,0.0,testing,0,Rwanda,POINT (30.59264 -2.28622)
22,-2.180136,29.697418,1.0,testing,1,Rwanda,POINT (29.69742 -2.18014)
35,-2.7635,29.640383,0.666667,testing,1,Rwanda,POINT (29.64038 -2.76350)
38,-1.969022,29.695442,0.333333,testing,0,Rwanda,POINT (29.69544 -1.96902)


In [102]:
# Keep only the maps that cover at least one year within +/- 1 of the test year.
year_window = range(test_year - 1, test_year + 2)
TARGETS = {
    map_name: cropmap
    for map_name, cropmap in TARGETS.items()
    if any(year in year_window for year in cropmap.years_covered)
}

In [103]:
# Sample every map that lists the country at the evaluation points and attach
# the sampled values to `gdf`, joining on the point coordinates.
for cropmap in TARGETS.values():
    if country in cropmap.countries:
        print(f"[{country}] sampling " + cropmap.title + "...")
        sampled = cropmap.extract_test(gdf, test_year).copy()
        merged = pd.merge(gdf, sampled, on=["lat", "lon"], how="left")
        # TODO find why points get duplicated
        gdf = merged.drop_duplicates()

[Rwanda] sampling copernicus...
using closest map year (2019) to test year (2019)
[Rwanda] sampling worldcover-v100...
[Rwanda] sampling worldcereal-v100...
[Rwanda] sampling glad...
using closest map year (2019) to test year (2019)
[Rwanda] sampling dynamicworld...
using closest map year (2019) to test year (2019)
[Rwanda] sampling digital-earth-africa...
[Rwanda] sampling esri-lulc...
using closest map year (2019) to test year (2019)


In [107]:
# Per-map area lookup keyed by map title, filled for every map that lists the country.
# NOTE(review): per the recorded output, export=True starts an export task and
# returns a placeholder; the real values are read from the exported CSVs in a later cell.
a_j = {}
maps_for_country = (m for m in TARGETS.values() if country in m.countries)
for cropmap in maps_for_country:
    print(f"[{country}] calculating pixel area for " + cropmap.title + "...")
    map_area = cropmap.compute_map_area(country, export=True, dataset_name=cropmap.title)
    a_j[cropmap.title] = map_area.copy()

[Rwanda] calculating pixel area for copernicus...
Export task started for copernicus, Rwanda. Returning null for now.
[Rwanda] calculating pixel area for worldcover-v100...
Export task started for worldcover-v100, Rwanda. Returning null for now.
[Rwanda] calculating pixel area for worldcereal-v100...
Export task started for worldcereal-v100, Rwanda. Returning null for now.
[Rwanda] calculating pixel area for glad...
Export task started for glad, Rwanda. Returning null for now.
[Rwanda] calculating pixel area for dynamicworld...
Export task started for dynamicworld, Rwanda. Returning null for now.
[Rwanda] calculating pixel area for digital-earth-africa...
Export task started for digital-earth-africa, Rwanda. Returning null for now.
[Rwanda] calculating pixel area for esri-lulc...
Export task started for esri-lulc, Rwanda. Returning null for now.


In [105]:
# update a_j values with exported values
for cropmap in a_j.keys():
    try:
        area_df = pd.read_csv(f'./Crop_NonCrop_Area_Sum_Export-{country}-{cropmap}.csv')
    except:
        continue
    crop_area = int(area_df['crop_sum'][0])
    noncrop_area = int(area_df['noncrop_sum'][0])
    a_j[cropmap] = np.array([noncrop_area, crop_area])

In [None]:
# Build one report per map that was sampled into `gdf`, then stack the reports
# into a single table indexed by dataset.
comparisons = []
for map_name in TARGETS:
    if map_name in gdf.columns:
        # Score only the points where both the label and the map value are present.
        paired = gdf[[CLASS_COL, map_name]].dropna()
        report = generate_report(
            map_name,
            country,
            paired[CLASS_COL],
            paired[map_name],
            a_j[map_name],
            area_weighted=True,
        )
        comparisons.append(report)

results = pd.concat(comparisons).set_index(["dataset"])

results

In [None]:
# Share of evaluation points labelled crop, as a percentage for the title.
# The labels are 0/1, so their mean is the crop fraction; this also yields 0.0
# (instead of a KeyError) when there are no crop points. Scaling before
# rounding avoids float artifacts such as 34.660000000000004.
crop_proportion = round(100 * gdf[CLASS_COL].mean(), 2)

# Horizontal bars per dataset, ordered by crop F1, with std_crop_f1 as error bars.
ax = results.sort_values("crop_f1").plot(
    y=["accuracy", "crop_recall_pa", "crop_precision_ua", "crop_f1"],
    xerr="std_crop_f1",
    kind="barh",
    figsize=(6, 14),
    width=0.8,
    title=f"{country}: {len(gdf)} points (crop proportion: {crop_proportion}%)",
);

# Label every second container.
# NOTE(review): presumably these are the bar containers, alternating with the
# error-bar containers — confirm.
for c in ax.containers[1::2]:
    ax.bar_label(c)

# Hide the frame around the plot.
for border in ["top", "right", "bottom", "left"]:
    ax.spines[border].set_visible(False)

ax.legend(bbox_to_anchor=(1, 1), reverse=True);