# Covermap Comparison

**Author:** Adam Yang (ayang115@umd.edu)

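**Description:** Compares existing cropland and land-cover maps against labeled test sets for Kenya and Togo, reporting accuracy, precision, recall and F1 per map.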

**Crop Maps Used:** Harvest Togo & Kenya, Copernicus Land Cover, ESA WorldCover, GLAD Global Cropland Extent

In [2]:
import pandas as pd
import numpy as np
import geopandas as gdp
import os
import geemap
from pathlib import Path
from sklearn.metrics import classification_report 

In [3]:
import ee

# Run the Earth Engine authentication flow; the saved output below reports
# that an authorization token was stored.
ee.Authenticate()

# Initialise the Earth Engine client library for this session.
ee.Initialize()


Successfully saved authorization token.


## **Section 0** - Setup

**Functions**

In [122]:
# Collapses a land-cover class code into a binary crop / non-crop label
def map_values(val, value_for_crop):
    """Return 1 if ``val`` equals ``value_for_crop``, otherwise 0.

    Args:
        val: Class code to remap.
        value_for_crop: The class code that counts as crop.
    """
    return 1 if val == value_for_crop else 0

In [123]:
# Callback for an image collection's map(): samples one image at a set of features
def raster_extraction(image, resolution, f_collection):
    """Sample ``image`` at the features of ``f_collection``.

    Args:
        image: Object exposing ``sampleRegions`` (an Earth Engine image in this notebook).
        resolution: Passed to ``sampleRegions`` as ``scale``.
        f_collection: Passed to ``sampleRegions`` as ``collection``.

    Returns:
        Whatever ``image.sampleRegions`` returns, unchanged.
    """
    return image.sampleRegions(collection=f_collection, scale=resolution)

In [124]:
# Convert an sklearn classification report dict into one row of a results table
def report_to_row(dataset, report, df):
    """Append the metrics of one classification report to a results table.

    Args:
        dataset: Label stored in the ``dataset`` column of the new row.
        report: Dict in the layout produced by
            ``classification_report(..., output_dict=True)``. It must contain
            ``"accuracy"`` plus per-class metric dicts under the string keys
            ``"1"`` (crop) and ``"0"`` (non-crop).
        df: Results collected so far (may be empty or ``None``). It is not
            modified.

    Returns:
        A new DataFrame with the rows of ``df`` followed by the new row,
        labelled 0..n-1.

    Raises:
        KeyError: If ``report`` lacks ``"accuracy"``, ``"1"`` or ``"0"``.
    """
    crop, noncrop = report["1"], report["0"]
    new_report = pd.DataFrame(
        data={
            "dataset": dataset,
            "accuracy": report["accuracy"],
            "crop_f1": crop["f1-score"],
            "crop_support": crop["support"],
            "noncrop_support": noncrop["support"],
            "crop_precision": crop["precision"],
            "crop_recall": crop["recall"],
            "noncrop_precision": noncrop["precision"],
            "noncrop_recall": noncrop["recall"],
        },
        index=[0],
    )

    if df is None:
        return new_report

    if len(df) == 0:
        # Nothing to concatenate yet. Skipping pd.concat here avoids relying
        # on how pandas treats empty frames when it picks result dtypes, while
        # still honouring the column layout of the (empty) accumulator.
        columns = list(df.columns) + [
            col for col in new_report.columns if col not in df.columns
        ]
        return new_report.reindex(columns=columns)

    # ignore_index gives every row its own label; without it each appended
    # row would carry the label 0 and .loc[0] would match all of them.
    return pd.concat([df, new_report], ignore_index=True)

In [125]:
# Turns one dataframe row into an ee.Feature positioned at the row's coordinates
def create_point(row):
    """Build an ``ee.Feature`` from a row holding ``"lon"`` and ``"lat"``.

    The geometry is ``ee.Geometry.Point(row["lon"], row["lat"])`` and every
    entry of the row (via ``dict(row)``) is attached as a feature property.
    """
    return ee.Feature(ee.Geometry.Point(row["lon"], row["lat"]), dict(row))

**Retrieve Test Data**

In [126]:
# Countries to evaluate; dataset files are selected by comparing their file
# stem against these names.
TEST_COUNTRIES = ["Kenya", "Togo"]
# Relative path of the directory that is searched for the dataset files.
DATA_PATH = "../data/datasets/"

In [127]:
# Lazily enumerate every entry directly under DATA_PATH.
datasets_path = Path(DATA_PATH).glob("*")

# Keep only the entries whose stem (file name without suffix) is one of
# TEST_COUNTRIES. Iterating here consumes the generator above, so
# datasets_path yields nothing if it is iterated again.
target_paths = [p for p in datasets_path if p.stem in TEST_COUNTRIES]

In [128]:
# Build one table holding the test points of every target country.
test_set = []
for p in target_paths:
    # The file stem doubles as the country label
    key = p.stem

    # Read in data and keep only the testing subset and the columns needed.
    # .copy() detaches the selection from the frame read from disk, so the
    # column assignments below act on an independent frame and do not raise
    # pandas' SettingWithCopyWarning.
    df = pd.read_csv(p)
    df = df.loc[df["subset"] == "testing", ["lat", "lon", "class_probability"]].copy()

    # Create earth engine geometry points
    df["ee_pts"] = df.apply(create_point, axis=1)

    # Recast points as 1 or 0 (threshold = 0.5)
    df["test_class"] = (df["class_probability"] >= 0.5).astype("int64")

    df["country"] = key

    test_set.append(df)

# Rows keep the index labels of their source file; the index is rebuilt in
# the next cell.
test_data = pd.concat(test_set)

In [129]:
# Replace the per-file row labels left over from concatenation with a fresh
# 0..n-1 index. drop=True discards the old labels directly, instead of first
# materialising them as an "index" column and then deleting that column.
test_data.reset_index(drop=True, inplace=True)

In [130]:
# Preview the first rows of the combined test table.
test_data.head()

Unnamed: 0,lat,lon,class_probability,ee_pts,test_class,country
0,0.725122,34.386282,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
1,0.621939,34.466496,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
2,0.459661,34.090158,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
3,0.226497,34.054859,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
4,0.096834,33.951959,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya


**Create dataframe for results**

In [131]:
# One empty results table per target country, keyed by the dataset file stem.
# Rows are added later through report_to_row.
results = {
    path.stem: pd.DataFrame(
        columns=[
            "dataset", "accuracy", "crop_f1", "crop_support", "noncrop_support",
            "crop_precision", "crop_recall", "noncrop_precision", "noncrop_recall",
        ]
    )
    for path in target_paths
}


## **Section 1** - Harvest Data

In [132]:
import rasterio as rio

**Harvest Togo** ([Zenodo](https://zenodo.org/record/3836629#.Y1_WxnbMJPZ))

In [133]:
# Open the Harvest Togo binary cropland GeoTIFF from a local relative path.
# NOTE(review): the dataset handle is not closed in any of the visible cells;
# consider calling harvest_togo.close() once sampling is finished.
harvest_togo = rio.open("../../harvest-maps/togo_cropland_v9_08032020_binary.tif")

In [134]:
# Test points located in Togo. .copy() makes this an independent frame, so
# adding a column below does not write into a slice of test_data (which would
# raise pandas' SettingWithCopyWarning).
harvest_togo_sampled = test_data.loc[test_data["country"] == "Togo", ["lat", "lon", "test_class"]].copy()

# Read the raster value under every point; coordinates are passed as
# (x, y) = (lon, lat). Each sample comes back as a one-element sequence
# (see the preview further down) and is unwrapped in a later cell.
# NOTE(review): assumes the raster uses geographic lon/lat coordinates — confirm.
harvest_togo_sampled["harvest_class"] = list(
    rio.sample.sample_gen(
        harvest_togo,
        zip(harvest_togo_sampled["lon"], harvest_togo_sampled["lat"]),
    )
)

In [135]:
# Drop the points whose sampled value is 255.
# NOTE(review): 255 is presumably the raster's nodata value — confirm against the GeoTIFF metadata.
harvest_togo_sampled = harvest_togo_sampled[harvest_togo_sampled["harvest_class"]!=255]
# Preview; harvest_class is still shown wrapped (e.g. [0]) at this point.
harvest_togo_sampled.head()

Unnamed: 0,lat,lon,test_class,harvest_class
829,9.875907,1.172471,0,[0]
830,9.180105,1.374695,0,[0]
831,9.275314,1.196385,0,[0]
832,7.346575,0.705167,0,[0]
833,6.777337,0.629817,0,[0]


In [136]:
# Extract the single class value from each one-element sample
harvest_togo_sampled["harvest_class"] = harvest_togo_sampled["harvest_class"].apply(lambda x: x[0]) 

In [137]:
# Score the Harvest Togo map against the test labels
# (first argument = reference labels, second = map values), as a dict.
harvest_togo_report = classification_report(harvest_togo_sampled["test_class"], harvest_togo_sampled["harvest_class"], output_dict=True)

# Append the metrics as a "harvest_togo" row of the Togo results table.
results["Togo"] = report_to_row("harvest_togo", harvest_togo_report, results["Togo"])

**Harvest Kenya** ([Zenodo](https://zenodo.org/record/4271144#.Y18ucXbMJPa))

In [138]:
# Open the Harvest Kenya binary cropland GeoTIFF from a local relative path.
# NOTE(review): the dataset handle is not closed in any of the visible cells;
# consider calling harvest_kenya.close() once sampling is finished.
harvest_kenya = rio.open("../../harvest-maps/kenya_cropland_binary_2019.tif")

In [139]:
# Test points located in Kenya. .copy() makes this an independent frame, so
# adding a column below does not write into a slice of test_data (which would
# raise pandas' SettingWithCopyWarning).
harvest_kenya_sampled = test_data.loc[test_data["country"] == "Kenya", ["lat", "lon", "test_class"]].copy()

# Read the raster value under every point; coordinates are passed as
# (x, y) = (lon, lat). Each sample comes back as a one-element sequence
# (see the preview further down) and is unwrapped in a later cell.
# NOTE(review): assumes the raster uses geographic lon/lat coordinates — confirm.
harvest_kenya_sampled["harvest_class"] = list(
    rio.sample.sample_gen(
        harvest_kenya,
        zip(harvest_kenya_sampled["lon"], harvest_kenya_sampled["lat"]),
    )
)

In [140]:
# Drop the points whose sampled value is 255.
# NOTE(review): 255 is presumably the raster's nodata value — confirm against the GeoTIFF metadata.
harvest_kenya_sampled = harvest_kenya_sampled[harvest_kenya_sampled["harvest_class"]!=255]
# Preview; harvest_class is still shown wrapped (e.g. [1]) at this point.
harvest_kenya_sampled.head()

Unnamed: 0,lat,lon,test_class,harvest_class
0,0.725122,34.386282,0,[1]
1,0.621939,34.466496,0,[1]
3,0.226497,34.054859,0,[0]
4,0.096834,33.951959,0,[0]
5,0.032072,33.968469,0,[0]


In [141]:
# Extract the single class value from each one-element sample
harvest_kenya_sampled["harvest_class"] = harvest_kenya_sampled["harvest_class"].apply(lambda x: x[0]) 

In [142]:
# Score the Harvest Kenya map against the test labels
# (first argument = reference labels, second = map values), as a dict.
harvest_kenya_report = classification_report(harvest_kenya_sampled["test_class"], harvest_kenya_sampled["harvest_class"], output_dict=True)

# Append the metrics as a "harvest_kenya" row of the Kenya results table.
results["Kenya"] = report_to_row("harvest_kenya", harvest_kenya_report, results["Kenya"])

## **Section 2** - Earth Engine Accessible

In [143]:
# Bundle the ee.Feature objects of all test points (both countries) into a
# single Earth Engine FeatureCollection, used for sampling in section 2
test_coll = ee.FeatureCollection(test_data["ee_pts"].tolist())

**Copernicus Land Cover** ([Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_Landcover_100m_Proba-V-C3_Global#description))

In [110]:
# Load the Copernicus land-cover collection and sample its discrete
# classification band (images dated 2019-01-01 up to 2020-01-01) at every
# test point, using a 100 m sampling scale
copernicus = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global")
cop_results = (
    copernicus
    .select("discrete_classification")
    .filterDate("2019-01-01", "2020-01-01")
    .map(lambda img: raster_extraction(img, 100, test_coll))
    .flatten()
)

In [111]:
# Convert the sampled Earth Engine features into a local GeoDataFrame.
cop_sampled = geemap.ee_to_gdf(cop_results)
# Binarise the class code: 40 becomes 1 (crop), every other code becomes 0.
# NOTE(review): 40 is presumably the Copernicus cropland class — confirm against the dataset's class table.
cop_sampled["cop_class"] = cop_sampled["discrete_classification"].apply(lambda x: map_values(x, 40))
cop_sampled.head()

Unnamed: 0,geometry,class_probability,discrete_classification,lat,lon,cop_class
0,,0.0,126,0.725122,34.386282,0
1,,0.0,122,0.621939,34.466496,0
2,,0.0,50,0.459661,34.090158,0
3,,0.0,40,0.226497,34.054859,1
4,,0.0,20,0.096834,33.951959,0


In [112]:
# Attach the Copernicus crop/non-crop label to every test point by matching
# on coordinates.
# - Only the key columns and the label take part in the merge.
# - Duplicate (lat, lon) samples are dropped first, so the left merge returns
#   exactly one row per test point, in test_data's row order.
# - .to_numpy() assigns by position; the result no longer depends on
#   test_data's index labels lining up with the merge output.
# Points without a sample end up as NaN.
test_data["cop"] = (
    test_data[["lat", "lon"]]
    .merge(
        cop_sampled[["lat", "lon", "cop_class"]].drop_duplicates(subset=["lat", "lon"]),
        on=["lat", "lon"],
        how="left",
    )["cop_class"]
    .to_numpy()
)

**ESA World Cover** ([Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/ESA_WorldCover_v100)) 

In [113]:
# Load the ESA WorldCover collection, keep the images intersecting the test
# points, and sample each of them at every test point with a 10 m scale
esa = ee.ImageCollection("ESA/WorldCover/v100")
esa_results = (
    esa
    .filterBounds(test_coll)
    .map(lambda img: raster_extraction(img, 10, test_coll))
    .flatten()
)

In [114]:
# Convert the sampled Earth Engine features into a local GeoDataFrame.
esa_sampled = geemap.ee_to_gdf(esa_results)
# Binarise the "Map" band: 40 becomes 1 (crop), every other code becomes 0.
# NOTE(review): 40 is presumably the WorldCover cropland class — confirm against the dataset's class table.
esa_sampled["esa_class"] = esa_sampled["Map"].apply(lambda x: map_values(x, 40))
esa_sampled.head()

Unnamed: 0,geometry,Map,class_probability,lat,lon,esa_class
0,,20,0.0,0.725122,34.386282,0
1,,20,0.0,0.621939,34.466496,0
2,,50,0.0,0.459661,34.090158,0
3,,30,0.0,0.226497,34.054859,0
4,,30,0.0,0.096834,33.951959,0


In [115]:
# Attach the ESA crop/non-crop label to every test point by matching on
# coordinates.
# - Only the key columns and the label take part in the merge.
# - Duplicate (lat, lon) samples are dropped first, so the left merge returns
#   exactly one row per test point, in test_data's row order.
# - .to_numpy() assigns by position; the result no longer depends on
#   test_data's index labels lining up with the merge output.
# Points without a sample end up as NaN.
test_data["esa"] = (
    test_data[["lat", "lon"]]
    .merge(
        esa_sampled[["lat", "lon", "esa_class"]].drop_duplicates(subset=["lat", "lon"]),
        on=["lat", "lon"],
        how="left",
    )["esa_class"]
    .to_numpy()
)

**GLAD Global** ([Earth Engine](https://glad.earthengine.app/view/global-cropland-dynamics))

In [116]:
# Load the GLAD 2019 cropland collection, keep the images intersecting the
# test points, and sample each of them at every test point with a 30 m scale
glad = ee.ImageCollection("users/potapovpeter/Global_cropland_2019")
glad_results = (
    glad
    .filterBounds(test_coll)
    .map(lambda img: raster_extraction(img, 30, test_coll))
    .flatten()
)

In [117]:
# Convert the sampled Earth Engine features into a local GeoDataFrame.
# No remapping is applied here: band "b1" is used as the label directly.
# NOTE(review): the preview shows only 0/1 values, so b1 is presumably already binary crop/non-crop — confirm.
glad_sampled = geemap.ee_to_gdf(glad_results)
glad_sampled.head()

Unnamed: 0,geometry,b1,class_probability,lat,lon
0,,1,0.0,0.725122,34.386282
1,,0,0.0,0.621939,34.466496
2,,0,0.0,0.459661,34.090158
3,,0,0.0,0.226497,34.054859
4,,0,0.0,0.096834,33.951959


In [118]:
# Attach the GLAD label (band b1) to every test point by matching on
# coordinates.
# - Only the key columns and the label take part in the merge.
# - Duplicate (lat, lon) samples are dropped first, so the left merge returns
#   exactly one row per test point, in test_data's row order.
# - .to_numpy() assigns by position; the result no longer depends on
#   test_data's index labels lining up with the merge output.
# Points without a sample end up as NaN.
test_data['glad'] = (
    test_data[["lat", "lon"]]
    .merge(
        glad_sampled[["lat", "lon", "b1"]].drop_duplicates(subset=["lat", "lon"]),
        on=["lat", "lon"],
        how="left",
    )["b1"]
    .to_numpy()
)

**Compute Results for Copernicus, ESA, GLAD**

In [119]:
# Score each Earth Engine map against the test labels, one country at a time.
for country, df in test_data.groupby("country"):
    for dataset in ["cop", "esa", "glad"]:
        print(country, dataset)
        # The label columns come from left merges, so a point the map did not
        # cover is NaN. Score only the covered points, in the same spirit as
        # the Harvest maps, where nodata points are dropped before scoring.
        scored = df.dropna(subset=[dataset])
        # A column that held NaN is float; cast back to int so the labels
        # passed to the report are plain 0/1 (report_to_row looks up the
        # report entries "0" and "1").
        report = classification_report(
            scored["test_class"],
            scored[dataset].astype(int),
            output_dict=True,
        )
        results[country] = report_to_row(dataset, report, results[country])

Kenya cop
Kenya esa
Kenya glad
Togo cop
Togo esa
Togo glad


## **Evaluation Results**

**Kenya**

In [120]:
# Display the per-map metrics for the Kenya test set.
# NOTE(review): the saved output below has no harvest_kenya row, and this cell's
# execution count is lower than the Harvest cells' — it was presumably last run
# before them. Re-run the notebook top to bottom to confirm the full table.
results["Kenya"]

Unnamed: 0,dataset,accuracy,crop_f1,crop_support,noncrop_support,crop_precision,crop_recall,noncrop_precision,noncrop_recall
0,cop,0.913148,0.937824,571,258,0.925043,0.950963,0.884298,0.829457
0,esa,0.44994,0.34104,571,258,0.975207,0.206655,0.360169,0.988372
0,glad,0.849216,0.885636,571,258,0.927203,0.847636,0.716612,0.852713


**Togo**

In [121]:
# Display the per-map metrics for the Togo test set.
# NOTE(review): the saved output below has no harvest_togo row, and this cell's
# execution count is lower than the Harvest cells' — it was presumably last run
# before them. Re-run the notebook top to bottom to confirm the full table.
results["Togo"]

Unnamed: 0,dataset,accuracy,crop_f1,crop_support,noncrop_support,crop_precision,crop_recall,noncrop_precision,noncrop_recall
0,cop,0.695775,0.571429,152,203,0.72,0.473684,0.686275,0.862069
0,esa,0.740845,0.606838,152,203,0.865854,0.467105,0.703297,0.945813
0,glad,0.707042,0.559322,152,203,0.785714,0.434211,0.682657,0.91133
