# PDP / Forest Logging Detection

In [1]:
import ee
import geemap
import geopandas as gpd

## Testowanie

In [2]:
ee.Authenticate() 
ee.Initialize()


Successfully saved authorization token.


In [97]:
def visualize_forest_change(year):
    if year < 2001 or year > 2023:
        raise ValueError("Podaj rok w zakresie od 2001 do 2023.")
    ee.Initialize()

    poland = ee.FeatureCollection("FAO/GAUL/2015/level0").filter(
        ee.Filter.eq("ADM0_NAME", "Poland")
    )

    hansen_data = ee.Image("UMD/hansen/global_forest_change_2023_v1_11")

    loss = hansen_data.select("loss")
    loss_year = hansen_data.select("lossyear")
    # Filtr strat lasu dla wybranego roku
    loss_in_year = loss.updateMask(loss_year.eq(year - 2000))
    loss_in_year_poland = loss_in_year.clip(poland)

    satellite_data_following_year = (
        ee.ImageCollection("COPERNICUS/S2")
        .filterDate(f"{year + 1}-06-01", f"{year + 1}-08-31")
        .filterBounds(poland)
        .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
        .median()
        .clip(poland)
    )

    satellite_data_previous_year = (
        ee.ImageCollection("COPERNICUS/S2")
        .filterDate(f"{year - 1}-06-01", f"{year - 1}-08-31")
        .filterBounds(poland)
        .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
        .median()
        .clip(poland)
    )

    # ustalenie obszaru zalesionego
    forest_cover = hansen_data.select("treecover2000").clip(poland).gte(25).selfMask()
    # .And(
    #     hansen_data \
    #         .select('lossyear') \
    #         .gt(year - 2000) \
    #         .Or(hansen_data.select('lossyear').eq(0))
    # )

    loss_viz = {"min": 0, "max": 1, "palette": ["red"]}
    satellite_viz = {"bands": ["B4", "B3", "B2"], "min": 0, "max": 3000, "gamma": 1.2}

    map = geemap.Map()
    map.centerObject(poland, 6)  # Przybliżenie na Polskę
    map.addLayer(satellite_data_following_year, satellite_viz, f"SAT {year + 1}")
    map.addLayer(satellite_data_previous_year, satellite_viz, f"SAT {year - 1}")
    map.addLayer(loss_in_year_poland, loss_viz, f"Straty lasu w {year}")
    map.addLayer(poland, {"color": "blue"}, "Granice Polski")
    map.addLayer(forest_cover, {"palette": ["green"]}, "Obszary leśne")

    return map

In [98]:
visualize_forest_change(2022)

Map(center=[52.108736119738694, 19.430639807941635], controls=(WidgetControl(options=['position', 'transparent…

In [9]:
def export_forest_loss_to_geojson(year, output_file):
    if year < 2001 or year > 2023:
        raise ValueError("Podaj rok w zakresie od 2001 do 2023.")

    ee.Initialize()

    poland = ee.FeatureCollection('FAO/GAUL/2015/level0') \
        .filter(ee.Filter.eq('ADM0_NAME', 'Poland'))
    poland_geometry = poland.geometry()

    hansen_data = ee.Image('UMD/hansen/global_forest_change_2023_v1_11')

    #Straty lasu
    loss = hansen_data.select('loss')
    loss_year = hansen_data.select('lossyear')


    loss_in_year = loss.updateMask(loss_year.eq(year - 2000))
    loss_in_year_poland = loss_in_year.clip(poland_geometry)


    loss_vector = loss_in_year_poland.reduceToVectors(
        geometry=poland_geometry,
        geometryType='polygon',
        reducer=ee.Reducer.countEvery(),
        scale=30,
        maxPixels=1e9, 
        bestEffort=True 
    )

    geemap.ee_export_vector(
        ee_object=loss_vector,
        filename=output_file
    )
export_forest_loss_to_geojson(2020, 'forest_loss_2020.geojson')


Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/572709905565/tables/4aacc943d68dfb5e5814306715ad52aa-6c2e1d65650a60d094dccb1558ea3317:getFeatures
Please wait ...
Data downloaded to /Users/jakub/AI/Z24/GEO/pdp-forest-logging/forest_loss_2020.geojson


## Klasyfikator

In [16]:
import ee
import geemap
import geopandas as gpd
from tqdm import tqdm

In [199]:
MIN_DEFORESTATION_YEAR = 2016
REFERENCE_YEAR = 2020
FOREST_COVER_THRESHOLD = 25
FOREST_PATCH_SIZE_M2 = 10000
BOUNDING_BOX_BUFFER_METERS = 10000
TRAINING_PATCH = ee.Geometry.Polygon([
    [17.5, 51.5],
    [17.5, 52.0],
    [18.3, 52.0],
    [18.3, 51.5],
    [17.5, 51.5]
])
TRAIN_TEST_SPLIT = 0.7
MIN_POINTS_PER_CLASS = 100


In [200]:
def create_labeled_image(region_geometry, min_deforestation_year, reference_year, forest_cover_threshold):
    hansen_data = ee.Image("UMD/hansen/global_forest_change_2023_v1_11").clip(region_geometry)
    treecover = hansen_data.select("treecover2000").gte(forest_cover_threshold)
    lossyear = hansen_data.select("lossyear")
    loss_occurrence = lossyear.gt(0)
    year_of_loss = lossyear.add(2000)
    
    class_forest = hansen_data.select("treecover2000").gt(0)
    
    # Class 2: Forest AND deforested between min_deforestation_year and reference_year
    class_recent_deforestation = (
        treecover
        .And(year_of_loss.gte(min_deforestation_year))
        .And(year_of_loss.lte(reference_year))
    ).selfMask().unmask(0)
    
    # Combine classes (mutually exclusive)
    labeled = (
        class_recent_deforestation.add(class_forest)
    ).rename("land_class")
    
    return labeled

In [201]:
# plot a map with "labeled" results from previous cell
labeled = create_labeled_image(
    TRAINING_PATCH, MIN_DEFORESTATION_YEAR, REFERENCE_YEAR, FOREST_COVER_THRESHOLD
)

# show a map
map = geemap.Map()

map.centerObject(TRAINING_PATCH, 12)
# map.addLayer(TRAINING_PATCH, {"color": "blue"}, "Training Patch")
map.addLayer(labeled, {"min": 0, "max": 2, "palette": ["black", "green", "red"]})
map

Map(center=[51.750217586909386, 17.899999999999842], controls=(WidgetControl(options=['position', 'transparent…

In [211]:
def extract_training_data(
    region_geometry,
    min_deforestation_year,
    reference_year,
    forest_cover_threshold,
    train_test_split,
):
    labeled = create_labeled_image(
        region_geometry, min_deforestation_year, reference_year, forest_cover_threshold
    )
    sat_image = (
        ee.ImageCollection("COPERNICUS/S2")
        .filterDate(f"{reference_year + 1}-06-01", f"{reference_year + 1}-08-31")
        .filterBounds(region_geometry)
        .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
        .median()
        .clip(region_geometry)
    )
    merged = sat_image.addBands(labeled)
    points_all = merged.sample(
        region=region_geometry,
        scale=30,
        numPixels=50000,
        seed=42,
        tileScale=2,
        geometries=True,
    )

    none_points = points_all.filter(ee.Filter.eq("land_class", 0))
    forest_points = points_all.filter(ee.Filter.eq("land_class", 1))
    deforest_points = points_all.filter(ee.Filter.eq("land_class", 2))

    none_count = none_points.size().getInfo()
    forest_count = forest_points.size().getInfo()
    deforest_count = deforest_points.size().getInfo()

    min_count = min(none_count, forest_count, deforest_count)

    none_points_limited = none_points.limit(min_count)
    forest_points_limited = forest_points.limit(min_count)
    deforest_points_limited = deforest_points.limit(min_count)

    balanced_points = none_points_limited.merge(forest_points_limited).merge(
        deforest_points_limited
    )

    print("Balanced points count:", min_count, "(per class)")

    points_with_random = balanced_points.randomColumn("random_value", seed=42)
    train_points = points_with_random.filter(
        ee.Filter.lt("random_value", train_test_split)
    )
    test_points = points_with_random.filter(
        ee.Filter.gte("random_value", train_test_split)
    )

    return sat_image, labeled, train_points, test_points

In [212]:
def show_training_samples(sat_image, hansen_data, labeled_image, train_points, test_points):
    all_points = ee.FeatureCollection(train_points).merge(test_points)
    color_mapped = all_points.map(
        lambda f: f.set("style", {
            "color": ee.Algorithms.If(
                f.getNumber("land_class").eq(0),
                "grey",
                ee.Algorithms.If(
                    f.getNumber("land_class").eq(1),
                    "green",
                    "orange"
                )
            ),
            "pointSize": 5
        })
    )
    m = geemap.Map()
    m.centerObject(all_points.geometry(), 8)
    rgb_viz = {"bands": ["B4", "B3", "B2"], "min": 0, "max": 3000, "gamma": 1.2}
    labeled_viz = {"min": 0, "max": 2, "palette": ["grey", "green", "orange"]}
    hansen_lossyear_viz = {"min": 0, "max": 23, "palette": ["black", "yellow", "red"]}
    m.addLayer(sat_image, rgb_viz, "Sentinel")
    m.addLayer(hansen_data.select("lossyear"), hansen_lossyear_viz, "Hansen Loss Year")
    m.addLayer(labeled_image, labeled_viz, "Labeled Ground Truth")
    m.addLayer(color_mapped.style(**{"styleProperty": "style"}), {}, "Train/Test Points")
    return m


In [213]:
def train_classifier(train_points, test_points, bands, classifier=None):
    print("Trenuję klasyfikator...")
    if classifier is None:
        classifier = ee.Classifier.smileRandomForest(numberOfTrees=50)
    classifier = classifier.train(
        features=train_points, classProperty="land_class", inputProperties=bands
    )
    print("Obliczam metryki treningowe...")
    train_pred = train_points.classify(classifier)
    conf_matrix_train = train_pred.errorMatrix("land_class", "classification")
    print("Macierz błędów (train):", conf_matrix_train.getInfo())
    print("Dokładność (train):", conf_matrix_train.accuracy().getInfo())
    print("Obliczam metryki testowe...")
    test_pred = test_points.classify(classifier)
    conf_matrix_test = test_pred.errorMatrix("land_class", "classification")
    print("Macierz błędów (test):", conf_matrix_test.getInfo())
    print("Dokładność (test):", conf_matrix_test.accuracy().getInfo())
    return classifier

In [214]:
def classify_and_display_results(classifier, bands, bounding_box_buffer_m, reference_year, forest_cover_threshold):
    print("Wczytuję dane i generuję klasyfikację dla całej Polski...")
    poland = ee.FeatureCollection("FAO/GAUL/2015/level0").filter(ee.Filter.eq("ADM0_NAME", "Poland")).first()
    bbox = poland.geometry().bounds().buffer(bounding_box_buffer_m)
    poland_clipped = ee.Feature(poland).geometry().intersection(bbox)
    hansen_data = ee.Image("UMD/hansen/global_forest_change_2023_v1_11").clip(poland_clipped)
    sat_image = (
        ee.ImageCollection("COPERNICUS/S2")
        .filterDate(f"{reference_year + 1}-06-01", f"{reference_year + 1}-08-31")
        .filterBounds(poland_clipped)
        .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
        .median()
        .clip(poland_clipped)
    )
    classified = sat_image.select(bands).classify(classifier).rename("classified")
    m = geemap.Map()
    m.centerObject(ee.FeatureCollection([poland]).geometry(), 6)
    m.add_basemap("OpenStreetMap")
    rgb_viz = {"bands": ["B4", "B3", "B2"], "min": 0, "max": 3000, "gamma": 1.2}
    class_viz = {"min": 0, "max": 2, "palette": ["grey", "green", "orange"]}
    m.addLayer(sat_image, rgb_viz, "Obraz Sentinel")
    m.addLayer(classified, class_viz, "Klasyfikacja")
    m.addLayer(hansen_data.select("lossyear"), {"min": 0, "max": 23, "palette": ["black", "yellow", "red"]}, "Hansen Loss Year")
    forest_mask = hansen_data.select("treecover2000").gte(forest_cover_threshold).selfMask()
    m.addLayer(forest_mask, {"palette": ["green"]}, "Hansen Forest 2000")
    return m


In [215]:
poland_fc = ee.FeatureCollection("FAO/GAUL/2015/level0").filter(ee.Filter.eq("ADM0_NAME", "Poland"))
region_for_training = poland_fc.geometry().intersection(TRAINING_PATCH)
sat_image, labeled_image, train_points, test_points = extract_training_data(
    region_for_training,
    MIN_DEFORESTATION_YEAR,
    REFERENCE_YEAR,
    FOREST_COVER_THRESHOLD,
    TRAIN_TEST_SPLIT
)
hansen_data = ee.Image("UMD/hansen/global_forest_change_2023_v1_11").clip(region_for_training)


Balanced points count: 520 (per class)


In [216]:
map_samples = show_training_samples(sat_image, hansen_data, labeled_image, train_points, test_points)
map_samples

Map(center=[51.73623497039189, 17.908811754919995], controls=(WidgetControl(options=['position', 'transparent_…

In [219]:
print("Trenuję model...")
bands = sat_image.bandNames().getInfo()

classifiers = {
    "SVM": ee.Classifier.libsvm(),
    "Naive Bayes": ee.Classifier.smileNaiveBayes(),
    "Random Forest n=50": ee.Classifier.smileRandomForest(numberOfTrees=50),
    "Random Forest n=100": ee.Classifier.smileRandomForest(numberOfTrees=100),
    "Random Forest n=200": ee.Classifier.smileRandomForest(numberOfTrees=200),
    "Gradient Tree Boosting n=50": ee.Classifier.smileGradientTreeBoost(
        numberOfTrees=50
    ),
    "Gradient Tree Boosting n=100": ee.Classifier.smileGradientTreeBoost(
        numberOfTrees=100
    ),
    "Gradient Tree Boosting n=200": ee.Classifier.smileGradientTreeBoost(
        numberOfTrees=200
    ),
}

for classifier_name in classifiers.keys():
    print(f"==================== {classifier_name} ====================")
    classifiers[classifier_name] = train_classifier(
        train_points, test_points, bands, classifiers[classifier_name]
    )
    print("============================================================")

Trenuję model...
Trenuję klasyfikator...
Obliczam metryki treningowe...
Macierz błędów (train): [[257, 88, 11], [30, 325, 23], [22, 118, 241]]
Dokładność (train): 0.7381165919282511
Obliczam metryki testowe...
Macierz błędów (test): [[116, 41, 7], [8, 123, 11], [9, 48, 82]]
Dokładność (test): 0.7213483146067415
Trenuję klasyfikator...
Obliczam metryki treningowe...
Macierz błędów (train): [[176, 125, 55], [23, 310, 45], [12, 158, 211]]
Dokładność (train): 0.6251121076233184
Obliczam metryki testowe...
Macierz błędów (test): [[82, 59, 23], [6, 123, 13], [3, 53, 83]]
Dokładność (test): 0.647191011235955
Trenuję klasyfikator...
Obliczam metryki treningowe...
Macierz błędów (train): [[355, 0, 1], [7, 368, 3], [2, 3, 376]]
Dokładność (train): 0.9856502242152466
Obliczam metryki testowe...
Macierz błędów (test): [[139, 10, 15], [8, 102, 32], [10, 24, 105]]
Dokładność (test): 0.7775280898876404
Trenuję klasyfikator...
Obliczam metryki treningowe...
Macierz błędów (train): [[355, 1, 0], [4, 37

In [221]:
print("Klasyfikuję i wyświetlam wyniki dla całej Polski...")
classification_map = classify_and_display_results(
    classifiers["Random Forest n=100"],
    bands,
    BOUNDING_BOX_BUFFER_METERS,
    REFERENCE_YEAR,
    FOREST_COVER_THRESHOLD
)
classification_map


Klasyfikuję i wyświetlam wyniki dla całej Polski...
Wczytuję dane i generuję klasyfikację dla całej Polski...


Map(center=[52.108736119738694, 19.430639807941635], controls=(WidgetControl(options=['position', 'transparent…