# Filtering and saving INPE polygons between 2008 and 2020

In [4]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np

# Read the full INPE deforestation layer from disk.
shapefile_path = 'DIRECTORY_PATH/inpe_file.shp'
gdf = gpd.read_file(shapefile_path)

# Keep only polygons whose "year" lies in the closed interval [2008, 2020]
# (both end years are included).
year_in_range = gdf['year'].between(2008, 2020)
filtered_gdf = gdf[year_in_range]

# Write the 2008-2020 subset to a new shapefile.
output_path = 'DIRECTORY_PATH/inpe_file_2008_2020.shp'
filtered_gdf.to_file(output_path)


KeyboardInterrupt



# Counting and summing area of INVALID polygons (based on missing information on geometries)

In [None]:
import geopandas as gpd

# Read the 2008-2020 deforestation polygons.
deforest_polygons_path = 'DIRECTORY_PATH/inpe_file_2008_2020.shp'
deforest_polygons = gpd.read_file(deforest_polygons_path)

# Select the rows whose geometry FAILS the validity check
# (i.e. the invalid polygons are the ones kept here).
geometry_is_invalid = ~deforest_polygons.geometry.is_valid
invalid_deforest_polygons = deforest_polygons.loc[geometry_is_invalid]

# Summarise the invalid subset: number of rows and summed 'area_km' attribute.
num_invalid_polygons = invalid_deforest_polygons.shape[0]
total_area_invalid = invalid_deforest_polygons['area_km'].sum()

print("Total area of deforestation polygons with invalid geometry:", total_area_invalid)
print("Number of polygons with invalid geometry:", num_invalid_polygons)

# Counting and summing area of VALID polygons (based on missing information on geometries)

In [2]:
import geopandas as gpd

# Read the 2008-2020 deforestation polygons.
deforest_polygons_path = 'DIRECTORY_PATH/inpe_file_2008_2020.shp'
deforest_polygons = gpd.read_file(deforest_polygons_path)

# Select the rows whose geometry PASSES the validity check.
geometry_is_valid = deforest_polygons.geometry.is_valid
valid_deforest_polygons = deforest_polygons.loc[geometry_is_valid]

# Summarise the valid subset: number of rows and summed 'area_km' attribute.
num_valid_polygons = valid_deforest_polygons.shape[0]
total_area_valid = valid_deforest_polygons['area_km'].sum()

print("Total area of deforestation polygons with valid geometry:", total_area_valid)
print("Number of polygons with valid geometry:", num_valid_polygons)


Total area of deforestation polygons with valid geometry: 91517.30936652982
Number of polygons with valid geometry: 552752


# Filtering and saving only valid polygons (based on missing information on geometries)

In [None]:
import geopandas as gpd

# Read the 2008-2020 deforestation polygons.
deforest_polygons_path = 'DIRECTORY_PATH/inpe_file_2008_2020.shp'
deforest_polygons = gpd.read_file(deforest_polygons_path)

# Drop every row whose geometry fails the validity check.
geometry_is_valid = deforest_polygons.geometry.is_valid
valid_deforest_polygons = deforest_polygons.loc[geometry_is_valid]

# Persist the valid-only subset as its own shapefile for the later steps.
output_valid_shapefile_path = 'DIRECTORY_PATH/inpe_file_2008_2020_valid.shp'
valid_deforest_polygons.to_file(output_valid_shapefile_path)


# Adjusting CRS of the INPE deforestation polygons to fit the raster file from Soares-Filho et al. (2014) 

In [3]:
import geopandas as gpd
from rasterio.warp import calculate_default_transform, reproject, Resampling
import rasterio

# Read only the CRS from the suitability raster; the dataset is closed when
# the `with` block exits.
with rasterio.open('DIRECTORY_PATH/Soares-Filho_Suitability.tif') as src:
    raster_crs = src.crs

# Load the valid 2008-2020 deforestation polygons.
gdf = gpd.read_file('DIRECTORY_PATH/inpe_file_2008_2020_valid.shp')

# Transform the polygon coordinates into the raster's CRS.
# Only the CRS is matched here; the polygons are not clipped to the raster extent.
gdf_reprojected = gdf.to_crs(crs=raster_crs)

# Write the reprojected polygons to a new shapefile.
gdf_reprojected.to_file('DIRECTORY_PATH/inpe_file_2008_2020_valid_newCRS.shp')



KeyboardInterrupt



# Creating counts of Suitability pixels based on zonal_stats while considering only pixels whose centers fall within the polygon (all_touched=False)

# Summing deforested area by suitability category

In [2]:
import geopandas as gpd
from rasterstats import zonal_stats

# Load the reprojected deforestation polygons (saved in the raster's CRS).
deforest_polygons_path = 'DIRECTORY_PATH/inpe_file_2008_2020_valid_newCRS.shp'
deforest_polygons = gpd.read_file(deforest_polygons_path)

# Path of the suitability raster that zonal_stats reads.
suitabilityTIF_path = 'DIRECTORY_PATH/Soares-Filho_Suitability.tif'

# Number of polygons for which zonal statistics were computed successfully.
analyzed_polygons_count = 0

# Compute the statistics one polygon at a time so that a failure on a single
# polygon is reported and skipped instead of aborting the whole run.
for idx, polygon in deforest_polygons.iterrows():
    try:
        # categorical=True yields {pixel_value: count} for the VALID pixels only:
        # pixels equal to the nodata value (255) are masked out of those counts,
        # so looking up key 255 in the result would always give 0.  The extra
        # 'nodata' statistic is requested to obtain the number of missing pixels.
        stats = zonal_stats(
            polygon.geometry,
            suitabilityTIF_path,
            nodata=255,
            all_touched=False,
            categorical=True,
            stats=['nodata'],
        )
        zone_counts = dict(stats[0])
        # Take 'nodata' out of the mapping so it is not turned into a
        # 'pixel_nodata' category column below.
        nodata_count = int(zone_counts.pop('nodata', 0) or 0)

        for value, count in zone_counts.items():
            # Print zone statistics for each polygon
            print(f"Zone statistics for polygon {idx}: {value} - {count}")
            # Store the count in a 'pixel_<value>' column, creating the column
            # (filled with 0 for every row) the first time a value is seen.
            column_name = f'pixel_{value}'
            if column_name not in deforest_polygons.columns:
                deforest_polygons[column_name] = 0
            deforest_polygons.at[idx, column_name] = count

        # Number of nodata (missing) pixels inside the polygon.
        deforest_polygons.at[idx, 'pixel_mis'] = nodata_count
        analyzed_polygons_count += 1
        # Print message after values have been added as columns
        print(f"Values added as columns for polygon {idx}")
    except Exception as e:
        # NOTE(review): this catches every exception, not only geometry errors;
        # the message text is kept as-is, the real cause is in `e`.
        print(f"Skipping polygon {idx} due to geometry problems: {e}")

# Print the number of polygons analyzed
print(f"{analyzed_polygons_count} polygons analyzed.")

# Save the polygons together with the new pixel-count columns.
output_shapefile_path = 'DIRECTORY_PATH/inpe_file_2008_2020_valid_newCRS_ClassificationSuitability.shp'
deforest_polygons.to_file(output_shapefile_path)


KeyboardInterrupt



In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

# Load the polygons that carry the per-value pixel-count columns.
deforestation_agg = gpd.read_file('DIRECTORY_PATH/inpe_file_2008_2020_valid_newCRS_ClassificationSuitability.shp')

# Pixel counts for raster values 0, 1 and 2.
count_0 = deforestation_agg['pixel_0']
count_1 = deforestation_agg['pixel_1']
count_2 = deforestation_agg['pixel_2']

# 'Suit_Cat' is 1 (or 2) when that value has strictly more pixels than both of
# the other two values; every other case, including ties, falls back to 0.
value_1_dominates = (count_1 > count_0) & (count_1 > count_2)
value_2_dominates = (count_2 > count_0) & (count_2 > count_1)
deforestation_agg['Suit_Cat'] = np.select(
    [value_1_dominates, value_2_dominates],
    [1, 2],
    default=0,
)

# Order the rows by year.
deforestation_agg = deforestation_agg.sort_values('year')

# Overall 'area_km' sum and the same sum broken down by 'Suit_Cat'.
total_area_per_suit_cat = deforestation_agg.groupby('Suit_Cat')['area_km'].sum()
total_area = deforestation_agg['area_km'].sum()

print("Total area_km in the shapefile:", total_area,
      "\nTotal area_km per Suit_Cat:\n", total_area_per_suit_cat)


Total area_km in the shapefile: 91517.30936652982 
Total area_km per Suit_Cat:
 Suit_Cat
0    22056.312174
1    53325.935360
2    16135.061832
Name: area_km, dtype: float64


# Creating counts of Suitability pixels based on zonal_stats in the shapefile including legal and illegal classification (output from R code)

In [None]:
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from rasterstats import zonal_stats

# Load the deforestation polygons that carry the legal/illegal classification.
deforest_polygons_path = 'DIRECTORY_PATH/inpe_file_2008_2020_valid_legal&illegalclassification.shp'
deforest_polygons = gpd.read_file(deforest_polygons_path)

# Path of the suitability raster that zonal_stats reads.
suitabilityTIF_path = 'DIRECTORY_PATH/Soares-Filho_Suitability.tif'

# Number of polygons for which zonal statistics were computed successfully.
analyzed_polygons_count = 0

# Compute the statistics one polygon at a time so that a failure on a single
# polygon is reported and skipped instead of aborting the whole run.
for idx, polygon in deforest_polygons.iterrows():
    try:
        # categorical=True yields {pixel_value: count} for the VALID pixels only:
        # pixels equal to the nodata value (255) are masked out of those counts,
        # so looking up key 255 in the result would always give 0.  The extra
        # 'nodata' statistic is requested to obtain the number of missing pixels.
        stats = zonal_stats(
            polygon.geometry,
            suitabilityTIF_path,
            nodata=255,
            all_touched=False,
            categorical=True,
            stats=['nodata'],
        )
        zone_counts = dict(stats[0])
        # Take 'nodata' out of the mapping so it is not turned into a
        # 'pixel_nodata' category column below.
        nodata_count = int(zone_counts.pop('nodata', 0) or 0)

        for value, count in zone_counts.items():
            # Print zone statistics for each polygon
            print(f"Zone statistics for polygon {idx}: {value} - {count}")
            # Store the count in a 'pixel_<value>' column, creating the column
            # (filled with 0 for every row) the first time a value is seen.
            column_name = f'pixel_{value}'
            if column_name not in deforest_polygons.columns:
                deforest_polygons[column_name] = 0
            deforest_polygons.at[idx, column_name] = count

        # Number of nodata (missing) pixels inside the polygon.
        deforest_polygons.at[idx, 'pixel_mis'] = nodata_count
        analyzed_polygons_count += 1
        # Print message after values have been added as columns
        print(f"Values added as columns for polygon {idx}")
    except Exception as e:
        # NOTE(review): this catches every exception, not only geometry errors;
        # the message text is kept as-is, the real cause is in `e`.
        print(f"Skipping polygon {idx} due to geometry problems: {e}")

# Print the number of polygons analyzed
print(f"{analyzed_polygons_count} polygons analyzed.")

# Save the polygons together with the new pixel-count columns.
output_shapefile_path = 'DIRECTORY_PATH/inpe_file_2008_2020_valid_legal&illegalclassification_ClassificationSuitability.shp'
deforest_polygons.to_file(output_shapefile_path)

# Summing deforested area by suitability and legality category

In [None]:
import geopandas as gpd

# Read the polygons carrying both the 'legality' label and the pixel counts.
shapefile2 = gpd.read_file('DIRECTORY_PATH/inpe_file_2008_2020_valid_legal&illegalclassification_ClassificationSuitability.shp')

# Building blocks shared by the four category masks.
is_legal = shapefile2['legality'] == 'legal'
is_illegal = shapefile2['legality'] == 'illegal'
# A polygon counts as soy suitable when value 1 OR value 2 has strictly more
# pixels than value 0.
soy_suitable = (shapefile2['pixel_1'] > shapefile2['pixel_0']) | (shapefile2['pixel_2'] > shapefile2['pixel_0'])

# Boolean masks for the legality x suitability combinations.
legal_soy_suitable = is_legal & soy_suitable
illegal_soy_suitable = is_illegal & soy_suitable
legal_not_soy_suitable = is_legal & ~soy_suitable
illegal_not_soy_suitable = is_illegal & ~soy_suitable

# Subset of polygons belonging to each combination.
legal_soy_suitable_deforestation = shapefile2.loc[legal_soy_suitable]
illegal_soy_suitable_deforestation = shapefile2.loc[illegal_soy_suitable]
legal_not_soy_suitable_deforestation = shapefile2.loc[legal_not_soy_suitable]
illegal_not_soy_suitable_deforestation = shapefile2.loc[illegal_not_soy_suitable]

# Sum of the 'area' attribute within each combination.
total_legal_soy_suitable = legal_soy_suitable_deforestation['area'].sum()
total_illegal_soy_suitable = illegal_soy_suitable_deforestation['area'].sum()
total_legal_not_soy_suitable = legal_not_soy_suitable_deforestation['area'].sum()
total_illegal_not_soy_suitable = illegal_not_soy_suitable_deforestation['area'].sum()

# Report the four totals.
print("Total Deforestation per Category:")
print(f"Soy Suitable and Legal: {total_legal_soy_suitable} km²")
print(f"Soy Suitable and Illegal: {total_illegal_soy_suitable} km²")
print(f"Not Soy Suitable and Legal: {total_legal_not_soy_suitable} km²")
print(f"Not Soy Suitable and Illegal: {total_illegal_not_soy_suitable} km²")
