<a href="https://colab.research.google.com/github/kavyajeetbora/end_to_end_gee_with_python/blob/master/Development/District-Builtup-Index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
import ee
import geemap
import geopandas as gpd
import pandas as pd
from tqdm.notebook import tqdm

ee.Authenticate()
ee.Initialize(project='kavyajeetbora-ee')

In [2]:
image_collection = ee.ImageCollection("JRC/GHSL/P2023A/GHS_BUILT_S")

[ee.ImageCollection.aggregate_array](https://developers.google.com/earth-engine/apidocs/ee-imagecollection-aggregate_array):  Aggregates over a given property of the objects in a collection, calculating a list of all the values of the selected property.

In [3]:
years = image_collection.aggregate_array('system:index').getInfo()
print(years)

['1975', '1980', '1985', '1990', '1995', '2000', '2005', '2010', '2015', '2020', '2025', '2030']


In [4]:
# Load the district boundaries dataset
districts = ee.FeatureCollection("FAO/GAUL/2015/level2").filter(ee.Filter.eq('ADM0_NAME', 'India'))
districts = districts.select(['ADM2_CODE', 'ADM2_NAME', 'ADM1_NAME', 'Shape_Area'])
feature = districts.first()

In [48]:
%%time

# Load the GHSL built-up surface dataset
ghsl_built_up = ee.ImageCollection("JRC/GHSL/P2023A/GHS_BUILT_S")

# Function to calculate the built-up surface area for each district for a given year
def calculate_built_up_area(feature, year):

    district_id = feature.get('ADM2_CODE')

    built_up_image = ghsl_built_up.filterDate(f'{year}-01-01', f'{year}-12-31').first()

    built_up_image = built_up_image

    built_up_area = built_up_image.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=feature.geometry(),
        scale=100,  # Scale to match GHSL data resolution
        maxPixels=1e9
    ).get('built_surface')

    return feature.set({'district_id':district_id, 'year': year, 'area_m2': built_up_area})

# Initialize an empty list to store results
results_list = []


properties = ['ADM2_CODE', 'ADM1_NAME', 'ADM2_NAME', 'Shape_Area', 'area_m2', 'year']
dfs = []
# Loop through each year and calculate the built-up area for each district
progress_bar1 = tqdm(years, unit="Year", colour='green')
for year in progress_bar1:
    progress_bar1.set_description(f"Processing year: {year}")
    districts_with_built_up_area = districts.map(lambda feature: calculate_built_up_area(feature, year))
    data = {}
    progress_bar2 = tqdm(properties, unit="property", leave=False, colour='blue')
    for prop in progress_bar2:
        progress_bar2.set_description(f"Processing property: {prop}")

        values = districts_with_built_up_area.aggregate_array(prop).getInfo()
        data[prop] = values

    df = pd.DataFrame(data)
    dfs.append(df)

  0%|          | 0/12 [00:00<?, ?Year/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

  0%|          | 0/6 [00:00<?, ?property/s]

CPU times: user 2.15 s, sys: 205 ms, total: 2.35 s
Wall time: 2min 42s


In [49]:
df_final = pd.concat(dfs)
df_final.head()

Unnamed: 0,ADM2_CODE,ADM1_NAME,ADM2_NAME,Shape_Area,area_m2,year
0,17660,Haryana,Karnal,0.240933,17245100.0,1975
1,17661,Haryana,Kurukshetra,0.171739,11947410.0,1975
2,17662,Haryana,Mahendragarh,0.177959,5877115.0,1975
3,17665,Haryana,Sonepat,0.205076,18508350.0,1975
4,70134,Haryana,Kaithal,0.214695,11402000.0,1975


In [40]:
df_final

Index(['geometry', 'ADM1_NAME', 'ADM2_CODE', 'ADM2_NAME', 'Shape_Area',
       'area_m2', 'year'],
      dtype='object')

## Preprocess the geometry


- Simplify the geometry before plotting to optimize the data for faster rendering

In [None]:
geo_df_simplified = gdf.copy()

## Convert the coordinates to meters
geo_df_simplified = geo_df_simplified.to_crs("EPSG:3857")
## Simplify the geometry by tolerance distance: 1 KM
geo_df_simplified['geometry'] = geo_df_simplified['geometry'].simplify(tolerance=1000,preserve_topology=True)

## Reproject to WGS:84
geo_df_simplified = geo_df_simplified.to_crs("EPSG:4326")

memory_usuage = geo_df_simplified.memory_usage(deep=True).sum()/1024
print(f"Memory usuage of this dataframe is {memory_usuage:.2f} KB")