## Setup

In [None]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.13.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fiona>=1.8.19 (from geopandas)
  Downloading Fiona-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyproj>=3.0.1 (from geopandas)
  Downloading pyproj-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m77.8 MB/s[0m eta [36m0:00:00[0m
Collecting click-plugins>=1.0 (from fiona>=1.8.19->geopandas)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting cligj>=0.5 (from fiona>=1.8.19->geopandas)
  Do

In [None]:
# Import the necessary libraries
import geopandas as gpd
import pandas as pd
import numpy as np
from geopandas.tools import sjoin
from pyproj import CRS
import altair as alt

## Read Continents File
Downloaded as GeoJSON from https://hub.arcgis.com/datasets/esri::world-continents/explore?location=-0.937843%2C-0.000006%2C2.64


In [None]:
# Load the continent polygons (the variable is called gdf_countries, but each
# row is a continent, as the CONTINENT column shows).
gdf_countries = gpd.read_file('../data/World_Continents.geojson')

# Check the CRS of the GeoDataFrame
print(gdf_countries.crs)

# Only assign EPSG:4326 when the file did not declare a CRS. Overwriting an
# existing CRS would relabel the coordinates without transforming them.
if gdf_countries.crs is None:
    gdf_countries = gdf_countries.set_crs(epsg=4326)

# Remove unused columns
gdf_countries = gdf_countries.loc[:, ['CONTINENT', 'geometry']]

gdf_countries.head()

EPSG:4326


Unnamed: 0,CONTINENT,geometry
0,Africa,"MULTIPOLYGON (((35.48832 -21.68500, 35.45222 -..."
1,Asia,"MULTIPOLYGON (((-179.99999 68.98009, -179.9580..."
2,Australia,"MULTIPOLYGON (((158.88218 -54.71139, 158.87967..."
3,North America,"MULTIPOLYGON (((-81.67847 7.38861, -81.64945 7..."
4,Oceania,"MULTIPOLYGON (((179.99999 -16.96574, 179.98468..."


## Read Lakes File
downloaded from https://climate.esa.int/documents/1704/lakes_cci_v2.0.2_data_availability_shp.zip

In [None]:
# Read in the lakes shapefile as a GeoDataFrame.
# NOTE(review): decimal="," is forwarded to the underlying reader; the centre
# coordinates are still parsed from comma-decimal strings further down, so it
# does not appear to convert them here — confirm whether it is needed.
gdf_lakes = gpd.read_file('../data/lakescci_v2.0.2_data-availability.shp', decimal=",")

# Only assign EPSG:4326 when the shapefile did not declare a CRS. Overwriting
# an existing CRS would relabel the coordinates without transforming them.
if gdf_lakes.crs is None:
    gdf_lakes = gdf_lakes.set_crs(epsg=4326)

# Check the CRS of the GeoDataFrame
print(gdf_lakes.crs)

# Remove unused columns
gdf_lakes = gdf_lakes.loc[:, ['id', 'short_name', 'lat centre', 'lon centre', 'geometry']]

gdf_lakes.head()

EPSG:4326


Unnamed: 0,id,short_name,lat centre,lon centre,geometry
0,200000004,CGL200000004,355208,-8625,"POLYGON ((-0.70279 35.59445, -0.69584 35.59445..."
1,200000001,CGL200000001,356792,71375,"POLYGON ((7.06101 35.72891, 7.11188 35.73034, ..."
2,97,GLWD00000097,642097,-953819,"POLYGON ((-94.30139 63.99069, -94.30139 63.991..."
3,200000006,CGL200000006,234125,306292,"MULTIPOLYGON (((30.46388 23.39306, 30.46388 23..."
4,200000007,CGL200000007,-207792,253792,"POLYGON ((25.72040 -20.11587, 25.50332 -20.583..."


## Spatial join on both dataframes

In [None]:
# Attach the continent to every lake whose geometry intersects a continent polygon.
# `predicate=` replaces the deprecated `op=` keyword (which triggered a warning
# and is no longer accepted by newer geopandas releases).
# The inner join keeps only lakes that intersect some continent; a lake that
# intersects several continent polygons produces one row per match.
joined_gdf = sjoin(gdf_lakes, gdf_countries, how='inner', predicate='intersects')

# Check the CRS of the GeoDataFrame
print(joined_gdf.crs)

# Remove unused columns (drops the index_right column added by the join)
joined_gdf = joined_gdf.loc[:, ['id', 'short_name', 'lat centre', 'lon centre', 'CONTINENT', 'geometry']]

# Print the result
joined_gdf.head()

  if (await self.run_code(code, result,  async_=asy)):


EPSG:4326


Unnamed: 0,id,short_name,lat centre,lon centre,CONTINENT,geometry
0,200000004,CGL200000004,355208,-8625,Africa,"POLYGON ((-0.70279 35.59445, -0.69584 35.59445..."
1,200000001,CGL200000001,356792,71375,Africa,"POLYGON ((7.06101 35.72891, 7.11188 35.73034, ..."
3,200000006,CGL200000006,234125,306292,Africa,"MULTIPOLYGON (((30.46388 23.39306, 30.46388 23..."
4,200000007,CGL200000007,-207792,253792,Africa,"POLYGON ((25.72040 -20.11587, 25.50332 -20.583..."
5,200000008,CGL200000008,-206375,259792,Africa,"POLYGON ((26.03061 -20.20133, 26.10532 -20.338..."


## Calculate area and create count and area sum per continent


In [None]:
# Reproject to EPSG:3035 so that .area is expressed in square metres.
# NOTE(review): EPSG:3035 is a Europe-centred projection while the lakes are
# global — confirm this is the intended CRS for the area figures.
lakes_projected = joined_gdf.to_crs(epsg=3035)

# Polygon area in m², then in whole km²
lakes_projected['area_m2'] = lakes_projected.area
lakes_projected['area_km2'] = (lakes_projected['area_m2'] / 1000000).round()

# The centre coordinates are handled as strings with a comma as decimal
# separator; swap in a dot and parse them as floats
for coord_col in ('lat centre', 'lon centre'):
    lakes_projected[coord_col] = lakes_projected[coord_col].str.replace(',', '.').astype(float)

# Keep only the attribute columns (geometry and area_m2 are dropped) and
# shorten the coordinate column names
kept_columns = ['id', 'short_name', 'lat centre', 'lon centre', 'area_km2', 'CONTINENT']
joined_gdf = lakes_projected.loc[:, kept_columns].rename(
    columns={'lat centre': 'lat', 'lon centre': 'lon'}
)

In [None]:
# Per-continent summary: total lake area, number of lakes and the mean of the
# lake centre coordinates
result = (
    joined_gdf
    .groupby('CONTINENT')
    .agg(
        area_km2=('area_km2', 'sum'),
        count=('id', 'count'),
        lat=('lat', 'mean'),
        lon=('lon', 'mean'),
    )
    .reset_index()
    .rename(columns={'CONTINENT': 'continent'})
)

# Share of each continent in the overall area / count, as rounded percentages
share_columns = {'percent_area': 'area_km2', 'percent_count': 'count'}
for share_col, value_col in share_columns.items():
    result[share_col] = (result[value_col] / result[value_col].sum() * 100).round()

# adding coords and order for manual placement of chart type scatter and donut
continent_coords = pd.DataFrame({
    'continent': ['North America', 'South America', 'Africa', 'Europe', 'Asia', 'Oceania', 'Australia'],
    'x': [1, 1.5, 2, 2, 3, 3.5, 3],
    'y': [3, 1, 1, 3, 2.5, 1.5, 1],
    'order': [7, 6, 5, 1, 2, 3, 4],
})

# Inner merge: continents without an entry in continent_coords are dropped
result = result.merge(continent_coords, on='continent')

# remove Oceania because of small values (the percentages above were computed
# while Oceania was still included)
is_oceania = result['continent'].str.contains('Oceania')
result = result[~is_oceania]

result.to_csv('cci_lakes_continents.csv', index=False)
result.head(8)

Unnamed: 0,continent,area_km2,count,lat,lon,percent_area,percent_count,x,y,order
0,Africa,317100.0,162,1.056443,23.899519,21.0,8.0,2.0,1.0,5
1,Asia,296845.0,600,40.86325,91.754766,20.0,30.0,3.0,2.5,2
2,Australia,29520.0,53,-30.014702,133.553492,2.0,3.0,3.0,1.0,4
3,Europe,111445.0,291,55.431677,23.221636,7.0,14.0,2.0,3.0,1
4,North America,615899.0,734,52.07993,-100.241754,41.0,36.0,1.0,3.0,7
6,South America,135829.0,171,-23.617763,-63.01747,9.0,8.0,1.5,1.0,6


## Create Charts

### Chart Lake Count

In [None]:
# Radial chart of each continent's share of the lake count: both the arc angle
# and the arc radius encode percent_count.

# Fixed colour per continent
continent_colors = {
    "Europe": "#6AA2B3",
    "Africa": "#234261",
    "Asia": "#315B86",
    "Australia": "#407597",
    "North America": "#11193D",
    "South America": "#77B0BB",
}

# Colour scale built from the mapping above (keys -> domain, values -> range)
color_scale = alt.Scale(
    domain=list(continent_colors),
    range=list(continent_colors.values()),
)
# Square-root radius scale between the inner radius (70) and 300 px
radius_scale = alt.Scale(type="sqrt", zero=True, rangeMin=70, rangeMax=300)

arcs = alt.Chart(result).mark_arc(innerRadius=70, stroke="transparent").encode(
    theta=alt.Theta("percent_count:Q", stack=True),
    radius=alt.Radius("percent_count", scale=radius_scale),
    color=alt.Color("continent:N", scale=color_scale, legend=None),  # Color by continent
    order=alt.Order("order:Q", sort="ascending"),  # manual slice order from the `order` column
    tooltip=['order', 'continent', 'area_km2'],
)

chart = (
    arcs
    .properties(width=600, height=600)
    .configure(
        padding={"left": 40, "top": 40, "right": 40, "bottom": 40},
        background='#DDCDB7',
    )
    .configure_view(strokeWidth=0)  # no frame around the view
)

chart


### Chart Lake Area

In [None]:
# Radial chart of each continent's share of the total lake area: both the arc
# angle and the arc radius encode percent_area.

# Fixed colour per continent
continent_colors = {
    "Europe": "#6AA2B3",
    "Africa": "#234261",
    "Asia": "#315B86",
    "Australia": "#407597",
    "North America": "#11193D",
    "South America": "#77B0BB",
}

# Colour scale built from the mapping above (keys -> domain, values -> range)
color_scale = alt.Scale(
    domain=list(continent_colors),
    range=list(continent_colors.values()),
)
# Square-root radius scale between the inner radius (70) and 300 px
radius_scale = alt.Scale(type="sqrt", zero=True, rangeMin=70, rangeMax=300)

arcs = alt.Chart(result).mark_arc(innerRadius=70, stroke="transparent").encode(
    theta=alt.Theta("percent_area:Q", stack=True),
    radius=alt.Radius("percent_area", scale=radius_scale),
    color=alt.Color("continent:N", scale=color_scale, legend=None),  # Color by continent
    order=alt.Order("order:Q", sort="ascending"),  # manual slice order from the `order` column
    tooltip=['order', 'continent', 'area_km2'],
)

chart = (
    arcs
    .properties(width=600, height=600)
    .configure(
        padding={"left": 40, "top": 40, "right": 40, "bottom": 40},
        background='#DDCDB7',
    )
    .configure_view(strokeWidth=0)  # no frame around the view
)

chart


### Posters

In [None]:
## Bauhaus Style Poster: Count
# Layers the radial lake-count chart with a title and a subtitle on a coloured
# background.

# main chart: arc angle and radius both encode percent_count
continent_colors = {
    "Europe": "#6AA2B3",
    "Africa": "#234261",
    "Asia": "#315B86",
    "Australia": "#407597",
    "North America": "#11193D",
    "South America": "#77B0BB"
}

chart = alt.Chart(result).encode(
    theta=alt.Theta("percent_count:Q", stack=True),
    radius=alt.Radius("percent_count", scale=alt.Scale(type="sqrt", zero=True, rangeMin=70, rangeMax=300)),
    color=alt.Color("continent:N", scale=alt.Scale(domain=list(continent_colors.keys()), range=list(continent_colors.values())), legend=None),  # Color by continent
    order=alt.Order("order:Q", sort="ascending"),
    tooltip=['order','continent', 'area_km2']
).mark_arc(innerRadius=70, stroke="transparent"
).properties(
    width=763,
    height=800
)

# title text, placed at a fixed pixel position below the chart
title = alt.Chart().mark_text(
    text='Lakes monitored\nfrom Space',
    size=35,
    font='Helvetica',
    fontWeight='bold',
    color='#333333',
    align='left'
).encode(
    x=alt.value(0),
    y=alt.value(900)
)

# detail text, placed at a fixed pixel position below the title
detail = alt.Chart().mark_text(
    text='cci lake data by count and continent',
    size=20,
    font='Helvetica',
    fontWeight='lighter',
    color='#333333',
    align='left'
).encode(
    x=alt.value(0),
    y=alt.value(950)
)

# Layering shapes and text.
# - This cell previously layered `detail + logo`, but no `logo` chart is defined
#   in the visible notebook cells, so a fresh-kernel run stopped with a
#   NameError. Add the logo back as a further layer once it is defined.
# - A leading configure_view(strokeWidth=0, width=763, height=1080) call was
#   removed: .configure() replaces the whole config object, so those settings
#   were discarded before rendering.
poster = alt.layer(chart, title, detail).configure(
    padding={"left": 40, "top": 40, "right": 40, "bottom": 100},
    background='#DDCDB7'
).configure_view(
    strokeWidth=0
)

poster

In [None]:
## Bauhaus Style Poster: Area
# Layers the radial lake-area chart with a title and a subtitle on a coloured
# background.

# main chart: arc angle and radius both encode percent_area
continent_colors = {
    "Europe": "#6AA2B3",
    "Africa": "#234261",
    "Asia": "#315B86",
    "Australia": "#407597",
    "North America": "#11193D",
    "South America": "#77B0BB"
}


chart = alt.Chart(result).encode(
    theta=alt.Theta("percent_area:Q", stack=True),
    radius=alt.Radius("percent_area", scale=alt.Scale(type="sqrt", zero=True, rangeMin=70, rangeMax=300)),
    color=alt.Color("continent:N", scale=alt.Scale(domain=list(continent_colors.keys()), range=list(continent_colors.values())), legend=None),  # Color by continent
    order=alt.Order("order:Q", sort="ascending"),
    tooltip=['order','continent', 'area_km2']
).mark_arc(innerRadius=70, stroke="transparent"
).properties(
    width=763,
    height=800
)

# title text, placed at a fixed pixel position below the chart
title = alt.Chart().mark_text(
    text='Lakes monitored\nfrom Space',
    size=35,
    font='Helvetica',
    fontWeight='bold',
    color='#333333',
    align='left'
).encode(
    x=alt.value(0),
    y=alt.value(900)
)

# detail text, placed at a fixed pixel position below the title
detail = alt.Chart().mark_text(
    text='cci lake data by area and continent',
    size=20,
    font='Helvetica',
    fontWeight='lighter',
    color='#333333',
    align='left'
).encode(
    x=alt.value(0),
    y=alt.value(950)
)

# Layering shapes and text.
# - This cell previously layered `detail + logo`, but no `logo` chart is defined
#   in the visible notebook cells, so a fresh-kernel run stopped with a
#   NameError. Add the logo back as a further layer once it is defined.
# - A leading configure_view(strokeWidth=0, width=763, height=1080) call was
#   removed: .configure() replaces the whole config object, so those settings
#   were discarded before rendering.
poster = alt.layer(chart, title, detail).configure(
    padding={"left": 40, "top": 40, "right": 40, "bottom": 100},
    background='#DDCDB7'
).configure_view(
    strokeWidth=0
)

poster