In [1]:
import warnings

import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Read the source GeoPackage into a GeoDataFrame
source_path = "../../data/europe/CNTR_RG_10M_2024_3035.gpkg"
gdf = gpd.read_file(source_path)

In [3]:
# Small territories that are kept by the later area filter whatever their size
microstates = {
    'Andorra', 'Gibraltar', 'Guernsey', 'Isle of Man', 'Jersey',
    'Liechtenstein', 'Malta', 'Monaco', 'San Marino', 'Vatican City',
}

# Every name to keep, matched against the NAME_ENGL column: the larger states
# listed here together with all of the microstates defined above
european_countries = microstates | {
    'Albania', 'Armenia', 'Austria', 'Azerbaijan', 'Belarus',
    'Belgium', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus',
    'Czechia', 'Denmark', 'Estonia', 'Finland', 'France',
    'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland',
    'Ireland', 'Italy', 'Kazakhstan', 'Kosovo', 'Latvia',
    'Lithuania', 'Luxembourg', 'Moldova', 'Montenegro', 'Netherlands',
    'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Romania',
    'Russian Federation', 'Serbia', 'Slovakia', 'Slovenia', 'Spain',
    'Sweden', 'Switzerland', 'Türkiye', 'Ukraine', 'United Kingdom',
}

In [4]:
# Keep only European countries
# isin() matches names exactly and silently skips any requested name that has
# no counterpart in NAME_ENGL, so a spelling difference between the list and
# the dataset would drop a country without any error. Report such names first.
unmatched_names = sorted(european_countries - set(gdf['NAME_ENGL'].dropna()))
if unmatched_names:
    warnings.warn(
        f"{len(unmatched_names)} requested name(s) not found in NAME_ENGL "
        f"and will be missing from the result: {unmatched_names}"
    )

# .copy() detaches the filtered rows from the originally loaded frame
gdf = gdf[gdf['NAME_ENGL'].isin(european_countries)].copy()

In [5]:
# Split every multi-part geometry into one row per part, then renumber the
# rows so the index is a plain consecutive range again
single_parts = gdf.explode(index_parts=False)
gdf = single_parts.reset_index(drop=True)

In [6]:
# Drop parts lying west of Iceland or south of Cyprus
# Centroids are taken in the layer's own CRS and then converted to WGS84 so
# the cut-offs below can be expressed in degrees
part_centroids = gpd.GeoSeries(gdf.geometry.centroid, crs=gdf.crs)
part_centroids_deg = part_centroids.to_crs(epsg=4326)
gdf = gdf.assign(lon=part_centroids_deg.x, lat=part_centroids_deg.y)

# A part survives only when its centroid is east of -25° longitude and north
# of 33° latitude; the helper coordinate columns are removed again afterwards
east_of_cutoff = gdf['lon'] > -25
north_of_cutoff = gdf['lat'] > 33
gdf = gdf[east_of_cutoff & north_of_cutoff].drop(columns=['lon', 'lat'])

In [7]:
# Reproject to projected CRS and compute area
gdf = gdf.to_crs(epsg=3035)
gdf['area'] = gdf.geometry.area

# Determine minimum area (Liechtenstein)
# The largest Liechtenstein part is used as the size threshold in the next step.
liechtenstein_parts = gdf.loc[gdf['NAME_ENGL'] == 'Liechtenstein', 'area']
if liechtenstein_parts.empty:
    # .max() of an empty selection is NaN, and every "area > NaN" comparison is
    # False, which would silently discard all non-microstate parts downstream.
    raise ValueError(
        "No 'Liechtenstein' rows found in NAME_ENGL; cannot derive the area threshold"
    )
liechtenstein_area = liechtenstein_parts.max()

In [8]:
# Retain a part when it is strictly larger than the Liechtenstein threshold,
# or when it belongs to a microstate (those are kept whatever their size)
is_large_part = gdf['area'] > liechtenstein_area
is_microstate = gdf['NAME_ENGL'].isin(microstates)
gdf = gdf[is_large_part | is_microstate].copy()

In [9]:
# The helper area column is no longer needed once the size filter has run
gdf = gdf.drop('area', axis=1)

In [10]:
# Recombine the surviving parts into one geometry per country name, keeping
# the name as an ordinary column rather than as the index
group_column = 'NAME_ENGL'
gdf = gdf.dissolve(by=group_column, as_index=False)

In [11]:
# Write the cleaned country outlines to a GeoPackage file
output_path = "../../data/europe/countries.gpkg"
gdf.to_file(output_path, driver="GPKG")

In [12]:
# Summarise the result: number of rows and the alphabetised country names
row_count = len(gdf)
country_names = sorted(gdf['NAME_ENGL'].unique())
print("Countries:", row_count)
print("List:", country_names)

Countries: 54
List: ['Albania', 'Andorra', 'Armenia', 'Austria', 'Azerbaijan', 'Belarus', 'Belgium', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France', 'Georgia', 'Germany', 'Gibraltar', 'Greece', 'Guernsey', 'Hungary', 'Iceland', 'Ireland', 'Isle of Man', 'Italy', 'Jersey', 'Kazakhstan', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malta', 'Moldova', 'Monaco', 'Montenegro', 'Netherlands', 'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Romania', 'Russian Federation', 'San Marino', 'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Türkiye', 'Ukraine', 'United Kingdom', 'Vatican City']
