# Country visualization test

### Installation
1. Create virtual environment from requirements.txt
2. Add kernel to jupyter notebook using *ipython kernel install --user --name=YOUR_VIRTUAL_ENV_NAME*

In [None]:
# Hidden dependency of geopandas: descartes
import geopandas as gpd
import matplotlib.pyplot as plt

## Datasets
Datasets are from [naturalearthdata](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/) and are published under a public license, meaning they are free for everybody to use. For countries, the dataset **Admin 0 – Countries** is used; for populated places, the dataset **Populated Places** is used.

In [None]:
# Read all countries from the shapefile, report the CRS and plot them all.
all_countries = gpd.read_file('data/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp')
# Format the CRS object itself instead of indexing it with ["init"]:
# subscripting only works while `crs` is a plain dict and fails once
# geopandas exposes it as a pyproj CRS object.
print(f'Data CRS is {all_countries.crs}')
all_countries.plot()
# Last expression of the cell: show the first rows as a preview.
all_countries.head(4)

## Country visualization
The following visualizations show the country selected in the variable *country_name* below. To see all available countries, check the ADMIN column of the *all_countries* dataframe.

In [None]:
# Country to visualize; it is matched against the ADMIN column of
# all_countries when the country is extracted.
country_name = 'Switzerland'
# When True, the extraction step is meant to keep only the largest polygon
# of the selected country instead of all of its parts.
show_largest_area_only = False

In [None]:
# Extract the selected country from the dataset. If show_largest_area_only
# is set, replace each geometry with its largest polygon part.
def _largest_polygon(geom):
    """Return the largest-area part of a multi-part geometry.

    Geometries that expose no ``geoms`` attribute (e.g. a plain Polygon)
    are returned unchanged.
    """
    parts = getattr(geom, 'geoms', None)
    if parts is None:
        return geom
    # Access the parts through `.geoms` rather than iterating the geometry
    # itself; default=geom keeps an empty multi-part geometry as it is.
    return max(parts, key=lambda part: part.area, default=geom)


country_4326 = all_countries[all_countries['ADMIN'] == country_name].copy()
# Skip the replacement when no row matched, so an unknown country name
# yields an empty frame instead of an error about an undefined polygon.
if show_largest_area_only and not country_4326.empty:
    country_4326['geometry'] = country_4326.geometry.apply(_largest_polygon)

In [None]:
# Default plot: draw the selected country (still in the dataset's original
# CRS) with geopandas' default styling on a new figure.
country_4326.plot()

In [None]:
# Styled plot: grey fill with a black outline, drawn without axis
# decorations (ticks, labels and frame are hidden).
fig, ax = plt.subplots()
ax.set_axis_off()
country_4326.plot(ax=ax, color='grey', edgecolor='black')

In [None]:
# Reproject data into pseudomercator (3857)
country_3857 = country_4326.to_crs(epsg=3857)
# Create a fresh figure for this cell. Reusing `ax` from the previous cell
# would draw the reprojected country onto the axes that already holds the
# EPSG:4326 plot, mixing two coordinate systems in one picture.
fig, ax = plt.subplots()
ax.set_axis_off()
country_3857.plot(ax=ax, color='grey', edgecolor='black')

## Point overlay
The dataset **Populated places** contains a wealth of information about the most populated places worldwide. It also includes a geometry column holding a point feature for each place. In the following, the data is filtered and sorted to show the 5 most populated places of the selected country.

In [None]:
# Data preparation: load the populated places, keep the rows whose ADM0NAME
# equals country_name, and take the 5 entries with the highest POP_MAX.
pop_places = gpd.read_file('data/ne_10m_populated_places/ne_10m_populated_places.shp')
country_megacities_top5_4326 = (
    pop_places[pop_places['ADM0NAME'] == country_name]
    .sort_values(by=['POP_MAX'], ascending=False)
    .head(5)
)
# Reproject to pseudomercator (EPSG:3857) as well.
country_megacities_top5_3857 = country_megacities_top5_4326.to_crs(epsg=3857)
# Last expression of the cell: display the reprojected selection.
country_megacities_top5_3857

In [None]:
# Overlay with static attributes: every place uses the same marker, color
# and size.
fig, ax = plt.subplots()
ax.set_axis_off()
# Draw the country first so the points plotted afterwards share its axes.
country_3857.plot(ax=ax, color='grey', edgecolor='black')
country_megacities_top5_3857.plot(ax=ax, marker='o', color='red', markersize=50)

In [None]:
# Overlay with per-point colors: the first entry is blue, the rest are red.
# The list is built from the actual number of places so it also matches
# countries that have fewer than 5 entries in the dataset.
colors = ['b' if idx == 0 else 'r' for idx in range(len(country_megacities_top5_3857))]

fig, ax = plt.subplots()
ax.set_axis_off()
country_3857.plot(ax=ax, color='grey', edgecolor='black')
# Use the public GeoDataFrame.plot API instead of the internal
# gpd.plotting.plot_point_collection helper; it accepts one color per point.
country_megacities_top5_3857.plot(ax=ax, color=colors, markersize=50)

In [None]:
# Overlay with different markersizes corresponding to the population sizes.
def get_markersizes(dataseries, min_markersize, max_markersize):
    """Linearly rescale the values of ``dataseries`` to marker sizes.

    The smallest value maps to ``min_markersize`` and the largest to
    ``max_markersize``; values in between are interpolated linearly.

    Parameters
    ----------
    dataseries : sized iterable with ``min()`` and ``max()`` methods
        The values to rescale, e.g. a pandas Series.
    min_markersize, max_markersize : number
        Marker sizes assigned to the smallest and largest value.

    Returns
    -------
    list
        One marker size per value, in input order. An empty input gives an
        empty list. If all values are equal, every entry is the midpoint
        between ``min_markersize`` and ``max_markersize``.
    """
    if len(dataseries) == 0:
        return []

    min_data = dataseries.min()
    data_span = dataseries.max() - min_data
    if data_span == 0:
        # All values are identical (e.g. a single place): the linear
        # rescaling would divide by zero, so use a uniform medium size.
        return [(min_markersize + max_markersize) / 2] * len(dataseries)

    size_span = max_markersize - min_markersize
    return [(v - min_data) / data_span * size_span + min_markersize for v in dataseries]

# One marker size per place, scaled between 40 and 400 by POP_MAX.
markersizes = get_markersizes(country_megacities_top5_3857['POP_MAX'], 40, 400)

fig, ax = plt.subplots()
ax.set_axis_off()
country_3857.plot(ax=ax, color='grey', edgecolor='black')
# Use the public GeoDataFrame.plot API instead of the internal
# gpd.plotting.plot_point_collection helper; markersize may be a sequence
# with one size per point.
country_megacities_top5_3857.plot(ax=ax, color='red', markersize=markersizes)