In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

In [None]:
# Load the countries shapefile from the local data folder
fp = os.path.join('data', 'ne_50m_admin_0_countries', 'ne_50m_admin_0_countries.shp')
countries = gpd.read_file(fp)

# Lower-case every column name so later cells can refer to them uniformly
countries.columns = [name.lower() for name in countries.columns]

# Keep only the columns used in this notebook
countries = countries[['admin', 'type', 'geometry']]
countries.head(3)

In [None]:
# Import Arctic communities GeoJSON directly from URL
# (read straight from cn.dataone.org, so this cell needs network access).
# NOTE: the name `URL` is reassigned by the country-names cell further down.
URL = 'https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3Aed7718ae-fb0d-43dd-9270-fbfe80bfc7a4'
communities = gpd.read_file(URL)
communities

In [None]:
# Import country names from URL
# (a CSV hosted on GitHub, so this cell needs network access).
# NOTE: this overwrites the `URL` set in the communities cell above.
URL = 'https://raw.githubusercontent.com/MEDS-eds-220/MEDS-eds-220-course/refs/heads/main/book/chapters/lesson-12-merge-data/country_names.csv'
country_names = pd.read_csv(URL)
country_names

The CRS of the 'communities' geodataframe is EPSG:4326 because all GeoJSON files are given in this CRS


In [None]:
# True when both layers share the same CRS, i.e. they can be drawn on one set of axes without reprojecting
countries.crs == communities.crs

Since the CRS match, we can plot them together

In [None]:
fig, ax = plt.subplots()

# Draw the country polygons first so the community points end up on top
countries.plot(ax=ax)

# Plot the communities once, in red, so they stand out against the polygons
# (a second, default-coloured layer underneath would be completely hidden)
communities.plot(ax=ax, color='red')

ax.set_title('Arctic communities and world countries')
plt.show()

## Arctic communities by country

In [None]:
# Count the communities recorded under each value of 'country'
n_comms = (
    communities
    .groupby('country')
    .size()                              # one count per group
    .reset_index(name='n_communities')   # back to a two-column data frame
)
n_comms

## 'if-else' statements

Our goal is to merge the 'n_comms' counts per country with the 'countries' geometries. To merge them, the two data frames need to have at least one column in common.


In [None]:
# Column names of the countries data frame, as a set
set(countries.columns)

In [None]:
# Column names present in both data frames; an empty set means there is no shared column to merge on
set(countries.columns.intersection(n_comms.columns))

In [None]:
# Column names that appear in both data frames
common_columns = set(countries.columns) & set(n_comms.columns)

# Report the overlap, or the lack of one
print(f"Common columns: {common_columns}" if common_columns else "No columns in common.")

## Check-in

Wrap up the previous code into a function called 'check_column_names' that prints a message depending on whether two general data frames have column names in common.

In [None]:
# Check if there are any common column names between two general data frames
def check_column_names(df1, df2):
    """
    Print a message depending on whether df1 and df2 have column names in common.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame (or any object with a `.columns` attribute)
        The two data frames whose column names are compared.

    Returns
    -------
    None
        The outcome is reported through `print` only.
    """
    # Set intersection: order and duplicates of the column names do not matter
    common_columns = set(df1.columns).intersection(df2.columns)

    # This check has to be indented inside the function. At module level it
    # would run once, when the cell is executed, against whatever global
    # `common_columns` happens to exist, and calling the function would print nothing.
    if common_columns:
        print(f"Common column names: {common_columns}")
    else:
        print("No column names in common.")

## Merging data frames

We want to: 

1. use the 'country_names' data frame to add the country names to the 'n_comms' data.
2. merge the resulting data frame with the 'countries' geometries.

To merge dataframes we can use 'pandas.merge()' function:
```python
output_df = pd.merge(left_df,                   # df "on the left side"
                     right_df,                  # df "on the right side"
                     how = type_of_join,        # type of join, default = inner join
                     on = column_to_join)       # which column to join on, must be in both data frames
```

In [None]:
# Add the columns of country_names to the per-country counts.
# A left join keeps every row of n_comms, matched on the shared 'country' column.
# NOTE: n_comms is overwritten, so re-create it (the groupby cell above) before re-running this cell.
n_comms = n_comms.merge(country_names, how='left', on='country')

In [None]:
# Re-check for shared column names now that n_comms has been merged with country_names
check_column_names(n_comms, countries)

## Check-in 
Create a new variable called 'arctic_countries' which is the result of an inner join between our 'countries' and 'n_comms' data frames.


In [None]:
# Inner join on the country name: only rows whose 'admin' value appears in
# both data frames are kept. The geo data frame stays on the left-hand side.
arctic_countries = countries.merge(n_comms, on='admin', how='inner')

In [None]:
# Inspect the result of the inner join
arctic_countries

Checking the result, we notice that we lost the Aland Islands. Let's double-check.

In [None]:
# Update index: use the country name ('admin') as the row label so rows can be looked up by name.
# NOTE: not re-runnable as-is. Once 'admin' is the index it is no longer a column,
# so executing this cell a second time raises a KeyError.
arctic_countries = arctic_countries.set_index('admin')
arctic_countries

In [None]:
# Check whether the Aland Islands survived the join (False means they were dropped).
# 'admin' is now the index, and `.values` does not include the index, so the
# country name has to be looked up in the index rather than in the cell values.
'Aland Islands' in arctic_countries.index

The Aland Islands is an autonomous region of Finland and there is one Arctic community registered in this region.
We will directly add one to Finland to account for this.  

In [None]:
# Add the single Aland Islands community to Finland's total.
# NOTE: this increments on every execution, so run it exactly once per fresh merge.
arctic_countries.at['Finland', 'n_communities'] += 1

In [None]:
# Confirm Finland's updated count
arctic_countries.at['Finland', 'n_communities']

## Choropleth map

To make the map we just need to specify the 'column' parameter in 'plot()' and make it equal to the column with the values we want to plot in each country:

In [None]:
# Colour each country by its value in 'n_communities' and show a legend for the colours
arctic_countries.plot(column='n_communities',
                      legend=True)

## Reprojecting

'to_crs()' method for 'geopandas.GeoDataFrame'

```python
updated_geodf = geodf.to_crs(new_crs)
```

Here `new_crs` is the CRS we want to convert to; it is either an object of type CRS or a string representing the CRS.

In [None]:
# Reproject to CRS optimized for Arctic region
# (arctic_countries is overwritten with its EPSG:3413 version, then drawn with default styling
# as a quick visual check of the new projection)
arctic_countries = arctic_countries.to_crs('epsg:3413')
arctic_countries.plot()

In [None]:
# Final choropleth: community counts per country on a frameless canvas
fig, ax = plt.subplots(figsize=(8, 6))
ax.axis('off')  # hide ticks and frame for a cleaner map

# Shade each country by its number of communities, with light grey borders
# and a horizontal colour bar underneath the map
arctic_countries.plot(
    column='n_communities',
    ax=ax,
    cmap='PuBuGn',
    edgecolor="0.6",
    linewidth=0.5,
    legend=True,
    legend_kwds=dict(
        shrink=0.7,
        label="Number of Arctic Communities",
        orientation="horizontal",
        pad=0.05,
    ),
)

# Title for context
ax.set_title(
    'Distribution of Arctic Communities',
    fontsize=18,
    weight='bold',
    pad=15,
)

plt.show()

# 