## Demo geo_spde.preprocess_coords

#### Libraries

In [None]:
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import geodatasets
from plotnine import *

from geo_spde.coords import preprocess_coords
from geo_spde.exceptions import CoordsError

#### Geolocated data

Taken from Shaddick et al. (2016): ground monitor measurements from Canada, the USA, and Mexico.

In [None]:
# Load the ground-monitor measurements from the CSV file in the working directory.
pm25_csv = 'north_america_pm25.csv'
pm25 = pd.read_csv(pm25_csv)

#### World map

From Natural Earth: a low-resolution world map with country boundaries.

In [None]:
# Fetch the low-resolution (110m) Natural Earth country boundaries.
url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
world = gpd.read_file(url)

# NOTE: to list the available columns, evaluate `sorted(world.keys())` as the
# last expression of its own cell; placed mid-cell its result is silently
# discarded, so it is not executed here.

# Keep only the three countries the monitor data come from.
na_map = world[world['ISO_A3'].isin(['CAN', 'USA', 'MEX'])]

# Display the selected rows as a quick check of the filter.
na_map[['ISO_A3', 'ADMIN']]


#### Plot raw data

Show the ground monitor measurements on the map in the raw coordinate system (lon/lat).

In [None]:
# Monitor locations in raw lon/lat, coloured by PM2.5, drawn over the
# country outlines.
raw_point_aes = aes(x='Longitude', y='Latitude', color='PM25')

p1 = (
    ggplot()
    + geom_map(na_map, fill='white', color='black', size=0.2)
    + geom_point(data=pm25, mapping=raw_point_aes, size=0.1)
    + scale_color_gradient2(
        low='blue',
        mid='lightgreen',
        high='darkorange',
        midpoint=15,
        name='PM2.5',
    )
    + theme_minimal()
    + labs(title="North America PM2.5 Locations, raw coords")
    + coord_fixed()
)

p1

### geo_spde.preprocess_coords

Remove any duplicate observations and project the lon/lat coordinates to an appropriate map scale.

In [None]:
# Pull the lon/lat columns out as a plain array (longitude first) and hand
# them to preprocess_coords, which returns the cleaned coordinates, the
# indices used below to select the matching rows, and projection metadata.
lonlat_columns = ['Longitude', 'Latitude']
pm25_coords = pm25[lonlat_columns].to_numpy()
clean_coords, indices, proj_info = preprocess_coords(pm25_coords)

### Plot preprocessed data

In [None]:
# Select the rows picked out by `indices` and attach the projected x/y
# coordinates as two new columns.
df_projected = pm25.iloc[indices].assign(
    x_proj=clean_coords[:, 0],
    y_proj=clean_coords[:, 1],
)
df_projected.head(3)

In [None]:
# Reproject the country outlines into the CRS described by
# proj_info['proj4_string'] (Albers, per the plot title below) so the map and
# the projected points share one coordinate system.
target_crs = proj_info['proj4_string']
na_map_albers = na_map.to_crs(target_crs)

In [None]:
# Same plot as the raw-coordinate one, but with both the map and the points
# in projected coordinates.
projected_point_aes = aes(x='x_proj', y='y_proj', color='PM25')

p2 = (
    ggplot()
    + geom_map(na_map_albers, fill='white', color='black', size=0.2)
    + geom_point(data=df_projected, mapping=projected_point_aes, size=0.1)
    + scale_color_gradient2(
        low='blue',
        mid='lightgreen',
        high='darkorange',
        midpoint=15,
        name='PM2.5',
    )
    + theme_minimal()
    + labs(title="North America PM2.5 Locations, Albers projection")
    + coord_fixed()
)

p2

#### Compare to raw data

In [None]:
# Re-display the raw lon/lat plot for comparison with the projected plot p2.
p1