https://geopandas.org/index.html

# Libraries

In [None]:
# importing libraries

import pandas as pd

import matplotlib.pyplot as plt

import geopandas as gpd
from shapely.geometry import Point, LineString

import folium
from folium import Marker, GeoJson
from folium.plugins import MarkerCluster, HeatMap


# Data

### Reading GeoJSON file

In [None]:
wc = gpd.read_file('../input/human-development-index-hdi/countries.geojson')
wc.head(2)

In [None]:
wc.columns

In [None]:
wc.economy

### Plotting geopandas dataframe

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))
wc.plot(ax=ax,color='midnightblue')
plt.show()

### Properties

In [None]:
type(wc.geometry)

In [None]:
len(wc)

In [None]:
wc.geometry[:5]

In [None]:
wc.geometry[:5].area

### Converting back to json


In [None]:
# wc_json = wc.to_json()
# print(wc_json)

### Subsetting map

In [None]:
italy = wc[wc['name'] =='Italy']
italy.plot()

In [None]:
china = wc[wc['name'] =='China']
china.plot()

### Color according to column values

In [None]:
wc.plot(column='labelrank', cmap='Blues_r', figsize=(10, 5))

### cmap

In [None]:
wc.plot(column='labelrank', cmap='Greens', figsize=(10, 5))

### categorical

In [None]:
wc.plot(column='economy', cmap='Greens_r', figsize=(10, 5), legend=True)

### subsetting

In [None]:
na = wc[wc['continent']=='Asia']
na.plot(column='labelrank', cmap='Greens', legend=True, figsize=(10, 5))

### cmap, legend, legend keywords

In [None]:
# Legend styling: anchor the legend just outside the right edge of the axes
# and lay the entries out in three columns.
# The legend lists the values of the 'admin' column (presumably country
# names - confirm), so it is titled accordingly rather than 'District Number'.
leg_kwds = {
    'title': 'Country',
    'loc': 'upper left',
    'bbox_to_anchor': (1, 1.03),
    'ncol': 3,
}

# One colour per distinct 'admin' value among the South American rows.
na = wc[wc['continent'] == 'South America']
na.plot(column='admin', cmap='Set2', legend=True, legend_kwds=leg_kwds)

In [None]:
# Legend styling: anchor the legend just outside the right edge of the axes
# and lay the entries out in four columns.
# The legend lists the values of the 'admin' column (presumably country
# names - confirm), so it is titled accordingly rather than 'District Number'.
leg_kwds = {
    'title': 'Country',
    'loc': 'upper left',
    'bbox_to_anchor': (1, 1.03),
    'ncol': 4,
}

# One colour per distinct 'admin' value among the Asian rows.
# `na` is reused by the classification-scheme cells that follow.
na = wc[wc['continent'] == 'Asia']
na.plot(column='admin', cmap='Set2', legend=True, legend_kwds=leg_kwds)

### scheme, k

In [None]:
na.plot(column='labelrank', cmap='Reds', legend=True, scheme='equal_interval', k=2, figsize=(10, 5))

In [None]:
na.plot(column='labelrank', cmap='Reds', legend=True, scheme='equal_interval', k=4, figsize=(10, 5))

In [None]:
na.plot(column='labelrank', cmap='Reds', legend=True, scheme='quantiles', k=3, figsize=(10, 5))

In [None]:
na.plot(column='labelrank', cmap='Reds', legend=True, scheme='quantiles', k=3, figsize=(10, 5))

### Plotting points over base map

In [None]:
# ! ls ../input/natural-earth/110m_cultural/

In [None]:
# most populated cities
cities = gpd.read_file('../input/natural-earth/110m_cultural/ne_110m_populated_places.shp')
# cities.head(2)

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))
wc.plot(ax=ax, color='lightgrey')
cities.plot(ax=ax, color='darkorange', markersize=10)
ax.set_axis_off()

In [None]:
brussels = cities.loc[170, 'geometry']
print(brussels)
print(type(brussels))
brussels

### Subsetting and plotting

In [None]:
# Extract the boundary geometry of four countries from the world table.
# .squeeze() unwraps each one-row selection into the bare geometry object.
belgium = wc.loc[wc['name'] == 'Belgium', 'geometry'].squeeze()
uk = wc.loc[wc['name'] == 'United Kingdom', 'geometry'].squeeze()
germany = wc.loc[wc['name'] == 'Germany', 'geometry'].squeeze()
ireland = wc.loc[wc['name'] == 'Ireland', 'geometry'].squeeze()

# Wrap the bare geometries in a GeoSeries so they can be drawn together.
selected_countries = gpd.GeoSeries([belgium, uk, germany, ireland])
selected_countries.plot()

In [None]:
# .crs

In [None]:
# to crs

In [None]:
# .area .centroid

## .distance() .contains() .within() .touches() .intersects()

### .distance()

In [None]:
# Pick two city points out of `cities` by row label.
# NOTE(review): labels 170 and 156 are assumed to be Brussels and Dublin in
# this particular file - confirm; selecting by a name column would be sturdier.
brussels = cities.loc[170, 'geometry']
dublin = cities.loc[156, 'geometry']

# Planar distance between the two points, in the units of their coordinates.
# NOTE(review): if `cities` is stored in longitude/latitude this number is in
# degrees, not kilometres - check cities.crs before interpreting it.
brussels.distance(dublin)

### .contains()

In [None]:
belgium.contains(brussels)

In [None]:
ireland.contains(brussels)

### .within()

In [None]:
brussels.within(belgium)

### .touches()

In [None]:
belgium.touches(germany)

In [None]:
belgium.touches(uk)

### Creating line from points

In [None]:
# Straight segment between the two cities, built directly from the Point
# objects (vertex order unchanged: Brussels first, then Dublin). This is the
# same list-of-Points form the bird-path cell uses, and it avoids the
# hard-to-read zip of separate x and y tuples.
dublin_brussels_line = LineString([brussels, dublin])

# Draw the four countries as a light backdrop and the segment on top.
fig, ax = plt.subplots()
gpd.GeoSeries([belgium, uk, germany, ireland]).plot(color='gainsboro', ax=ax)
gpd.GeoSeries([dublin_brussels_line]).plot(color='deeppink', ax=ax)
ax.set_axis_off()

### .intersects()

In [None]:
# Print, in list order (Belgium, UK, Germany, Ireland), whether the
# Dublin-Brussels segment shares any point with each country's geometry.
countries_to_check = [belgium, uk, germany, ireland]
for country in countries_to_check:
    print(dublin_brussels_line.intersects(country))

### Plotting line over base map

In [None]:
rivers = gpd.read_file('../input/natural-earth/110m_physical/ne_110m_rivers_lake_centerlines.shp')
# rivers.head(2)

In [None]:
# World countries as a light backdrop with the river centre lines on top.
fig, ax = plt.subplots(figsize=(12, 6))
wc.plot(ax=ax, color='gainsboro')
# `markersize` only affects point markers and is ignored for line geometries,
# so it is not passed here.
rivers.plot(ax=ax, color='teal')
ax.set_axis_off()

In [None]:
amazon = rivers[rivers['name']=='Amazonas']
amazon

In [None]:
amazon.geometry

In [None]:
amazon.geometry.squeeze()

In [None]:
# South American countries as a light backdrop with the river named
# 'Amazonas' drawn on top.
fig, ax = plt.subplots(figsize=(12, 6))
wc[wc['continent'] == 'South America'].plot(ax=ax, color='gainsboro')
# `markersize` only affects point markers and is ignored for line geometries,
# so it is not passed here.
rivers[rivers['name'] == 'Amazonas'].plot(ax=ax, color='teal')
ax.set_axis_off()

### Subsetting dataframes

In [None]:
print(wc.shape)
mask = wc.intersects(amazon.geometry.squeeze())
wc[mask]

In [None]:
mask = wc.intersects(dublin)
wc[mask]

# New York State wild areas and campsites

In [None]:
full_data = gpd.read_file("../input/geospatial-learn-course-data/DEC_lands/DEC_lands/DEC_lands.shp")
full_data.head(2)

In [None]:
type(full_data)

In [None]:
data = full_data.loc[:, ["CLASS", "COUNTY", "geometry"]].copy()

In [None]:
# How many lands of each type are there?
data['CLASS'].value_counts()

In [None]:
# Select lands that fall under the "WILD FOREST" or "WILDERNESS" category
wild_lands = data.loc[data.CLASS.isin(['WILD FOREST', 'WILDERNESS'])].copy()
wild_lands.head()

In [None]:
wild_lands.plot()

In [None]:
wild_lands.geometry.head()

In [None]:
# Campsites in New York state (Point)
POI_data = gpd.read_file("../input/geospatial-learn-course-data/DEC_pointsinterest/DEC_pointsinterest/Decptsofinterest.shp")
campsites = POI_data.loc[POI_data.ASSET=='PRIMITIVE CAMPSITE'].copy()

# Foot trails in New York state (LineString)
roads_trails = gpd.read_file("../input/geospatial-learn-course-data/DEC_roadstrails/DEC_roadstrails/Decroadstrails.shp")
trails = roads_trails.loc[roads_trails.ASSET=='FOOT TRAIL'].copy()

# County boundaries in New York state (Polygon)
counties = gpd.read_file("../input/geospatial-learn-course-data/NY_county_boundaries/NY_county_boundaries/NY_county_boundaries.shp")

In [None]:
# County outlines (unfilled, drawn above the other layers via zorder=3) form
# the base axes; wild lands, campsites and foot trails are overlaid on it.
ax = counties.plot(figsize=(10, 10), color='none', edgecolor='grey', zorder=3)
wild_lands.plot(color='teal', ax=ax)
campsites.plot(color='red', markersize=2, ax=ax)
# Trails are line geometries; `markersize` only applies to point markers and
# was silently ignored, so it is dropped.
trails.plot(color='black', ax=ax)

In [None]:
regions = gpd.read_file("../input/geospatial-learn-course-data/ghana/ghana/Regions/Map_of_Regions_in_Ghana.shp")
print(regions.crs)

In [None]:
# Create a DataFrame with health facilities in Ghana
facilities_df = pd.read_csv("../input/geospatial-learn-course-data/ghana/ghana/health_facilities.csv")

# Convert the DataFrame to a GeoDataFrame: one point per row, with Longitude
# as x and Latitude as y. No CRS is attached at this stage.
facilities = gpd.GeoDataFrame(facilities_df, geometry=gpd.points_from_xy(facilities_df.Longitude, facilities_df.Latitude))

# Overlay the raw points on the regions layer.
# NOTE(review): the points are still plain longitude/latitude values while
# `regions` presumably uses a projected CRS (the next cell re-projects to
# EPSG:32630), so this overlay likely does not line up - confirm whether it
# is kept on purpose as a "before" picture.
ax = regions.plot(figsize=(8,8), color='whitesmoke', linestyle=':', edgecolor='black')
facilities.plot(markersize=1, ax=ax)

In [None]:
# Declare that the raw Longitude/Latitude values are WGS 84 (EPSG:4326).
# An authority string is used instead of the {'init': 'epsg:4326'} dict:
# the 'init' form is deprecated PROJ syntax and triggers warnings.
facilities.crs = "EPSG:4326"

# Create a map: regions as the base layer, facilities re-projected to
# EPSG:32630 on top (presumably the CRS of `regions` - see the printed
# regions.crs).
ax = regions.plot(figsize=(8, 8), color='whitesmoke', linestyle=':', edgecolor='black')
facilities.to_crs(epsg=32630).plot(markersize=1, ax=ax)

In [None]:
# The "Latitude" and "Longitude" columns are unchanged
facilities.head()

In [None]:
# The "Latitude" and "Longitude" columns are unchanged
facilities.to_crs(epsg=32630).head()

In [None]:
# Load the data and print the first 5 rows
birds_df = pd.read_csv("../input/geospatial-learn-course-data/purple_martin.csv", parse_dates=['timestamp'])
print("There are {} different birds in the dataset.".format(birds_df["tag-local-identifier"].nunique()))
birds_df.head()

In [None]:
# Create the GeoDataFrame: one point per observation ("location-long" as x,
# "location-lat" as y), declared as WGS 84 (EPSG:4326) at construction time.
# The authority string replaces the deprecated {'init': 'epsg:4326'} dict.
birds = gpd.GeoDataFrame(
    birds_df,
    geometry=gpd.points_from_xy(birds_df["location-long"], birds_df["location-lat"]),
    crs="EPSG:4326",
)

In [None]:
# Load a GeoDataFrame with country boundaries in North/South America, print the first 5 rows
# NOTE(review): the `gpd.datasets` helper was deprecated in GeoPandas 0.14 and
# removed in 1.0 - confirm the pinned GeoPandas version, or load the Natural
# Earth countries layer from a local file instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Keep only the rows whose 'continent' is North America or South America.
americas = world.loc[world['continent'].isin(['North America', 'South America'])]
americas.head()

In [None]:
ax = americas.plot(figsize=(10,10), color='white', linestyle=':', edgecolor='gray')
birds.plot(ax=ax, markersize=10)

In [None]:
# Points recorded for each bird, in row order, keyed by tag identifier.
# Computed once and shared by the path and start-point tables below.
points_by_bird = birds.groupby("tag-local-identifier")['geometry'].apply(list)

# GeoDataFrame showing path for each bird: the bird's points joined into one
# LineString. The CRS is given as an authority string at construction time
# instead of the deprecated {'init': 'epsg:4326'} dict.
path_df = points_by_bird.apply(LineString).reset_index()
path_gdf = gpd.GeoDataFrame(path_df, geometry=path_df.geometry, crs="EPSG:4326")

# GeoDataFrame showing starting point for each bird: the first point of
# each bird's list.
start_df = points_by_bird.apply(lambda points: points[0]).reset_index()
start_gdf = gpd.GeoDataFrame(start_df, geometry=start_df.geometry, crs="EPSG:4326")

# Show first five rows of GeoDataFrame
start_gdf.head()

In [None]:
# GeoDataFrame showing the final recorded point for each bird: the last
# element of each bird's point list (points are kept in row order).
end_df = birds.groupby("tag-local-identifier")['geometry'].apply(list).apply(lambda points: points[-1]).reset_index()
# The CRS is given as an authority string at construction time instead of
# the deprecated {'init': 'epsg:4326'} dict.
end_gdf = gpd.GeoDataFrame(end_df, geometry=end_df.geometry, crs="EPSG:4326")

In [None]:
# Base layer: country outlines of the Americas (white fill, dotted grey edges).
ax = americas.plot(figsize=(10, 10), color='white', linestyle=':', edgecolor='gray')

# Start points in red, each bird's path coloured from the 'tab20b' colormap,
# end points in black - all drawn on the same axes.
start_gdf.plot(ax=ax, color='red',  markersize=30)
path_gdf.plot(ax=ax, cmap='tab20b', linestyle='-', linewidth=1, zorder=1)
end_gdf.plot(ax=ax, color='black', markersize=30)


In [None]:
# Path of the shapefile to load
protected_filepath = "../input/geospatial-learn-course-data/SAPA_Aug2019-shapefile/SAPA_Aug2019-shapefile/SAPA_Aug2019-shapefile-polygons.shp"

# Read the protected-area polygons into a GeoDataFrame

protected_areas = gpd.read_file(protected_filepath)

# Country boundaries in South America
south_america = americas.loc[americas['continent']=='South America']

# Plot protected areas in South America: country outlines as the base layer,
# protected areas on top, semi-transparent (alpha=0.4)
ax = south_america.plot(figsize=(10,10), color='white', edgecolor='gray')
protected_areas.plot(ax=ax, alpha=0.4)

In [None]:
# Protected area excluding the REP_M_AREA part (presumably the marine
# portion - confirm against the dataset documentation), summed over all rows.
# Series.sum() is vectorised and skips missing values, whereas the builtin
# sum() iterates in Python and lets a single NaN turn the total into NaN.
# NOTE(review): both columns are assumed to be in square kilometres - confirm.
P_Area = (protected_areas['REP_AREA'] - protected_areas['REP_M_AREA']).sum()

# Total area of South America in square kilometres: .area is expressed in
# squared CRS units, and dividing by 10**6 converts square metres to km^2.
# NOTE(review): EPSG:3035 is a Europe-centred equal-area projection - confirm
# it is an acceptable choice for measuring South America.
totalArea = south_america.geometry.to_crs(epsg=3035).area.sum() / 10**6

# What percentage of South America is protected?
# (`percentage_protected` holds a fraction; it is scaled to percent on output.)
percentage_protected = P_Area / totalArea
print('Approximately {}% of South America is protected.'.format(round(percentage_protected*100, 2)))

In [None]:
# Base layer: South American country outlines.
ax = south_america.plot(figsize=(10,10), color='white', edgecolor='gray')
# Protected areas whose MARINE value is not the string '2'
# (presumably '2' marks fully marine areas - confirm against the dataset docs).
protected_areas[protected_areas['MARINE']!='2'].plot(ax=ax, alpha=0.4, zorder=1)
# Bird observations with a negative y coordinate; y is built from
# "location-lat", so these are the points south of the equator.
birds[birds.geometry.y < 0].plot(ax=ax, color='red', alpha=0.6, markersize=10, zorder=2)

https://datascience103579984.wordpress.com/2019/11/25/working-with-geospatial-data-in-python-from-datacamp/

https://www.datacamp.com/community/tutorials/geospatial-data-python

# Geocode

In [None]:
from geopandas.tools import geocode

In [None]:
geocode("Taj Mahal")

In [None]:
geocode("The White House")

In [None]:
result = geocode("The Great Pyramid of Giza", provider="nominatim")
result

In [None]:
point = result.geometry.iloc[0]
print("Latitude:", point.y)
print("Longitude:", point.x)

In [None]:
universities = pd.read_csv("../input/geospatial-learn-course-data/top_universities.csv")
universities.head()

In [None]:
import numpy as np

def my_geocoder(row):
    """Geocode one free-text query with the Nominatim provider.

    Args:
        row: The query handed to ``geocode``. Despite the name, the caller
            passes a single university name, not a whole DataFrame row.

    Returns:
        A ``pd.Series`` with ``Latitude``, ``Longitude`` and ``geometry``
        taken from the first result, or ``None`` when the lookup fails.
    """
    try:
        point = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({'Latitude': point.y, 'Longitude': point.x, 'geometry': point})
    except Exception:
        # Best-effort lookup: any geocoding failure marks the entry as
        # "not found". Catching Exception (not a bare except) keeps
        # KeyboardInterrupt and SystemExit working during the long run.
        return None

# Geocode every university by its 'Name'; failed lookups are detected below
# through a missing 'Latitude'.
universities[['Latitude', 'Longitude', 'geometry']] = universities.apply(lambda x: my_geocoder(x['Name']), axis=1)

# Rows that actually received coordinates. notna() is used rather than
# np.isnan because it also copes with None / object-typed columns.
geocoded = universities["Latitude"].notna()

print("{}% of addresses were geocoded!".format(
    (1 - (~geocoded).sum() / len(universities)) * 100))

# Drop universities that were not successfully geocoded
universities = universities.loc[geocoded]
# Declare the points as WGS 84 with an authority string; the
# {'init': 'epsg:4326'} dict is deprecated PROJ syntax.
universities = gpd.GeoDataFrame(universities, geometry=universities.geometry, crs="EPSG:4326")
universities.head()

# Table joins

In [None]:
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
europe = world.loc[world.continent == 'Europe'].reset_index(drop=True)

europe_stats = europe[["name", "pop_est", "gdp_md_est"]]
europe_boundaries = europe[["name", "geometry"]]

In [None]:
europe_boundaries.head()

In [None]:
europe_stats.head()

In [None]:
europe = europe_boundaries.merge(europe_stats, on="name")
europe.head()

In [None]:
# Use spatial join to match universities to countries in Europe
european_universities = gpd.sjoin(universities, europe)

# Investigate the result
print("We located {} universities.".format(len(universities)))
print("Only {} of the universities were located in Europe (in {} different countries).".format(
    len(european_universities), len(european_universities.name.unique())))

european_universities.head()

In [None]:
# Load and preview Starbucks locations in California
starbucks = pd.read_csv("../input/geospatial-learn-course-data/starbucks_locations.csv")
starbucks.head()

In [None]:
# How many rows in each column have missing values?
print(starbucks.isnull().sum())

# View rows with missing locations
# NOTE(review): this selects every row whose City is "Berkeley", not rows
# tested for missing coordinates - it presumably relies on the missing
# values being exactly the Berkeley stores; confirm with the counts above.
rows_with_missing = starbucks[starbucks["City"]=="Berkeley"]
rows_with_missing

In [None]:
# Your code here
def my_geocoder(row):
    """Geocode one address with the Nominatim provider.

    Args:
        row: The query handed to ``geocode``. Despite the name, the caller
            passes a single 'Address' value, not a whole DataFrame row.

    Returns:
        A ``pd.Series`` with the ``Longitude`` and ``Latitude`` of the
        first result.

    Errors raised by the lookup are not caught here and reach the caller.
    """
    # Positional access (.iloc[0]) takes the first result whatever its index
    # label is, matching how the other geocoding cells read the result.
    point = geocode(row, provider='nominatim').geometry.iloc[0]
    return pd.Series({'Longitude': point.x, 'Latitude': point.y})

berkeley_locations = rows_with_missing.apply(lambda x: my_geocoder(x['Address']), axis=1)
starbucks.update(berkeley_locations)

print(starbucks.isnull().sum())

In [None]:
# Base map for the Berkeley stores
m_2 = folium.Map(location=[37.88,-122.26], zoom_start=13)

# One marker per store whose City is 'Berkeley'
berkeley_stores = starbucks[starbucks["City"]=='Berkeley']
for _, store in berkeley_stores.iterrows():
    position = [store['Latitude'], store['Longitude']]
    Marker(position).add_to(m_2)

# Display the map as the cell output
m_2

In [None]:
CA_counties = gpd.read_file("../input/geospatial-learn-course-data/CA_county_boundaries/CA_county_boundaries/CA_county_boundaries.shp")
CA_pop = pd.read_csv("../input/geospatial-learn-course-data/CA_county_population.csv", index_col="GEOID")
CA_high_earners = pd.read_csv("../input/geospatial-learn-course-data/CA_county_high_earners.csv", index_col="GEOID")
CA_median_age = pd.read_csv("../input/geospatial-learn-course-data/CA_county_median_age.csv", index_col="GEOID")

In [None]:
cols_to_add = CA_pop.join([CA_high_earners, CA_median_age]).reset_index()
CA_stats = CA_counties.merge(cols_to_add, on="GEOID")

In [None]:
CA_stats["density"] = CA_stats["population"] / CA_stats["area_sqkm"]

In [None]:
# Criteria every selected county must satisfy.
meets_all_minimums = (
    (CA_stats.high_earners > 100000)
    & (CA_stats.median_age < 38.5)
    & (CA_stats.density > 285)
)

# In addition, at least one of these stricter criteria must hold.
meets_any_stricter = (
    (CA_stats.median_age < 35.5)
    | (CA_stats.density > 1400)
    | (CA_stats.high_earners > 500000)
)

sel_counties = CA_stats[meets_all_minimums & meets_any_stricter]

In [None]:
# Point layer of the stores (Longitude as x, Latitude as y), declared as
# WGS 84 at construction time. The authority string replaces the deprecated
# {'init': 'epsg:4326'} dict, which also does not compare equal to a
# file-derived EPSG:4326 CRS and so can trigger CRS-mismatch warnings in
# spatial joins.
starbucks_gdf = gpd.GeoDataFrame(
    starbucks,
    geometry=gpd.points_from_xy(starbucks.Longitude, starbucks.Latitude),
    crs="EPSG:4326",
)

In [None]:
# Fill in your answer
locations_of_interest = gpd.sjoin(starbucks_gdf, sel_counties)
num_stores = len(locations_of_interest)

In [None]:
import math

In [None]:
# Base map for the selected stores
m_6 = folium.Map(location=[37,-120], zoom_start=6)

# Cluster layer that collects one marker per selected store
mc = MarkerCluster()

# Stores joined to the selected counties
locations_of_interest = gpd.sjoin(starbucks_gdf, sel_counties)
for _, store in locations_of_interest.iterrows():
    # Skip stores whose coordinates are missing
    if math.isnan(store['Longitude']) or math.isnan(store['Latitude']):
        continue
    Marker([store['Latitude'], store['Longitude']]).add_to(mc)

mc.add_to(m_6)

# Display the map as the cell output
m_6

In [None]:
releases = gpd.read_file("../input/geospatial-learn-course-data/toxic_release_pennsylvania/toxic_release_pennsylvania/toxic_release_pennsylvania.shp") 
releases.head()

In [None]:
stations = gpd.read_file("../input/geospatial-learn-course-data/PhillyHealth_Air_Monitoring_Stations/PhillyHealth_Air_Monitoring_Stations/PhillyHealth_Air_Monitoring_Stations.shp")
stations.head()

In [None]:
print(stations.crs)
print(releases.crs)

In [None]:
# Select one release incident in particular
recent_release = releases.iloc[360]

# Measure distance from release to each station
distances = stations.geometry.distance(recent_release.geometry)
distances

In [None]:
print('Mean distance to monitoring stations: {} feet'.format(distances.mean()))


In [None]:
# Report the smallest distance and the station it belongs to.
print('Closest monitoring station ({} feet):'.format(distances.min()))
# idxmin() returns an index *label*, so the row is looked up with the
# label-based .loc; the positional .iloc only gives the same row while the
# index happens to be the default 0..n-1 range.
print(stations.loc[distances.idxmin()][["ADDRESS", "LATITUDE", "LONGITUDE"]])

In [None]:
two_mile_buffer = stations.geometry.buffer(2*5280)
two_mile_buffer.head()

In [None]:
# Map with a heat layer of release incidents and a marker per station
m = folium.Map(location=[39.9526,-75.1652], zoom_start=11)

release_coords = releases[['LATITUDE', 'LONGITUDE']]
HeatMap(data=release_coords, radius=15).add_to(m)

for _, station in stations.iterrows():
    Marker([station['LATITUDE'], station['LONGITUDE']]).add_to(m)

# Draw the buffer polygons, re-projected to EPSG:4326 for the web map
buffers_lonlat = two_mile_buffer.to_crs(epsg=4326)
GeoJson(buffers_lonlat).add_to(m)

# Display the map as the cell output
m

In [None]:
# Turn group of polygons into single multipolygon
my_union = two_mile_buffer.geometry.unary_union
print('Type:', type(my_union))

# Show the MultiPolygon object
my_union

In [None]:
# The closest station is less than two miles away
my_union.contains(releases.iloc[360].geometry)

In [None]:
# The closest station is more than two miles away
my_union.contains(releases.iloc[358].geometry)