# Final Project: Earth Analytics Python Course, Spring 2020
## Steph Shepherd & Lauren Herwehe

Short description of the purpose of the code

In [1]:
# Import libraries
import warnings
from glob import glob
import os

import numpy as np
import numpy.ma as ma
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import geopandas as gpd

import earthpy as et
import earthpy.spatial as es
import earthpy.plot as ep

# Set working directory
os.chdir(os.path.join(et.io.HOME, 'earth-analytics', 'data'))

### Psuedocode
Overlay projected dam locations and rivers (with additional attributes on degree of regulations) on top of protected areas to identify which and to what degree protected areas are impacted by proposed hydropower dams


1. Get Data from figshare download (all are shapefiles)
    * Ramsar Sites
    * Proposed Dams
2. Open each shapefile
    * check CRS
    * select the desired attributes for each shapefile and export to a new geopandas dataframe
    * fill any NA values
3. Select Sites
    * use a buffer of (5 km?, 10 km?, maybe both) to select all Ramsar sites within the buffer distance from a proposed dam
    * save this as a new geopandas dataframe
4. Calculate stats
    * area of land impacted? (from Ramsar site shapefiles)
    * 
4. Create Deliverables - plots/figures
    * Map of all the sites
    * Bar plot Number of sites per region (continent, country, ???)
    * Map of one or two countries/regions
    * Graph comparing these regions.



In [2]:
#Download Data
#Ramsar Sites
et.data.get_data(url='https://ndownloader.figshare.com/articles/12217679')

#Future dams
et.data.get_data(url='https://ndownloader.figshare.com/articles/12216467')

Downloading from https://ndownloader.figshare.com/articles/12217679
Downloading from https://ndownloader.figshare.com/articles/12216467


'/home/jovyan/earth-analytics/data/earthpy-downloads/12216467'

In [3]:
# Open the necessary shapefiles with geopandas
ramsar_areas = gpd.read_file(os.path.join("final-project-data", "ramsar-boundaries", "features_publishedPolygon.shp"))

# Check the crs of the files
print(ramsar_areas.crs)


# # Clean the no data values in each shapefile
# sjer_roads_cl["RTTYP"] = sjer_roads_cl["RTTYP"].fillna("Unknown")
# sjer_roads_cl = sjer_roads_cl[~sjer_roads_cl.is_empty]

epsg:4326


In [6]:
# Open the necessary csv files with pandas
fname = os.path.join("final-project-data", "future-dams", "future_dams_2015.csv")
dam_locs = pd.read_csv(fname)

dam_locs


Unnamed: 0,DAM_ID,Project name,Continent,Country,Main_river,Major Basin,Capacity (MW),LAT_cleaned,Lon_Cleaned,Stage,Start,End,Reference 1,Reference 2,Reference 3
0,0,Susitna-Watana,North America,United States,Susitna,Pacific and Arctic Coast 1,600.00,62.786772,-148.025780,P,2012.0,2023.0,http://www.hydroreform.org/projects/susitna-wa...,,
1,1,Site C,North America,Canada,Peace,Mackenzie,900.00,56.225245,-120.950245,P,2014.0,2020.0,http://en.wikipedia.org/wiki/Site_C_dam,https://www.sitecproject.com/sites/default/fil...,
2,2,Gull Island,North America,Canada,Churchill,Churchill,2250.00,53.032879,-61.212954,U,,2028.0,http://en.wikipedia.org/wiki/Lower_Churchill_P...,http://www.nalcorenergy.com/lower-churchill-pr...,
3,3,Muskrat,North America,Canada,Churchill,Churchill,824.00,53.244625,-60.773792,U,,2017.0,http://en.wikipedia.org/wiki/Lower_Churchill_P...,http://www.nalcorenergy.com/lower-churchill-pr...,
4,4,Conawapa,North America,Canada,Nelson,Saskatchewan - Nelson,1485.00,56.535417,-94.029465,P,,2022.0,http://www.energymanitoba.org/hp_conawapa.htm,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3695,3777,Esperanza De Atirro,North America,Costa Rica,Parismina,Southern Central America,3.40,9.810665,-83.648165,P,,,Plantas y proyectos,,
3696,3778,Rio Banano,North America,Costa Rica,Estrella,Southern Central America,20.00,9.892825,-83.164583,P,,,Plantas y proyectos,,
3697,3779,Tarcoles,North America,Costa Rica,Tárcoles / Virilla / Colorado,Southern Central America,19.27,9.916664,-84.466669,P,,,Plantas y proyectos,,
3698,3780,Ruf,North America,Costa Rica,Tárcoles / Virilla / Colorado,Southern Central America,20.00,9.910417,-84.487152,P,,,Plantas y proyectos,,


In [None]:

# # Check the crs of the files
# print(ca_counties.crs)
# print(roads.crs)

# Reproject both layers to epsg 0570
ca_counties_5070 = ca_counties.to_crs(epsg=5070)
roads_5070 = roads.to_crs(epsg=5070)

# Select only the three counties of interest
three_counties = ca_counties_5070[ca_counties_5070['NAME'].isin(
    ["Siskiyou", "Modoc", "Del Norte"])]

# # Check the new crs of the files
# print(three_counties_5070.crs)
# print(roads_5070.crs)

# Clip the roads data using the clip_shp module
roads_5070_cl = cl.clip_shp(roads_5070, three_counties)

# Redefine the CRS of the roads layer
roads_5070_cl.crs = three_counties.crs

# Assign the roads to their respective county with a spatial join
roads_region = gpd.sjoin(roads_5070_cl, three_counties,
                         how="inner", op='intersects')

In [None]:
# PLOT 1 - Place only the code required to create a plot of your data here
# Additional processing code can go above this code cell

# Settting color palettes and sizing for roads and points
pointsPalette = {'trees': 'chartreuse',
                 'grass': 'darkgreen', 'soil': 'burlywood'}

roadPalette = {'M': 'grey', 'S': "blue",
               'C': "magenta", 'Unknown': "lightgrey"}

lineWidths = {'M': .5, 'S': 2, 'C': 2, 'Unknown': .5}

# Create figure
fig, ax = plt.subplots(figsize=(10, 10))

for ctype, data in sjer_plots.groupby('plot_type'):
    color = pointsPalette[ctype]
    label = ctype
    data.plot(color=color,
              ax=ax,
              label=label,
              markersize=100)

for ctype, data in sjer_roads_cl.groupby('RTTYP'):
    color = roadPalette[ctype]
    label = ctype
    data.plot(color=color,
              ax=ax,
              linewidth=lineWidths[ctype],
              label=label)

ax.set(title='Madera County Roads and Study Plot Locations')

ax.legend(fontsize=15,
          frameon=False,
          loc=('lower right'),
          title="LEGEND")

ax.set_axis_off()
plt.axis('equal')
### DO NOT REMOVE LINE BELOW ###
plot01_roads_plot_locs = nb.convert_axes(plt, which_axes="current")

In [None]:
# PLOT 2 - Place only the code required to plot your data here
# Additional processing code can go above this code cell
# Important: name your final geodataframe for county boundaries: three_counties

# Plot the data
fig, ax = plt.subplots(figsize=(10, 5))
three_counties.plot(edgecolor="black",
                    facecolor='none',
                    ax=ax)

roads_region.plot(column='NAME',
                  ax=ax,
                  legend=True)

ax.set(title='California Roads in Del Norte, Modoc, and Siskiyou Counties')
ax.set_axis_off()
plt.axis('equal')

### DO NOT REMOVE LINE BELOW ###
plot02_county_roads_clip = nb.convert_axes(plt, which_axes="current")

In [None]:
# TABLE 1 - Place the code required to create the dataframe
# Important: name your final geodataframe: cali_roads_summary

# # Calculate the total length of road in each county
roads_region['length'] = roads_region.length
cali_roads_summary = roads_region[['length', 'NAME']].groupby('NAME').sum()

# Print the new table
print(cali_roads_summary)

In [None]:
# Download the data
data = et.data.get_data(
    url=' https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip')

# Open the necessary files with geopandas
countries = gpd.read_file(os.path.join("data", "earthpy-downloads",
                                       "ne_10m_admin_0_countries", "ne_10m_admin_0_countries.shp"))

# Subset the data
pop_data = countries[["REGION_WB", "CONTINENT",
                      "POP_RANK", "POP_EST", 'geometry']]

# Dissolve and aggregate the data
mean_region_val = pop_data.dissolve(by='REGION_WB', aggfunc=['sum', 'mean'])

# Getting column names to use in making our plots
list(mean_region_val.columns)

In [None]:
# PLOT 3 - Place only the code required to plot your data here
# Additional processing code can go above this code cell
# Important: name your final geodataframe: mean_region_val
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))

mean_region_val.plot(column=('POP_EST', 'sum'),
                     legend=True,
                     cmap='OrRd',
                     ax=ax1)

mean_region_val.plot(column=('POP_RANK', 'mean'),
                     cmap='OrRd',
                     legend=True,
                     ax=ax2)

plt.suptitle('Global Total Estimated Population by Region', fontsize=16)

plt.show()


### DO NOT REMOVE LINE BELOW ###
plot04_global_population = nb.convert_axes(plt, which_axes="all")