# Final Project: Earth Analytics Python Course, Spring 2020
## Steph Shepherd & Lauren Herwehe

Short description of the purpose of the code

In [1]:
# Import libraries
import warnings
from glob import glob
import os

import numpy as np
import numpy.ma as ma
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import geopandas as gpd
from geopandas import GeoDataFrame as gdf
from shapely.geometry import Point, Polygon

import urllib.request

import earthpy as et
import earthpy.spatial as es
import earthpy.plot as ep

from zipfile import ZipFile

# Set working directory
os.chdir(os.path.join(et.io.HOME, 'earth-analytics', 'data'))

### Psuedocode
Overlay projected dam locations and rivers (with additional attributes on degree of regulations) on top of protected areas to identify which and to what degree protected areas are impacted by proposed hydropower dams


1. Get Data from figshare download (all are shapefiles)
    * Ramsar Sites
    * Proposed Dams
2. Open each shapefile
    * check CRS
    * select the desired attributes for each shapefile and export to a new geopandas dataframe
    * fill any NA values
3. Select Sites
    * use a buffer of (5 km?, 10 km?, maybe both) to select all Ramsar sites within the buffer distance from a proposed dam
    * save this as a new geopandas dataframe
4. Calculate stats
    * area of land impacted? (from Ramsar site shapefiles)
    * 
4. Create Deliverables - plots/figures
    * Map of all the sites
    * Bar plot Number of sites per region (continent, country, ???)
    * Map of one or two countries/regions
    * Graph comparing these regions.



In [2]:
os.getcwd()

'/home/jovyan/earth-analytics/data'

In [4]:
#Download Data
#Ramsar Sites
urllib.request.urlretrieve("https://ndownloader.figshare.com/articles/12217679/versions/1", "12217679.zip")

#et.data.get_data(url='https://ndownloader.figshare.com/articles/12217679/versions/1')

with ZipFile('12217679.zip', 'r') as zipObj:
    zipObj.extractall("ramsar")

#Future dams
#urllib.request.urlretrieve("https://ndownloader.figshare.com/files/22486157", "22486157.zip")

et.data.get_data(url="https://ndownloader.figshare.com/files/22486157")

#with ZipFile('22486157.zip', 'r') as zipObj:
    #zipObj.extractall("FHReD")

'/home/jovyan/earth-analytics/data/earthpy-downloads/future_dams_2015.csv'

In [5]:
# Open the necessary shapefiles with geopandas
ramsar_areas = gpd.read_file(os.path.join("ramsar", "features_publishedPolygon.shp"))

# Check the crs of the files
print(ramsar_areas.crs)


#Open the cvs file as a geopandas database
fname = os.path.join("earthpy-downloads", "future_dams_2015.csv")
df = pd.read_csv(fname)

# Covert the pandas dataframe to a shapefile for plotting
geometry = [Point(xy) for xy in zip(df.Lon_Cleaned, df.LAT_cleaned)]
crs = {'init': 'epsg:4326'} 
geo_df = gdf(df, crs=crs, geometry=geometry)
geo_df.to_file(driver='ESRI Shapefile', filename='proposed_dams.shp')

proposed_dams = gpd.read_file("proposed_dams.shp")


epsg:4326


  return _prepare_from_string(" ".join(pjargs))


In [6]:
# Not needed will delete
# Open the necessary csv files with pandas
fname = os.path.join("final-project-data", "future-dams", "future_dams_2015.csv")
dam_locs = pd.read_csv(fname)

dam_locs.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3700 entries, 0 to 3699
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   DAM_ID         3700 non-null   int64  
 1   Project name   3700 non-null   object 
 2   Continent      3700 non-null   object 
 3   Country        3700 non-null   object 
 4   Main_river     3670 non-null   object 
 5   Major Basin    3700 non-null   object 
 6   Capacity (MW)  3490 non-null   float64
 7   LAT_cleaned    3700 non-null   float64
 8   Lon_Cleaned    3700 non-null   float64
 9   Stage          3700 non-null   object 
 10  Start          622 non-null    float64
 11  End            450 non-null    float64
 12  Reference 1    3699 non-null   object 
 13  Reference 2    370 non-null    object 
 14  Reference 3    34 non-null     object 
dtypes: float64(5), int64(1), object(9)
memory usage: 433.7+ KB


In [7]:
#Not needed will delete
dam_loc_cont_index = dam_locs.set_index("Continent")

dam_asia = dam_loc_cont_index.loc[["Asia"]]

dam_asia

Unnamed: 0_level_0,DAM_ID,Project name,Country,Main_river,Major Basin,Capacity (MW),LAT_cleaned,Lon_Cleaned,Stage,Start,End,Reference 1,Reference 2,Reference 3
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Asia,1334,Lentekhi,Georgia,Kuban,"Black Sea, South Coast",120.0,42.807366,42.710417,U,2011.0,2016.0,http://hydropower.ge/user_upload/Lentekhi_Hydr...,,
Asia,1335,Tvishi,Georgia,Kr,"Black Sea, South Coast",100.0,42.510302,42.801969,P,1987.0,,www.menr.gov.ge/common/get_doc.aspx?doc_id=7233,,
Asia,1336,Zhoneti,Georgia,Kr,"Black Sea, South Coast",100.0,42.373621,42.713879,P,1987.0,,www.menr.gov.ge/common/get_doc.aspx?doc_id=7234,,
Asia,1337,Namakhvani,Georgia,Kr,"Black Sea, South Coast",250.0,42.417622,42.700956,P,1987.0,,www.menr.gov.ge/common/get_doc.aspx?doc_id=7234,,
Asia,1341,Dzegvi,Georgia,Kr,"Caspian Sea, South West Coast",15.7,41.858119,44.727083,P,2011.0,2016.0,http://hydropower.ge/user_upload/Dzegvi_Hydro_...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Asia,3716,Longpan,China,Yangtze Basin,Yangtze,4200.0,27.168977,100.077311,P,,,http://tibetanplateau.blogspot.co.uk/2013/03/2...,,
Asia,3717,Maji,China,Salween Basin,Salween,4200.0,27.403090,98.827083,P,,,http://tibetanplateau.blogspot.co.uk/2013/03/2...,,
Asia,3720,Tongka,China,Salween Basin,Salween,,30.468750,96.649547,P,,,http://tibetanplateau.blogspot.co.uk/2013/03/2...,,
Asia,3721,Bangduo,China,Mekong Basin,Mekong,,29.469642,98.393750,P,,,http://tibetanplateau.blogspot.co.uk/2013/03/2...,,


In [8]:
#Group data by Stage and Continent, Stage and Country, Country and capacity?
#stage P = planned and U = under construction

dam_cont_stage = dam_locs.groupby(["Continent"]).get_group("Stage")

dam_cont_stage


KeyError: 'Stage'

In [None]:

# # Check the crs of the files
# print(ca_counties.crs)
# print(roads.crs)

# Reproject both layers to epsg 0570
ca_counties_5070 = ca_counties.to_crs(epsg=5070)
roads_5070 = roads.to_crs(epsg=5070)

# Select only the three counties of interest
three_counties = ca_counties_5070[ca_counties_5070['NAME'].isin(
    ["Siskiyou", "Modoc", "Del Norte"])]

# # Check the new crs of the files
# print(three_counties_5070.crs)
# print(roads_5070.crs)

# Clip the roads data using the clip_shp module
roads_5070_cl = cl.clip_shp(roads_5070, three_counties)

# Redefine the CRS of the roads layer
roads_5070_cl.crs = three_counties.crs

# Assign the roads to their respective county with a spatial join
roads_region = gpd.sjoin(roads_5070_cl, three_counties,
                         how="inner", op='intersects')

In [None]:
# PLOT 1 - Place only the code required to create a plot of your data here
# Additional processing code can go above this code cell

# Settting color palettes and sizing for roads and points
pointsPalette = {'trees': 'chartreuse',
                 'grass': 'darkgreen', 'soil': 'burlywood'}

roadPalette = {'M': 'grey', 'S': "blue",
               'C': "magenta", 'Unknown': "lightgrey"}

lineWidths = {'M': .5, 'S': 2, 'C': 2, 'Unknown': .5}

# Create figure
fig, ax = plt.subplots(figsize=(10, 10))

for ctype, data in sjer_plots.groupby('plot_type'):
    color = pointsPalette[ctype]
    label = ctype
    data.plot(color=color,
              ax=ax,
              label=label,
              markersize=100)

for ctype, data in sjer_roads_cl.groupby('RTTYP'):
    color = roadPalette[ctype]
    label = ctype
    data.plot(color=color,
              ax=ax,
              linewidth=lineWidths[ctype],
              label=label)

ax.set(title='Madera County Roads and Study Plot Locations')

ax.legend(fontsize=15,
          frameon=False,
          loc=('lower right'),
          title="LEGEND")

ax.set_axis_off()
plt.axis('equal')
### DO NOT REMOVE LINE BELOW ###
plot01_roads_plot_locs = nb.convert_axes(plt, which_axes="current")

In [None]:
# PLOT 2 - Place only the code required to plot your data here
# Additional processing code can go above this code cell
# Important: name your final geodataframe for county boundaries: three_counties

# Plot the data
fig, ax = plt.subplots(figsize=(10, 5))
three_counties.plot(edgecolor="black",
                    facecolor='none',
                    ax=ax)

roads_region.plot(column='NAME',
                  ax=ax,
                  legend=True)

ax.set(title='California Roads in Del Norte, Modoc, and Siskiyou Counties')
ax.set_axis_off()
plt.axis('equal')

### DO NOT REMOVE LINE BELOW ###
plot02_county_roads_clip = nb.convert_axes(plt, which_axes="current")

In [None]:
# TABLE 1 - Place the code required to create the dataframe
# Important: name your final geodataframe: cali_roads_summary

# # Calculate the total length of road in each county
roads_region['length'] = roads_region.length
cali_roads_summary = roads_region[['length', 'NAME']].groupby('NAME').sum()

# Print the new table
print(cali_roads_summary)

In [None]:
# Download the data
data = et.data.get_data(
    url=' https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip')

# Open the necessary files with geopandas
countries = gpd.read_file(os.path.join("data", "earthpy-downloads",
                                       "ne_10m_admin_0_countries", "ne_10m_admin_0_countries.shp"))

# Subset the data
pop_data = countries[["REGION_WB", "CONTINENT",
                      "POP_RANK", "POP_EST", 'geometry']]

# Dissolve and aggregate the data
mean_region_val = pop_data.dissolve(by='REGION_WB', aggfunc=['sum', 'mean'])

# Getting column names to use in making our plots
list(mean_region_val.columns)

In [None]:
# PLOT 3 - Place only the code required to plot your data here
# Additional processing code can go above this code cell
# Important: name your final geodataframe: mean_region_val
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))

mean_region_val.plot(column=('POP_EST', 'sum'),
                     legend=True,
                     cmap='OrRd',
                     ax=ax1)

mean_region_val.plot(column=('POP_RANK', 'mean'),
                     cmap='OrRd',
                     legend=True,
                     ax=ax2)

plt.suptitle('Global Total Estimated Population by Region', fontsize=16)

plt.show()


### DO NOT REMOVE LINE BELOW ###
plot04_global_population = nb.convert_axes(plt, which_axes="all")