In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
import fiona
from datetime import datetime
import plotly.graph_objects as go
import shapely
import json
import networkx as nx
mapbox_access_token = "pk.eyJ1IjoidGhhbXN1cHBwIiwiYSI6ImNrN3Z4eTk2cTA3M2czbG5udDBtM29ubGIifQ.3UvulsJUb0FSLnAOkJiRiA"

## Reading HUC12 Data

https://mapshaper.org/ - website to simplify HUC boundaries by as much as 10x without losing much information

In [2]:
layers = fiona.listlayers('wbdhu12_a_us_september2019.gdb')
# Read into a geodataframe
huc12 = gpd.read_file('wbdhu12_a_us_september2019.gdb', layer = 'WBDHU12')

# Convert to lat long
huc12.crs = {'init': 'epsg:2283'}

In [3]:
# Filter for the rows that contain the CB states
bay_states = ['MD', 'PA', 'DC', 'VA']
keep = []
for i in bay_states:
    for x in huc12['STATES'].unique():
        if i in x:
            keep.append(x)
        else:
            pass
keephuc = huc12[huc12['STATES'].isin(keep)]
keephuc = keephuc.reset_index(drop = True)

keephuc['HUC12'] = keephuc['HUC12'].apply(lambda x: x[1:])
keephuc['TOHUC'] = keephuc['TOHUC'].apply(lambda x: x[1:])

In [6]:
keephuc

Unnamed: 0,TNMID,METASOURCEID,SOURCEDATADESC,SOURCEORIGINATOR,SOURCEFEATUREID,LOADDATE,GNIS_ID,AREAACRES,AREASQKM,STATES,HUC12,NAME,HUTYPE,HUMOD,TOHUC,NONCONTRIBUTINGAREAACRES,NONCONTRIBUTINGAREASQKM,Shape_Length,Shape_Area,geometry
0,{0F437B7A-8CC7-4910-87AE-470DF920F088},,,,,2013-01-18T07:08:10,,9569.63,38.73,"DE,MD",20402050501,Muddy Run,S,NM,20402050502,0.0,0.0,0.336632,0.004062,"MULTIPOLYGON (((-75.69696 39.63148, -75.69653 ..."
1,{8970F771-7C32-49A3-9F1C-4D818EAD488C},{ED602145-9201-4827-9CE1-05D252484579},,,,2017-10-03T20:10:48,,28148.18,113.91,"DE,MD",20403030302,Saint Martin River,S,NM,20403030304,0.0,0.0,0.636515,0.011751,"MULTIPOLYGON (((-75.23082 38.48462, -75.23013 ..."
2,{1E1062FD-3DFC-44DD-A1E4-49C3899A33E0},,,,,2013-01-18T07:08:10,,17256.73,69.84,"DE,MD",20403030303,Assawoman Bay,S,NM,20403030304,0.0,0.0,0.480219,0.007206,"MULTIPOLYGON (((-75.05235 38.45161, -75.05240 ..."
3,{9FDAEFFF-4386-409F-BAB5-C2D1D450DDEF},{398C29E1-B69C-4748-A790-3729C5B5D492},,,,2017-10-17T09:14:29,,24274.41,98.24,"DE,MD",20600020204,Long Creek-Back Creek,S,NM,20600020205,0.0,0.0,0.524234,0.010291,"MULTIPOLYGON (((-75.74141 39.57760, -75.74140 ..."
4,{1B20CDFC-A859-4937-BA8B-3DD93DF90631},{398C29E1-B69C-4748-A790-3729C5B5D492},,,,2017-10-17T09:14:29,,18380.59,74.38,"DE,MD",20600020205,Perch Creek-Elk River,S,NM,20600020207,0.0,0.0,0.509279,0.007797,"MULTIPOLYGON (((-75.75512 39.59106, -75.75537 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3056,{3E7EDF3C-A490-4F2A-BC60-7EA6576AD444},{41ABE014-FF3A-40BF-B22D-BAB6D1A13610},,,,2017-10-17T09:14:35,,10143.15,41.05,MD,20600060301,Southwest Branch Western Branch Patuxent River,S,NM,20600060302,0.0,0.0,0.412000,0.004248,"MULTIPOLYGON (((-76.84130 38.92683, -76.84117 ..."
3057,{A60A4E2D-56F8-4471-8E6E-759405B2EB67},{41ABE014-FF3A-40BF-B22D-BAB6D1A13610},,,,2017-10-17T09:14:35,,19512.97,78.97,MD,20600060302,Northwest Branch Western Branch Patuxent River,S,NM,20600060304,0.0,0.0,0.498164,0.008187,"MULTIPOLYGON (((-76.80349 39.00022, -76.80311 ..."
3058,{C61E8B31-AE6B-4F34-9B75-66D3F28C6A99},{41ABE014-FF3A-40BF-B22D-BAB6D1A13610},,,,2016-06-11T08:00:21,,14520.78,58.76,"MD,VA",20700080905,Sugarland Run,M,NM,20700081005,0.0,0.0,0.427630,0.006109,"MULTIPOLYGON (((-77.33384 39.05951, -77.33385 ..."
3059,{AFA78C7D-5DD0-47D9-990D-7DC28FA13236},{41ABE014-FF3A-40BF-B22D-BAB6D1A13610},,,,2013-01-18T07:09:05,,37272.75,150.84,"MD,VA",20700081004,Difficult Run,S,NM,20700081005,0.0,0.0,0.737205,0.015674,"MULTIPOLYGON (((-77.34241 39.00855, -77.34240 ..."


In [144]:
# To simplify
keephuc_gdf = gpd.GeoDataFrame(keephuc)
keephuc_gdf.to_file('keephuc.shp')

In [4]:
# Load the simplified GDF

keephuc = gpd.read_file('keephuc/keephuc.shp')

In [5]:
keephuc['centroid'] = keephuc['geometry'].apply(lambda x: x.centroid)
keephuc['centroid_long'] = keephuc['centroid'].apply(lambda x: x.x)
keephuc['centroid_lat'] = keephuc['centroid'].apply(lambda x: x.y)
keephuc = keephuc.drop('centroid', axis = 1)

In [26]:
keephuc.to_file('keephuc/keephuc_new.shp')

In [27]:
# Filter for the rows that contain the CB states
bay_states = ['MD', 'PA', 'DC']
keep = []
for i in bay_states:
    for x in keephuc['STATES'].unique():
        if i in x:
            keep.append(x)
        else:
            pass
keephuc = keephuc[keephuc['STATES'].isin(keep)]
keephuc = keephuc.reset_index(drop = True)

In [31]:
keephuc.columns

Index(['TNMID', 'METASOURCE', 'SOURCEDATA', 'SOURCEORIG', 'SOURCEFEAT',
       'LOADDATE', 'GNIS_ID', 'AREAACRES', 'AREASQKM', 'STATES', 'HUC12',
       'NAME', 'HUTYPE', 'HUMOD', 'TOHUC', 'NONCONTRIB', 'NONCONTR_1',
       'Shape_Leng', 'Shape_Area', 'geometry', 'centroid_long',
       'centroid_lat'],
      dtype='object')

In [33]:
keephuc[['HUC12', 'Shape_Leng', 'Shape_Area', 'HUC12', 'NAME', 'centroid_long', 'centroid_lat']].to_csv('huc_data.csv')

# Water Quality Data

In [7]:
water_subset = pd.read_csv('water_subset.csv')


Columns (6,10,14,16,23,28,32,35,38,43,46,47,49,63,67) have mixed types.Specify dtype option on import or set low_memory=False.



## Looking at Point Source Data

In [65]:
ps_data = pd.read_csv('PointSourceLoadDataState.csv')
ps_data['LATITUDE'] = ps_data['LATITUDE'].apply(lambda x: round(x, 5))
ps_data['LONGITUDE'] = ps_data['LONGITUDE'].apply(lambda x: round(x, 5))
ps_data = ps_data.dropna(subset = ['LATITUDE'])


Columns (2,3,16) have mixed types.Specify dtype option on import or set low_memory=False.



In [66]:
ps_data['coordinates'] = ps_data.apply(lambda row: (row['LONGITUDE'], row['LATITUDE']), axis = 1)
point_sources = ps_data.drop_duplicates('coordinates').reset_index(drop = True)
point_sources = point_sources.dropna(subset = ['LATITUDE'])
point_sources['geometry'] = point_sources.apply(lambda row: shapely.geometry.Point(row['LONGITUDE'], row['LATITUDE']), axis = 1)

## Watershed Data

In [264]:
rivers = gpd.read_file('P6Beta_v3_LRSegs_081516/P6Beta_v3_LRSegs_081516.shp')
rivers = rivers.to_crs(epsg = 4326)

In [265]:
# Generate centroids

rivers['centroid'] = rivers['geometry'].apply(lambda row: row.centroid)
rivers['LON'] = rivers['centroid'].apply(lambda row: row.x)
rivers['LAT'] = rivers['centroid'].apply(lambda row: row.y)

# Subset by watershed
rivers = rivers.loc[rivers['Region'] == 'Chesapeake Bay Watershed', :]

rivers_condensed = rivers.loc[rivers['Watershed'] == 'Susquehanna River Basin', :].reset_index(drop = True)

# Convert Polygon into a geojson
rivers_condensed['geometry_json'] = rivers_condensed['geometry'].apply(lambda x: json.dumps(shapely.geometry.mapping(x)))

In [260]:
# 92-segment data
segments = gpd.read_file('Chesapeake_Bay_92_Segments-shp/Chesapeake_Bay_92_Segments.shp')
segments['centroid'] = segments['geometry'].apply(lambda row: row.centroid)
segments['LON'] = segments['centroid'].apply(lambda row: row.x)
segments['LAT'] = segments['centroid'].apply(lambda row: row.y)
segments['geometry_json'] = segments['geometry'].apply(lambda x: json.dumps(shapely.geometry.mapping(x)))

### Finding which HUCs the Point Sources are in

In [67]:
# For every point source, find out the HUC12 it is in
points = list(point_sources['coordinates'])

In [68]:
# Function that finds the HUC that the pollution point source is in

def find_huc(coord):
    point = shapely.geometry.Point(coord[0], coord[1])
    for i in range(len(keephuc)):
        huc_geom = keephuc['geometry'][i]

        if point.within(huc_geom):
            print('Found!')
            huc = keephuc['HUC12'][i]
            print(huc)
            return huc
    print('None found')
    return 'None'

In [69]:
# Code that finds the HUCs that every pollution point source (coordinates) is in
points_huc_dict = {}
for point in points:
    find_huc(point)
    
    points_huc_dict.update({point: find_huc(point)})

Found!
20700100301
Found!
20700100301
Found!
20700081005
Found!
20700081005
Found!
20700100103
Found!
20700100103
Found!
20700100204
Found!
20700100204
Found!
20700100204
Found!
20700100204
Found!
20700100103
Found!
20700100103
Found!
20700100301
Found!
20700100301
Found!
20700100103
Found!
20700100103
Found!
20700100102
Found!
20700100102
Found!
20700100301
Found!
20700100301
Found!
20700100301
Found!
20700100301
Found!
20600031204
Found!
20600031204
Found!
20600010000
Found!
20600010000
Found!
20600031203
Found!
20600031203
Found!
20600050509
Found!
20600050509
Found!
20600020602
Found!
20600020602
Found!
20801100301
Found!
20801100301
Found!
20600020202
Found!
20600020202
Found!
20801090501
Found!
20801090501
Found!
20600030405
Found!
20600030405
Found!
20700080202
Found!
20700080202
Found!
20600031203
Found!
20600031203
Found!
20600031203
Found!
20600031203
Found!
20700020301
Found!
20700020301
Found!
20600031202
Found!
20600031202
Found!
20600031202
Found!
20600031202
Found!
20600

Found!
20700100102
Found!
20700100102
Found!
20600060401
Found!
20600060401
Found!
20600060402
Found!
20600060402
Found!
20600060104
Found!
20600060104
Found!
20600020407
Found!
20600020407
Found!
20700090505
Found!
20700090505
Found!
20700110304
Found!
20700110304
Found!
20600031202
Found!
20600031202
Found!
20600060403
Found!
20600060403
Found!
20700040807
Found!
20700040807
Found!
20600050503
Found!
20600050503
Found!
20700110401
Found!
20700110401
Found!
20801110202
Found!
20801110202
Found!
20700020602
Found!
20700020602
Found!
20700110701
Found!
20700110701
Found!
20600020104
Found!
20600020104
Found!
20600031203
Found!
20600031203
Found!
20600060504
Found!
20600060504
Found!
20801090406
Found!
20801090406
Found!
20600031203
Found!
20600031203
Found!
20801100602
Found!
20801100602
Found!
20801100602
Found!
20801100602
Found!
20801100405
Found!
20801100405
Found!
20600020104
Found!
20600020104
Found!
20700110304
Found!
20700110304
Found!
20600060202
Found!
20600060202
Found!
20700

Found!
20700100102
Found!
20700100102
Found!
20600020409
Found!
20600020409
Found!
20700110702
Found!
20700110702
Found!
20600030402
Found!
20600030402
Found!
20700041103
Found!
20700041103
Found!
20600031102
Found!
20600031102
Found!
20700040501
Found!
20700040501
Found!
20600060301
Found!
20600060301
Found!
20700041103
Found!
20700041103
Found!
20600031203
Found!
20600031203
Found!
20801100401
Found!
20801100401
Found!
20503061604
Found!
20503061604
Found!
20600031102
Found!
20600031102
Found!
20600030402
Found!
20600030402
Found!
20700080401
Found!
20700080401
Found!
20600020202
Found!
20600020202
Found!
20600060301
Found!
20600060301
Found!
20600020410
Found!
20600020410
Found!
20600060501
Found!
20600060501
Found!
20600050205
Found!
20600050205
Found!
20600030602
Found!
20600030602
Found!
20801110204
Found!
20801110204
Found!
20600040101
Found!
20600040101
Found!
20801110303
Found!
20801110303
Found!
20700110401
Found!
20700110401
Found!
20600060301
Found!
20600060301
Found!
20600

Found!
20600030602
Found!
20600030602
Found!
20600060401
Found!
20600060401
Found!
20600031203
Found!
20600031203
Found!
20700110901
Found!
20700110901
Found!
20700090402
Found!
20700090402
Found!
20600030901
Found!
20600030901
Found!
20600060203
Found!
20600060203
Found!
20600020101
Found!
20600020101
Found!
20700110902
Found!
20700110902
Found!
20600050206
Found!
20600050206
Found!
20600020103
Found!
20600020103
Found!
20700090604
Found!
20700090604
Found!
20801100303
Found!
20801100303
Found!
20801090406
Found!
20801090406
Found!
20600060304
Found!
20600060304
Found!
20600031203
Found!
20600031203
Found!
20600060202
Found!
20600060202
Found!
20600060301
Found!
20600060301
Found!
20600031103
Found!
20600031103
Found!
20700110401
Found!
20700110401
Found!
20700110902
Found!
20700110902
Found!
20700110401
Found!
20700110401
Found!
20600060403
Found!
20600060403
Found!
20600031204
Found!
20600031204
Found!
20700100102
Found!
20700100102
Found!
20801090305
Found!
20801090305
Found!
20600

Found!
20700081003
Found!
20503061713
Found!
20503061713
Found!
20700100101
Found!
20700100101
Found!
20700041007
Found!
20700041007
Found!
20600031103
Found!
20600031103
Found!
20700090505
Found!
20700090505
Found!
20700080102
Found!
20700080102
Found!
20600050504
Found!
20600050504
Found!
20700041001
Found!
20700041001
Found!
20503061502
Found!
20503061502
Found!
20700110401
Found!
20700110401
Found!
20600031201
Found!
20600031201
Found!
20600030405
Found!
20600030405
Found!
20700080904
Found!
20700080904
Found!
20700100102
Found!
20700100102
Found!
20600031201
Found!
20600031201
Found!
20600040401
Found!
20600040401
Found!
20700041007
Found!
20700041007
Found!
20700100201
Found!
20700100201
Found!
20600030406
Found!
20600030406
Found!
20801100304
Found!
20801100304
Found!
20600020502
Found!
20600020502
Found!
20700081005
Found!
20700081005
Found!
20700041009
Found!
20700041009
Found!
20600030602
Found!
20600030602
Found!
20700081005
Found!
20700081005
Found!
20700081005
Found!
20700

Found!
20600030701
Found!
20600050504
Found!
20600050504
Found!
20600040202
Found!
20600040202
Found!
20600020205
Found!
20600020205
Found!
20700110904
Found!
20700110904
Found!
20801100304
Found!
20801100304
Found!
20600031204
Found!
20600031204
Found!
20600031204
Found!
20600031204
Found!
20600030602
Found!
20600030602
Found!
20600050504
Found!
20600050504
Found!
20700100305
Found!
20700100305
Found!
20600040302
Found!
20600040302
Found!
20600020104
Found!
20600020104
Found!
20600031204
Found!
20600031204
Found!
20600030701
Found!
20600030701
Found!
20600031204
Found!
20600031204
Found!
20600040203
Found!
20600040203
Found!
20600060604
Found!
20600060604
Found!
20600030602
Found!
20600030602
Found!
20600030703
Found!
20600030703
Found!
20600030105
Found!
20600030105
Found!
20600030701
Found!
20600030701
Found!
20600040302
Found!
20600040302
Found!
20600031204
Found!
20600031204
Found!
20600050504
Found!
20600050504
Found!
20600060604
Found!
20600060604
Found!
20600060604
Found!
20600

Found!
20503020104
Found!
20503020104
Found!
20502061205
Found!
20502061205
Found!
20503061105
Found!
20503061105
Found!
20503010101
Found!
20503010101
Found!
20503051001
Found!
20503051001
Found!
20502040106
Found!
20502040106
Found!
20700040302
Found!
20700040302
Found!
20501071006
Found!
20501071006
Found!
20503010305
Found!
20503010305
Found!
20503051010
Found!
20503051010
Found!
20700041002
Found!
20700041002
Found!
20503040803
Found!
20503040803
Found!
20503051011
Found!
20503051011
Found!
20502061102
Found!
20502061102
Found!
20502061202
Found!
20502061202
Found!
20501071002
Found!
20501071002
Found!
20502061205
Found!
20502061205
Found!
20503060601
Found!
20503060601
Found!
20503060401
Found!
20503060401
Found!
20700040806
Found!
20700040806
Found!
20700020506
Found!
20700020506
Found!
20503050408
Found!
20503050408
Found!
20503060804
Found!
20503060804
Found!
20503050605
Found!
20503050605
Found!
20503060102
Found!
20503060102
Found!
20501061208
Found!
20501061208
Found!
20503

Found!
20501060102
Found!
20503030105
Found!
20503030105
Found!
20501061208
Found!
20501061208
Found!
20503060801
Found!
20503060801
Found!
20501060502
Found!
20501060502
Found!
20503011006
Found!
20503011006
Found!
20502010301
Found!
20502010301
Found!
20503060801
Found!
20503060801
Found!
20503020201
Found!
20503020201
Found!
20501070301
Found!
20501070301
Found!
20503061106
Found!
20503061106
Found!
20503050703
Found!
20503050703
Found!
20503020503
Found!
20503020503
Found!
20501061102
Found!
20501061102
Found!
20502061103
Found!
20502061103
Found!
20502061103
Found!
20502061103
Found!
20501061002
Found!
20501061002
Found!
20501061101
Found!
20501061101
Found!
20503030403
Found!
20503030403
Found!
20502040101
Found!
20502040101
Found!
20502061205
Found!
20502061205
Found!
20502040404
Found!
20502040404
Found!
20700040303
Found!
20700040303
Found!
20503061601
Found!
20503061601
Found!
20503060707
Found!
20503060707
Found!
20501061405
Found!
20501061405
Found!
20503050107
Found!
20503

Found!
20501061203
Found!
20501070102
Found!
20501070102
Found!
20501011310
Found!
20501011310
Found!
20501070803
Found!
20501070803
Found!
20503010501
Found!
20503010501
Found!
20501070803
Found!
20501070803
Found!
20503050701
Found!
20503050701
Found!
20503010501
Found!
20503010501
Found!
20503061103
Found!
20503061103
Found!
20700040805
Found!
20700040805
Found!
20700090301
Found!
20700090301
Found!
20503051010
Found!
20503051010
Found!
20503060504
Found!
20503060504
Found!
20503010904
Found!
20503010904
Found!
20503061704
Found!
20503061704
Found!
20700090102
Found!
20700090102
Found!
20700041002
Found!
20700041002
Found!
20503050106
Found!
20503050106
Found!
20503050406
Found!
20503050406
Found!
20503060102
Found!
20503060102
Found!
20503050408
Found!
20503050408
Found!
20503050901
Found!
20503050901
Found!
20503060904
Found!
20503060904
Found!
20503060401
Found!
20503060401
Found!
20503061102
Found!
20503061102
Found!
20700040805
Found!
20700040805
Found!
20503050403
Found!
20503

Found!
20503060401
Found!
20503011006
Found!
20503011006
Found!
20503050107
Found!
20503050107
Found!
20503060402
Found!
20503060402
Found!
20700090301
Found!
20700090301
Found!
20503051011
Found!
20503051011
Found!
20503011004
Found!
20503011004
Found!
20503061201
Found!
20503061201
Found!
20503011006
Found!
20503011006
Found!
20503060902
Found!
20503060902
Found!
20503040301
Found!
20503040301
Found!
20503061704
Found!
20503061704
Found!
20503061106
Found!
20503061106
Found!
20503061104
Found!
20503061104
Found!
20503060704
Found!
20503060704
Found!
20700040101
Found!
20700040101
Found!
20503010901
Found!
20503010901
Found!
20503051006
Found!
20503051006
Found!
20503050307
Found!
20503050307
Found!
20503040503
Found!
20503040503
Found!
20503060707
Found!
20503060707
Found!
20503030801
Found!
20503030801
Found!
20700090301
Found!
20700090301
Found!
20700040101
Found!
20700040101
Found!
20503061701
Found!
20503061701
Found!
20503061204
Found!
20503061204
Found!
20503051011
Found!
20503

Found!
20502061202
Found!
20501040908
Found!
20501040908
Found!
20501071002
Found!
20501071002
Found!
20501040902
Found!
20501040902
Found!
20502060304
Found!
20502060304
Found!
20501070902
Found!
20501070902
Found!
20501071001
Found!
20501071001
Found!
20502060103
Found!
20502060103
Found!
20502061204
Found!
20502061204
Found!
20502040301
Found!
20502040301
Found!
20501060503
Found!
20501060503
Found!
20503010102
Found!
20503010102
Found!
20501070602
Found!
20501070602
Found!
20501050602
Found!
20501050602
Found!
20501060503
Found!
20501060503
Found!
20501071006
Found!
20501071006
Found!
20503010104
Found!
20503010104
Found!
20502060208
Found!
20502060208
Found!
20502061202
Found!
20502061202
Found!
20503010201
Found!
20503010201
Found!
20501060501
Found!
20501060501
Found!
20501071004
Found!
20501071004
Found!
20502061207
Found!
20502061207
Found!
20501070902
Found!
20501070902
Found!
20501070707
Found!
20501070707
Found!
20502010406
Found!
20502010406
Found!
20502050208
Found!
20502

Found!
20502010408
Found!
20502010408
Found!
20502010407
Found!
20502010407
Found!
20502010702
Found!
20502010702
Found!
20502010709
Found!
20502010709
Found!
20502030102
Found!
20502030102
Found!
20501040906
Found!
20501040906
Found!
20502010310
Found!
20502010310
Found!
20502040402
Found!
20502040402
Found!
20502040402
Found!
20502040402
Found!
20502040405
Found!
20502040405
Found!
20501050506
Found!
20501050506
Found!
20502040401
Found!
20502040401
Found!
20502060602
Found!
20502060602
Found!
20501060501
Found!
20501060501
Found!
20501070707
Found!
20501070707
Found!
20502040402
Found!
20502040402
Found!
20502061203
Found!
20502061203
Found!
20502060208
Found!
20502060208
Found!
20502050402
Found!
20502050402
Found!
20502050301
Found!
20502050301
Found!
20502020307
Found!
20502020307
Found!
20502020204
Found!
20502020204
Found!
20502020305
Found!
20502020305
Found!
20503061402
Found!
20503061402
Found!
20503050701
Found!
20503050701
Found!
20503020504
Found!
20503020504
Found!
20503

Found!
20503050203
Found!
20503050203
Found!
20502040402
Found!
20502040402
Found!
20503010104
Found!
20503010104
Found!
20503010401
Found!
20503010401
Found!
20502060602
Found!
20502060602
Found!
20503061702
Found!
20503061702
Found!
20503050905
Found!
20503050905
Found!
20501070707
Found!
20501070707
Found!
20502010102
Found!
20502010102
Found!
20503050504
Found!
20503050504
Found!
20503010406
Found!
20503010406
Found!
20502060701
Found!
20502060701
Found!
20502060602
Found!
20502060602
Found!
20502030408
Found!
20502030408
Found!
20503020106
Found!
20503020106
Found!
20501071006
Found!
20501071006
Found!
20501071001
Found!
20501071001
Found!
20501070902
Found!
20501070902
Found!
20503020502
Found!
20503020502
Found!
20503030101
Found!
20503030101
Found!
20502061203
Found!
20502061203
Found!
20503030302
Found!
20503030302
Found!
20501040908
Found!
20501040908
Found!
20503010405
Found!
20503010405
Found!
20503020101
Found!
20503020101
Found!
20502010306
Found!
20502010306
Found!
20502

Found!
20501070902
Found!
20502040401
Found!
20502040401
Found!
20501040604
Found!
20501040604
Found!
20502010707
Found!
20502010707
Found!
20502050303
Found!
20502050303
Found!
20502061102
Found!
20502061102
Found!
20501060502
Found!
20501060502
Found!
20502010310
Found!
20502010310
Found!
20502040406
Found!
20502040406
Found!
20502040401
Found!
20502040401
Found!
20502040401
Found!
20502040401
Found!
20503050308
Found!
20503050308
Found!
20502040402
Found!
20502040402
Found!
20502010310
Found!
20502010310
Found!
20502040402
Found!
20502040402
Found!
20503060901
Found!
20503060901
Found!
20503051006
Found!
20503051006
Found!
20503030505
Found!
20503030505
Found!
20502010707
Found!
20502010707
Found!
20503050802
Found!
20503050802
Found!
20502010703
Found!
20502010703
Found!
20503010102
Found!
20503010102
Found!
20502010707
Found!
20502010707
Found!
20501060501
Found!
20501060501
Found!
20502010406
Found!
20502010406
Found!
20501070602
Found!
20501070602
Found!
20502061202
Found!
20502

Found!
20503030802
Found!
20501071001
Found!
20501071001
Found!
20503030503
Found!
20503030503
Found!
20503041204
Found!
20503041204
Found!
20503040901
Found!
20503040901
Found!
20502040401
Found!
20502040401
Found!
20503050902
Found!
20503050902
Found!
20700040702
Found!
20700040702
Found!
20501070109
Found!
20501070109
Found!
20502040401
Found!
20502040401
Found!
20503030505
Found!
20503030505
Found!
20503030505
Found!
20503030505
Found!
20503030503
Found!
20503030503
Found!
20502060604
Found!
20502060604
Found!
20503060303
Found!
20503060303
Found!
20502060509
Found!
20502060509
Found!
20501060902
Found!
20501060902
Found!
20503030505
Found!
20503030505
Found!
20501071001
Found!
20501071001
Found!
20503050605
Found!
20503050605
Found!
20501060304
Found!
20501060304
Found!
20503050601
Found!
20503050601
Found!
20502010707
Found!
20502010707
Found!
20502060103
Found!
20502060103
Found!
20503040501
Found!
20503040501
Found!
20503030505
Found!
20503030505
Found!
20503061103
Found!
20503

In [70]:
# Flip around to find point sources in each HUC

points_huc_tuple_rev = [(huc, point) for point, huc in points_huc_dict.items()]

huc_points_dict = {}
for e in points_huc_tuple_rev:
    huc_points_dict.setdefault(e[0], set()).add(e[1])

In [71]:
# Add the HUC data into the Point Source dataset
ps_data['HUC'] = ps_data.apply(lambda row: int(points_huc_dict[row['coordinates']]), axis = 1)
# This is'PointSourceLoadDataState_updated.csv'

### Wrangling with HUC level Point-Source data

In [72]:
ps_data['Date'] = ps_data['DMR_DATE'].apply(lambda x: datetime.strptime(x, '%m/%d/%Y'))
# Convert the date data into Y-M
ps_data['Month'] = ps_data['Date'].dt.to_period('M')

In [73]:
# Subset by HUC, date and month - how much total nitrogen levels are within each HUC, for a certain date and month
ps_data.loc[(ps_data['Month'] == '2013-06') & 
            (ps_data['PARAMETER'] == 'TN') &
            (ps_data['HUC'] == 20600031204), 'VALUE'].sum()

56.454517

In [74]:
# For each facility, calculate the nutrient load for each month
def get_load_data(parameter):

    ps_data_params = ps_data.loc[(ps_data['PARAMETER'] == parameter) | (ps_data['PARAMETER'] == 'FLOW'), ['FACILITY', 'HUC', 'Month', 'PARAMETER', 'VALUE']] \
        .drop_duplicates(subset = ['FACILITY', 'HUC', 'Month', 'PARAMETER']) \
        .pivot(index = ['FACILITY', 'HUC', 'Month'], columns = 'PARAMETER', values = 'VALUE') \
        .reset_index()

    ps_data_params['LOAD'] = ps_data_params['FLOW'] * ps_data_params[parameter] * 80 * 8.344

    # Group by HUC and find total nutrient load for each month
    month_data_huc = ps_data_params.groupby(['HUC', 'Month'])['LOAD'].sum().reset_index()

    # Reshape to get monthly nutrient load for each HUC
    month_data_huc_pivot = month_data_huc.pivot(columns = 'HUC', index = 'Month', values = 'LOAD').fillna(0)
    
    return month_data_huc_pivot

In [79]:
ps_data['Date'].describe()





count                  992136
unique                    109
top       2013-04-30 00:00:00
freq                    34873
first     2005-01-31 00:00:00
last      2013-12-31 00:00:00
Name: Date, dtype: object

In [75]:
# Subset by HUC, find how the TN pollution has changed over time
ps_data.loc[(ps_data['PARAMETER'] == 'NH3'), :] \
    .groupby(['HUC', 'Month'])['VALUE'].sum() \
    .reset_index() \
    .pivot(columns = 'HUC', index = 'Month', values = 'VALUE') \
    .fillna(0)

HUC,20402050502,20403030402,20501011302,20501011304,20501011305,20501011308,20501011309,20501011310,20501030701,20501030707,...,20801110202,20801110203,20801110204,20801110205,20801110302,20801110303,20801110401,20801110502,50200060107,50200060301
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01,5.5,0.0,0.0,0.0,7.49566,0.0,0.0,0.0,0.0,11.34848,...,11.42,0.0,0.0,0.24,0.0,0.20,1.52,0.0,0.0,0.0
2005-02,2.9,0.0,0.0,0.0,7.49566,0.0,0.0,0.0,0.0,11.34848,...,11.42,0.0,0.0,10.61,0.0,0.45,1.52,0.0,0.0,0.0
2005-03,4.9,0.0,0.0,0.0,9.73600,0.0,0.0,0.0,0.0,11.34848,...,11.42,0.0,0.0,3.18,0.0,0.20,1.52,0.0,0.0,0.0
2005-04,14.6,0.0,0.0,0.0,7.49566,0.0,0.0,0.0,0.0,11.34848,...,11.42,0.0,0.0,0.11,0.0,1.00,1.22,0.0,0.0,0.0
2005-05,4.0,0.0,0.0,0.0,8.80000,0.0,0.0,0.0,0.0,11.34848,...,10.40,0.0,0.0,0.08,0.0,0.20,1.83,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2013-08,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.00000,...,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0,0.0
2013-09,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.00000,...,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0,0.0
2013-10,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.00000,...,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0,0.0
2013-11,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.00000,...,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.0,0.0,0.0


In [95]:
### Code for calculating single parameters of the HUC
parameter = 'TN'

month_data_huc = ps_data.loc[(ps_data['PARAMETER'] == parameter), ['HUC', 'Month', 'PARAMETER', 'VALUE']] \
    .groupby(['HUC', 'Month', 'PARAMETER'])['VALUE'].sum().reset_index() \
    .pivot(index = ['HUC', 'Month'], columns = 'PARAMETER', values = 'VALUE') 
month_data_huc_pivot = month_data_huc.reset_index() \
    .pivot(columns = 'HUC', index = 'Month', values = parameter) \
    .fillna(0)

Output is month_data_huc_pivot, which is a dataframe with HUC as columns and month as rows

In [96]:
# Get the HUCs that are represented in this Point Source dataset
hucs_with_ps = month_data_huc_pivot.columns.tolist()

# Subset HUC dataset to contain only HUCs with point sources
huc_ps = keephuc[keephuc['HUC12'].apply(lambda x: int(x) in hucs_with_ps)].reset_index(drop = True)

# Already converted centroid, LON, LAT, geometry_json
huc_ps['centroid'] = huc_ps['geometry'].apply(lambda row: row.centroid)
huc_ps['LON'] = huc_ps['centroid'].apply(lambda row: row.x)
huc_ps['LAT'] = huc_ps['centroid'].apply(lambda row: row.y)
huc_ps['geometry_json'] = huc_ps['geometry'].apply(lambda x: json.dumps(shapely.geometry.mapping(x)))

# Select columns wanted
huc_ps = huc_ps[['HUC12', 'NAME', 'TOHUC', 'geometry', 'geometry_json', 'centroid', 'LON', 'LAT']]

#huc_ps.to_csv('HUCS_with_ps.csv')

In [97]:
# Indicate month in string 
month = '2010-09'

# Select HUC data for one month - (Input: month_data_huc_pivot)
huc_month = month_data_huc_pivot[month:month].transpose().reset_index()
huc_month['HUC'] = huc_month['HUC'].astype(str)
huc_month.columns = ['HUC12', 'Value']

# Merge the HUC point source data with the HUC data
huc_ps = huc_ps.merge(huc_month, on = 'HUC12')

Now, huc_ps contains the HUC-level values that will be plotted.

### Using HUCs (Old Code)

In [297]:
keephuc['centroid'] = keephuc['geometry'].apply(lambda row: row.centroid)
keephuc['LON'] = keephuc['centroid'].apply(lambda row: row.x)
keephuc['LAT'] = keephuc['centroid'].apply(lambda row: row.y)
# Convert geometry from a shapely object to a geojson
keephuc['geometry_json'] = keephuc['geometry'].apply(lambda x: json.dumps(shapely.geometry.mapping(x)))

In [79]:
# Restrict HUCs to those within a cerain Latitude and Longitude
keephuc = keephuc.loc[(keephuc['LON'] > -76.866) & (keephuc['LON'] < -75.905) & (keephuc['LAT'] > 39.4965) & (keephuc['LAT'] < 40.1349), :]
keephuc['text'] = keephuc.apply(lambda row: '{} -> {}'.format(row['HUC12'], row['TOHUC']), axis = 1)
keephuc = keephuc.reset_index(drop = True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Storing HUC Flows in DAG Data Structure

Inputs: keephuc, huc_points_dict

In [98]:
# EXTERNAL HUC level data - huc_data_dict

# Get the point source: HUC dataset
point_huc_dict = ps_data[['coordinates', 'HUC']].drop_duplicates().set_index('coordinates').to_dict()['HUC']
points_huc_tuple_rev = [(str(b),a) for a,b in point_huc_dict.items()]
# Flip it to get the HUC: point sources dataset
huc_points_dict = {}
for e in points_huc_tuple_rev:
    huc_points_dict.setdefault(e[0], set()).add(e[1])
    
# huc_points_dict is a dictionary of HUC: point sources
# Convert this into huc_num_points_dict (NODE DATA)
huc_num_ps_dict = {key: len(value) for key, value in huc_points_dict.items()}

In [99]:
# Function to get the point:load (TN, TP) data for certain month
def get_huc_load_data_for_month(month):

    tn_huc = get_load_data('TN')
    huc_tn_load_df = tn_huc[month:month].transpose().reset_index()
    huc_tn_load_dict = {str(huc):load for huc, load in zip(huc_tn_load_df.iloc[:,0], huc_tn_load_df.iloc[:, 1])}

    tp_huc = get_load_data('TP')
    huc_tp_load_dict = tp_huc[month:month].transpose().reset_index()
    huc_tp_load_dict = {str(huc):load for huc, load in zip(huc_tp_load_dict.iloc[:,0], huc_tp_load_dict.iloc[:, 1])}
    
    return huc_tn_load_dict, huc_tp_load_dict

In [100]:
def get_huc_cumulative_data(huc_data_dict, col_name):

    huc_data = pd.read_csv('HUCS_with_ps.csv')
    huc_data[['HUC12', 'TOHUC']] = huc_data[['HUC12', 'TOHUC']].astype(str)
    # Get a dataframe with all the HUC flows, and convert to str
    huc_flows = huc_data[['HUC12', 'TOHUC']]

    # List of unique HUCs (i.e. NODES of DAG), some of them might or might not have point sources
    unique_hucs = pd.unique(huc_flows.values.ravel()).tolist()
    unique_hucs = [e for e in unique_hucs]


    # Instantiate directed graph
    graph = nx.DiGraph()
    # Add nodes, with the property being the # point sources (huc_num_points_dict)
    hucs_to_add = [(huc, {'num_ps': (huc_data_dict[huc] if (huc in huc_data_dict) else 0)}) for huc in unique_hucs]
    graph.add_nodes_from(hucs_to_add)

    ### Add edges

    # Store the HUC from and to as EDGES of a directed graph
    huc_flows_edges = [(fr, to) for fr, to in zip(huc_flows['HUC12'], huc_flows['TOHUC'])]
    graph.add_edges_from(huc_flows_edges)

    # For all HUCs, get this sum of point sources for all ancestors of a given HUC + itself
    huc_cum_num_points_dict = {huc: sum(graph.nodes[node]['num_ps'] for node in nx.ancestors(graph, huc)) + graph.nodes[huc]['num_ps']
                               for huc in unique_hucs}
    huc_cum_num_points_df =  pd.DataFrame(huc_cum_num_points_dict.items())
    huc_cum_num_points_df.columns = ['HUC12', col_name]
    
    return huc_cum_num_points_df

In [122]:
# Get all the cumulative HUC data for all months
out_df = get_huc_cumulative_data(huc_num_ps_dict, 'num_ps')
for month in [str(e) for e in sorted(ps_data['Month'].unique())]:
    # Get the individual HUC 
    huc_tn_load_dict, huc_tp_load_dict = get_huc_load_data_for_month(month)
    # Get all the HUC cumulative data for that month
    huc_cumu_data = get_huc_cumulative_data(huc_tn_load_dict, 'tn_load')
    huc_cumu_data = huc_cumu_data.merge(get_huc_cumulative_data(huc_tp_load_dict, 'tp_load'), on = 'HUC12')

    huc_cumu_data['Month'] = month
    out_df = pd.concat([out_df, huc_cumu_data], axis = 0)

In [125]:
out_df.to_csv('HUC_cumu_ps_data.csv', index = None)

In [126]:
water_subset

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Agency,BiasPC,CloudCover,Comments,Cruise,Database,Date,...,Point,HUC12_,HUCNAME_,FIPS_,COUNTY_,STATE_,coordinates,Year,Month,Day
0,4187,4187,562,,,,SFH/JRM,,CMC,2020-02-18,...,POINT (-76.06811 39.38261),20600010000,Upper Chesapeake Bay,24029,Kent County,MD,"(-76.06811, 39.38261)",2020,2,18
1,4188,4188,563,,,,SFH/JRM,,CMC,2020-02-18,...,POINT (-76.06811 39.38261),20600010000,Upper Chesapeake Bay,24029,Kent County,MD,"(-76.06811, 39.38261)",2020,2,18
2,4189,4189,564,,,,SFH/JRM,,CMC,2020-02-18,...,POINT (-76.06811 39.38261),20600010000,Upper Chesapeake Bay,24029,Kent County,MD,"(-76.06811, 39.38261)",2020,2,18
3,4190,4190,565,,,,SFH/JRM,,CMC,2020-02-18,...,POINT (-76.06811 39.38261),20600010000,Upper Chesapeake Bay,24029,Kent County,MD,"(-76.06811, 39.38261)",2020,2,18
4,4191,4191,566,,,,SFH/JRM,,CMC,2020-02-18,...,POINT (-76.06811 39.38261),20600010000,Upper Chesapeake Bay,24029,Kent County,MD,"(-76.06811, 39.38261)",2020,2,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376901,2191619,2191619,3850542,MDDNR,,Partly To Broken (50-90%),,BAY677,CBP,2016-07-26,...,POINT (-76.40131 38.64498),20600010000,Upper Chesapeake Bay,24041,Talbot County,MD,"(-76.40131, 38.64498)",2016,7,26
376902,2191620,2191620,3850543,MDDNR,,Partly To Broken (50-90%),,BAY677,CBP,2016-07-26,...,POINT (-76.40131 38.64498),20600010000,Upper Chesapeake Bay,24041,Talbot County,MD,"(-76.40131, 38.64498)",2016,7,26
376903,2191621,2191621,3850544,MDDNR,,Partly To Broken (50-90%),,BAY677,CBP,2016-07-26,...,POINT (-76.40131 38.64498),20600010000,Upper Chesapeake Bay,24041,Talbot County,MD,"(-76.40131, 38.64498)",2016,7,26
376904,2191622,2191622,3850545,MDDNR,,Partly To Broken (50-90%),,BAY677,CBP,2016-07-26,...,POINT (-76.40131 38.64498),20600010000,Upper Chesapeake Bay,24041,Talbot County,MD,"(-76.40131, 38.64498)",2016,7,26


https://stackoverflow.com/questions/55224504/efficient-way-of-summing-all-unique-ancestor-values-in-incremental-dag

## Getting HUC-aggregated Point Source Data

In [244]:
a = huc_data.copy()