In [1]:
'''
Getting the ABDU model to work in notebook

EPSG: 5070
'''
import duckdb #version 0.9.2
import geopandas as gpd #version 0.14.1
import leafmap, os, time
from shapely import wkt
import pandas as pd
import pyarrow as pa
import geoarrow.pyarrow as ga
import geoarrow.pandas as _
import rasterio
from rasterio import mask
from shapely.geometry import shape
from threading import Thread, current_thread

con = duckdb.connect()
con.install_extension("spatial")
con.load_extension("spatial")
con.install_extension("azure")
con.load_extension("azure")
con.install_extension("json")
con.load_extension("json")

In [2]:
inaoifile = '28027'
local = True

In [3]:
if local:
    nwiurl = './nwi/**/*.parquet'
else:
    nwiurl = 'azure://abdu/nwi/**/*.parquet'

In [4]:
'''
SELECT fips geometry based on inaoifile to use as aoi for calculation.  All hucs should have center in fips.
'''
con.sql("SET azure_storage_connection_string = 'DefaultEndpointsProtocol=https;AccountName=giscog;EndpointSuffix=core.windows.net';")
con.sql("""
CREATE OR REPLACE TABLE selectedcounty AS
SELECT NAME, STATE_NAME, FIPS, geometry FROM read_parquet('azure://abdu/uscounties.parquet')
WHERE FIPS = '{0}'
""".format(inaoifile))

In [5]:
con.sql('select * from selectedcounty')

┌─────────┬─────────────┬─────────┬────────────────────────────────────────────────────────────────────────────────────┐
│  NAME   │ STATE_NAME  │  FIPS   │                                      geometry                                      │
│ varchar │   varchar   │ varchar │                                        blob                                        │
├─────────┼─────────────┼─────────┼────────────────────────────────────────────────────────────────────────────────────┤
│ Coahoma │ Mississippi │ 28027   │ \x01\x06\x00\x00\x00\x01\x00\x00\x00\x01\x03\x00\x00\x00\x01\x00\x00\x00\x27\x00…  │
└─────────┴─────────────┴─────────┴────────────────────────────────────────────────────────────────────────────────────┘

In [6]:
'''
Read in hucs partitioned to huc2/huc4 level that overlap with the aoi.  Don't clip hucs
'''
con.sql(f"""
CREATE OR REPLACE TABLE huc12 AS
SELECT LEFT(huc12,2) AS huc2,LEFT(huc12,4) AS huc4, huc12, areaacres, huc.geometry
FROM (SELECT huc12, areaacres, geometry FROM read_parquet('azure://abdu/huc/**/*.parquet')
WHERE CAST(LEFT(huc12,2) AS INTEGER)<=12) AS huc
JOIN selectedcounty ON 
ST_Within(ST_Centroid(ST_GeomFromWKB(huc.geometry)), ST_GeomFromWKB(selectedcounty.geometry))
""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [7]:
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
'''
Wetland energy calculation
'''

'\nWetland energy calculation\n'

In [8]:
hucs = con.sql("select huc4 from huc12 GROUP BY huc4").df().values.tolist()
hucs = sorted([item for items in hucs for item in items])
print(hucs)

['0802', '0803']


In [10]:
con.execute("""
    CREATE OR REPLACE TABLE my_wetlands (
        ATTRIBUTE VARCHAR,
        huc2 VARCHAR,
        huc4 VARCHAR,
        huc12 VARCHAR,
        geometry VARCHAR,
    )
""")
def write_from_thread(con):
    local_con = con.cursor()
    huc = str(current_thread().name)
    local_con.sql("SET azure_storage_connection_string = 'DefaultEndpointsProtocol=https;AccountName=giscog;EndpointSuffix=core.windows.net';")
    sql = '''INSERT INTO my_wetlands (SELECT ATTRIBUTE,huc2, huc4, huc12, ST_AsWKB(ST_Intersection(ST_GeomFromWKB(wetlnd.geometry), 
        ST_GeomFromWKB(huc12.geometry))) as geometry
        FROM (SELECT ATTRIBUTE, geometry FROM read_parquet('{1}') 
        WHERE WETLAND_TYPE != 'Riverine' AND huc4 = '{0}') AS wetlnd
        JOIN huc12 ON 
        ST_Intersects(ST_GeomFromWKB(wetlnd.geometry), ST_GeomFromWKB(huc12.geometry)))'''.format(huc, nwiurl)
    result = local_con.execute(sql).fetchall()

In [49]:
threads = []
print(hucs)
for i in range(len(hucs)):
    huc = hucs[i]
    threads.append(Thread(target = write_from_thread,
                            args = (con,),
                            name = huc))

['0802', '0803']


In [50]:
%%time
# Kick off all threads in parallel
for thread in threads:
    thread.start()

# Ensure all threads complete before printing final results
for thread in threads:
    thread.join()

con.sql("""
    CREATE OR REPLACE TABLE wetlands AS 
    SELECT * FROM my_wetlands 
""")

CPU times: total: 10.1 s
Wall time: 2.63 s


In [52]:
'''
Import wetland crossclass data and assign classes to the nwi table
'''
con.sql("""CREATE OR REPLACE TABLE crossnwi AS (UNPIVOT (FROM (SELECT * FROM read_json_auto('aoiWetland.json', maximum_object_size=100000000))) ON COLUMNS(*))""")
con.sql("""CREATE OR REPLACE TABLE crossnwi AS SELECT name, UNNEST(value) AS value FROM crossnwi""")
con.sql("""CREATE OR REPLACE TABLE wetlands AS
SELECT name, huc12, geometry FROM (SELECT DISTINCT geometry, ATTRIBUTE, huc2, huc4, huc12 FROM wetlands) AS wetselect
LEFT JOIN crossnwi ON wetselect.ATTRIBUTE LIKE crossnwi.value
""")
con.sql(f"""CREATE OR REPLACE TABLE wetlands AS
(SELECT replace(wetlands.name, '_', '') AS name, huc12, ST_Area(st_geomfromtext(geometry))*0.0001 AS ha, kcal, kcal*ha AS avalNrgy, st_buffer(st_geomfromtext(geometry),0) as geometry FROM wetlands
LEFT JOIN read_csv_auto('azure://abdu/kcal.csv') ON replace(wetlands.name, '_', '') = read_csv_auto.habitatType
WHERE wetlands.name IS NOT NULL)
""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [29]:
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################

In [53]:
'''
Read in PADUS
'''
con.sql("""
CREATE OR REPLACE TABLE protected AS 
SELECT CATEGORY, huc12, huc2, huc4, ST_Intersection(ST_GeomFromWKB(huc12.geometry), ST_GeomFromWKB(prot.geometry)) as geometry
FROM (SELECT CATEGORY, geometry FROM read_parquet('azure://abdu/padus/**/*.parquet')
WHERE CATEGORY IN ('Fee', 'Easements', 'Other') AND huc4 IN {0}) AS prot
JOIN huc12 ON 
ST_Intersects(ST_GeomFromWKB(huc12.geometry), ST_GeomFromWKB(prot.geometry))
""".format(tuple(hucs)))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [80]:
con.sql("""
CREATE OR REPLACE TABLE protwetlands AS
SELECT name, wetlands.huc12, kcal, ST_Intersection(protected.geometry, wetlands.geometry) as geometry
FROM (SELECT ST_Union_Agg(geometry) as geometry from protected) as protected
JOIN wetlands ON 
ST_Intersects(wetlands.geometry, protected.geometry)
""")

In [81]:
con.sql("""
CREATE OR REPLACE TABLE protwetlands AS
SELECT DISTINCT geometry, name, huc12, ST_Area(geometry)*0.0001 AS ProtHabHa, kcal, kcal*ProtHabHa AS protNrgy FROM protwetlands
""")

In [83]:
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################

In [84]:
# Need huc12 geometry
df = con.sql('SELECT ST_AsText(ST_geomfromwkb(geometry)) as geometry from huc12').df()
df['geometry'] = df['geometry'].apply(wkt.loads)
df = gpd.GeoDataFrame(df, geometry='geometry', crs=5070)

In [85]:
'''
Read in NLCD clipped to hucs
'''
with rasterio.open('https://giscog.blob.core.windows.net/newcontainer/nlcd2019_cog.tif') as src:
    # Clip the raster to the geometry of the shapefile
    clipped_data, transform = mask.mask(src, df.geometry, crop=True)

clipped_data[clipped_data>23]=0
clipped_data[clipped_data<21]=0
clipped_data[clipped_data==21]=1
clipped_data[clipped_data==22]=1
clipped_data[clipped_data==23]=1
shapes = rasterio.features.shapes(clipped_data[0], transform=transform, mask=clipped_data[0] == 1)
# Create a GeoDataFrame from the vector polygons
gdf_vector = gpd.GeoDataFrame({'geometry': [shape(geom) for geom, value in shapes]})
gdf_vector['geometry'] = gdf_vector.to_wkb().geometry
con.sql("CREATE OR REPLACE TABLE urban AS SELECT * EXCLUDE geometry, ST_GeomFromWKB(geometry) AS geometry FROM gdf_vector")

In [89]:
con.sql('select * from urban')
#con.sql('select * from huc12')

┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│                                                       geometry                                                       │
│                                                       geometry                                                       │
├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ POLYGON ((491985 1284705, 491985 1284645, 492015 1284645, 492015 1284705, 491985 1284705))                           │
│ POLYGON ((492075 1284705, 492075 1284675, 492135 1284675, 492135 1284705, 492075 1284705))                           │
│ POLYGON ((492135 1284675, 492135 1284645, 492165 1284645, 492165 1284675, 492135 1284675))                           │
│ POLYGON ((492195 1284675, 492195 1284645, 492225 1284645, 492225 1284615, 492285 1284615, 492285 1284645, 492255 1…  │
│ POLYGON ((492525 1284615, 4925

In [95]:
con.sql("""
CREATE OR REPLACE TABLE urban AS 
SELECT huc12, ST_Intersection(ST_GeomFromWKB(huc12.geometry), urban.geometry) as geometry
FROM (SELECT geometry FROM urban) as urban
JOIN huc12 ON 
ST_Intersects(ST_GeomFromWKB(huc12.geometry), urban.geometry)
""")

In [96]:
con.sql("""
CREATE OR REPLACE TABLE urbanwetlands AS
SELECT name, wetlands.huc12, kcal, ST_Intersection((urban.geometry), wetlands.geometry) as geometry
FROM (SELECT geometry from urban) as urban
JOIN wetlands ON 
ST_Intersects(wetlands.geometry, (urban.geometry))
""")


In [99]:
con.sql("""
CREATE OR REPLACE TABLE urbanwetlands AS
SELECT DISTINCT geometry, name, huc12, ST_Area(geometry)*0.0001 AS ha, kcal, kcal*ha AS urbanNrgy FROM urbanwetlands
""")

In [102]:
con.sql("""
CREATE OR REPLACE TABLE urban AS
SELECT huc12, ST_Area(geometry)*0.0001 AS urbanHa, geometry FROM urban
""")

In [210]:
con.sql('select huc12, sum(urbanHa) from urban where huc12 = 080302020501 group by huc12')

┌──────────────┬───────────────────┐
│    huc12     │   sum(urbanHa)    │
│   varchar    │      double       │
├──────────────┼───────────────────┤
│ 080302020501 │ 438.7220613664415 │
└──────────────┴───────────────────┘

In [103]:
con.sql("""
CREATE OR REPLACE TABLE unavailable AS
SELECT huc12, ST_Area(geometry)*0.0001 AS unavailHa, ST_Union_Agg(geometry) as geometry FROM
(
SELECT huc12, geometry FROM urban
UNION ALL
SELECT huc12, geometry from protected
)
group by huc12, geometry
""")

In [107]:
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
'''
#################################
End of data import
Starting model process
#################################
'''
#### Prepping energy - Join energy to nwi.  Need to create the spatial kcal table first. What's the best way to do this?
# parquet is the best to read in but it's not easily editable.  Rest service would be ok but again, not great because
# reading those is difficult.  I wonder if

'\n#################################\nEnd of data import\nStarting model process\n#################################\n'

In [108]:
'''
Demand

######
Need to proportion demand based on available energy.  Available energy is spatially explicit but demand is at the fips
count level.  We need to calculate total energy and demand at the huc12 scale.
To proportion demand we need to calclulate total energy by fips then calculate how much energy is in each huc12. A proportion
can then be calculated by dividing total energy within a fips by (huc12,fips) group.  Demand at the huc12 level is multiplied
by that energy proportion.
######
'''

'\nDemand\n\n######\nNeed to proportion demand based on available energy.  Available energy is spatially explicit but demand is at the fips\ncount level.  We need to calculate total energy and demand at the huc12 scale.\nTo proportion demand we need to calclulate total energy by fips then calculate how much energy is in each huc12. A proportion\ncan then be calculated by dividing total energy within a fips by (huc12,fips) group.  Demand at the huc12 level is multiplied\nby that energy proportion.\n######\n'

In [140]:
'''
Read in demand clipped by hucs
'''
con.sql(f"""
CREATE OR REPLACE TABLE demandfull AS SELECT * EXCLUDE geometry, ST_GeomFromWKB(read_parquet.geometry) as geometry
FROM read_parquet('azure://abdu/Demand9Species.parquet')
JOIN huc12 ON 
ST_Intersects(ST_GeomFromWKB(read_parquet.geometry), ST_GeomFromWKB(huc12.geometry))
""")
con.sql(f"""
CREATE OR REPLACE TABLE demand AS SELECT * EXCLUDE geometry, ST_Intersection(ST_GeomFromWKB(huc12.geometry), ST_GeomFromWKB(read_parquet.geometry)) as geometry
FROM read_parquet('azure://abdu/Demand9Species.parquet')
JOIN huc12 ON 
ST_Intersects(ST_GeomFromWKB(huc12.geometry), ST_GeomFromWKB(read_parquet.geometry))
""")
con.sql("""CREATE OR REPLACE TABLE demand AS SELECT fips, huc12, CODE, LTADUD, LTADemand, LTAPopObj, x80DUD, X80Demand, X80PopObj, ST_Area(geometry)*0.0001 AS ha, geometry FROM (
SELECT * FROM demand
WHERE species='All')
""")

In [144]:
con.sql("""
CREATE OR REPLACE TABLE demandfull AS SELECT DISTINCT fips, ST_Union_Agg(geometry) as geometry from demandfull
group by fips
""")
fullfips = con.sql("""SELECT fips from demandfull""").df().values.tolist()
listfips = sorted([item for items in fullfips for item in items])
listfips

['05041', '05107', '28011', '28027', '28119', '28133']

In [145]:
con.execute("""
    CREATE OR REPLACE TABLE my_inserts (
        ATTRIBUTE VARCHAR,
        fips VARCHAR,
        geometry VARCHAR
    )""")
def write_from_thread(con):
    local_con = con.cursor()
    fip = str(current_thread().name)
    local_con.sql("SET azure_storage_connection_string = 'DefaultEndpointsProtocol=https;AccountName=giscog;EndpointSuffix=core.windows.net';")
    result = local_con.execute("""
    INSERT INTO my_inserts
    (
    SELECT ATTRIBUTE,fips, ST_Intersection(ST_GeomFromWKB(wetlnd.geometry), dmd.geometry) as geometry
    FROM (SELECT ATTRIBUTE, geometry FROM read_parquet('{1}')
    WHERE huc4 IN {2} AND WETLAND_TYPE != 'Riverine') AS wetlnd
    JOIN (SELECT fips, geometry FROM demandfull WHERE fips='{0}') as dmd ON 
    ST_Intersects(ST_GeomFromWKB(wetlnd.geometry), dmd.geometry)
    )
    """.format(fip, nwiurl, tuple(hucs))).fetchall()

In [147]:
threads = []
for i in range(len(listfips)):
    fip = listfips[i]
    threads.append(Thread(target = write_from_thread,
                            args = (con,),
                            name = fip))

In [148]:
%%time
# Kick off all threads in parallel
for thread in threads:
    thread.start()

# Ensure all threads complete before printing final results
for thread in threads:
    thread.join()

In [167]:
%%time
#con.sql("""CREATE OR REPLACE TABLE crossnwi AS (UNPIVOT (FROM (SELECT * FROM read_json_auto('aoiWetland.json', maximum_object_size=100000000))) ON COLUMNS(*))""")
#con.sql("""CREATE OR REPLACE TABLE crossnwi AS SELECT name, UNNEST(value) AS value FROM crossnwi""")
con.sql("""CREATE OR REPLACE TEMP TABLE tmpwet AS
SELECT DISTINCT geometry, name, fips FROM (SELECT DISTINCT geometry, ATTRIBUTE, fips FROM my_inserts) AS wetselect
LEFT JOIN crossnwi ON wetselect.ATTRIBUTE LIKE crossnwi.value
""")
con.sql(f"""CREATE OR REPLACE TEMP TABLE tmpwet AS
(SELECT replace(tmpwet.name, '_', '') AS name, fips, geometry, kcal FROM tmpwet
LEFT JOIN read_csv_auto('azure://abdu/kcal.csv') ON replace(tmpwet.name, '_', '') = read_csv_auto.habitatType
WHERE tmpwet.name IS NOT NULL)
""")
con.sql("""CREATE OR REPLACE TEMP TABLE newdata AS (
SELECT name, fips, kcal, ST_Area(geometry)*0.0001 AS ha, ha*kcal AS avalNrgy FROM(
SELECT name, fips, kcal, ST_Union_Agg(ST_GeomFromText(geometry)) AS geometry FROM tmpwet
GROUP BY name, fips, kcal))
""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: total: 44min 6s
Wall time: 17min 50s


In [169]:
print(con.sql('select count (fips) from tmpwet'))
print(con.sql('select count (distinct geometry) from tmpwet'))

┌─────────────┐
│ count(fips) │
│    int64    │
├─────────────┤
│       12772 │
└─────────────┘

┌──────────────────────────┐
│ count(DISTINCT geometry) │
│          int64           │
├──────────────────────────┤
│                    12772 │
└──────────────────────────┘



In [170]:
con.sql('select * from newdata order by fips, name')

┌──────────────────────────────┬─────────┬─────────┬─────────────────────┬────────────────────┐
│             name             │  fips   │  kcal   │         ha          │      avalNrgy      │
│           varchar            │ varchar │  int64  │       double        │       double       │
├──────────────────────────────┼─────────┼─────────┼─────────────────────┼────────────────────┤
│ DeepwaterFresh               │ 05041   │   61629 │  2679.6697475655674 │ 165145366.87271836 │
│ FreshMarsh                   │ 05041   │  347006 │  1027.3595218947305 │ 356499918.25460285 │
│ FreshShallowOpenWater        │ 05041   │   61629 │   616.4973814247918 │  37994117.11982849 │
│ FreshShores                  │ 05041   │   61629 │  10.443964706428448 │  643651.1008924788 │
│ FreshwaterAquaticBed         │ 05041   │  143562 │  145.58518024532407 │ 20900499.646379214 │
│ FreshwaterWoody              │ 05041   │  238961 │   53593.16547641877 │ 12806676415.410505 │
│ ManagedFreshMarsh            │ 05041  

In [171]:
con.sql("""CREATE OR REPLACE TABLE fipsavalNRG AS SELECT fips, sum(avalNrgy) as fipsnrgysum from newdata group by fips""")
#con.sql("""select * from fipsavalNRG""")

In [172]:
'''
######
Read in kcal.csv from azure and join to wetlands.
This file is the habitat type (habitatType) and the kcal/Ha (kcal)
######
'''
#
con.sql(f"""CREATE OR REPLACE TABLE demandwetlands AS
(SELECT replace(wetlands.name, '_', '') AS name, huc12, geometry, read_csv_auto.kcal FROM wetlands
LEFT JOIN read_csv_auto('azure://abdu/kcal.csv') ON replace(wetlands.name, '_', '') = read_csv_auto.habitatType
WHERE wetlands.name IS NOT NULL)
""")

In [177]:
con.sql('select * from demandwetlands')

┌─────────────────┬──────────────┬────────────────────────────────────────────────────────────────────────────┬────────┐
│      name       │    huc12     │                                  geometry                                  │  kcal  │
│     varchar     │   varchar    │                                  geometry                                  │ int64  │
├─────────────────┼──────────────┼────────────────────────────────────────────────────────────────────────────┼────────┤
│ FreshMarsh      │ 080201000200 │ POLYGON ((493679.19689999893 1273854.1123000002, 493662.3905999996 12738…  │ 347006 │
│ FreshMarsh      │ 080201000200 │ POLYGON ((482873.8872999996 1271788.0791999996, 482887.6768999994 127177…  │ 347006 │
│ FreshMarsh      │ 080201000200 │ POLYGON ((480508.1746999994 1265066.8224999998, 480498.20910000056 12650…  │ 347006 │
│ FreshMarsh      │ 080201000200 │ POLYGON ((470904.3658000007 1242377.2693999996, 470908.72940000147 12423…  │ 347006 │
│ FreshMarsh      │ 080201000200

In [178]:
con.sql("""CREATE OR REPLACE TABLE hucdemandenergy AS 
    (SELECT name,fips, demandwetlands.huc12, CODE, LTADUD, LTADemand, LTAPopObj, x80DUD, X80Demand, X80PopObj, kcal, 
    ST_Intersection(demandwetlands.geometry, (demand.geometry)) as geometry FROM demandwetlands
    JOIN demand ON ST_Intersects(demandwetlands.geometry, (demand.geometry)))""")

In [179]:
'''
######
Calculate available energy (avalNrgy) of wetlands by calculating area in Hectares (HA) and multiplying by kcal.
Select only distinct rows.
Create new table habitatenergy
######
'''
#
con.sql("""CREATE OR REPLACE TABLE hucdemandenergy AS (SELECT DISTINCT name, fips, huc12, CODE, LTADUD, LTADemand, LTAPopObj, x80DUD, X80Demand, X80PopObj, kcal, geometry, ST_Area(geometry)*0.0001 AS ha,ha*kcal AS avalNrgy FROM hucdemandenergy)""")
# DATA CHECK
#con.sql("""COPY (SELECT DISTINCT name, ST_AsWKB(ST_GeomFromText(geometry)) as geometry, ST_Area(ST_GeomFromText(geometry))*0.0001 AS ha, ha*kcal AS avalNrgy FROM wetlands) TO 'testwetland.parquet' (FORMAT PARQUET)""")

In [180]:
con.sql("""CREATE OR REPLACE TABLE test AS (select name, hucdemandenergy.fips as fips,huc12, CODE, LTADUD, LTADemand, LTAPopObj, x80DUD, X80Demand, X80PopObj, kcal, avalNrgy, fipsnrgysum, (hucdemandenergy.avalNrgy/fipsavalNRG.fipsnrgysum) as pct, geometry from hucdemandenergy
    JOIN fipsavalNRG on hucdemandenergy.fips = fipsavalNRG.fips
    GROUP BY name, hucdemandenergy.fips, huc12, CODE, LTADUD, LTADemand, LTAPopObj, x80DUD, X80Demand, X80PopObj, kcal, avalNrgy, fipsnrgysum, geometry)""")

In [181]:
fullfips = sorted([item for items in fullfips for item in items])
sqlcall ="""CREATE OR REPLACE TABLE rdydemand as SELECT * FROM test WHERE fips In {0}""".format(tuple(fullfips))
con.sql(sqlcall)

In [182]:
#con.sql("""describe rdydemand""")
con.sql("""CREATE OR REPLACE TABLE hucdemand AS (SELECT huc12, code, 
sum(pct * LTADUD) AS LTADUD,
sum(pct * LTADemand) AS LTADemand,
sum(pct * LTAPopObj) AS LTAPopObj,
sum(pct * x80DUD) AS x80DUD,
sum(pct * X80Demand) AS X80Demand,
sum(pct * X80PopObj) AS X80PopObj,
FROM rdydemand
GROUP BY huc12, code)""")

In [183]:
'''
END Demand
'''
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################
#################################

'\nEND Demand\n'

In [235]:
print(con.sql('select huc12, sum(urbanHa) from urban where huc12 = 080302020501 group by huc12'))
print(con.sql('select huc12, CODE, sum(LTADUD) from hucdemand where huc12 = 080302020501 group by huc12, CODE'))

┌──────────────┬───────────────────┐
│    huc12     │   sum(urbanHa)    │
│   varchar    │      double       │
├──────────────┼───────────────────┤
│ 080302020501 │ 438.7220613664415 │
└──────────────┴───────────────────┘

┌──────────────┬─────────┬───────────────────┐
│    huc12     │  CODE   │    sum(LTADUD)    │
│   varchar    │ varchar │      double       │
├──────────────┼─────────┼───────────────────┤
│ 080302020501 │ 4B      │ 444.9738085676442 │
│ 080302020501 │ 4D      │ 508903.6047255656 │
└──────────────┴─────────┴───────────────────┘



In [279]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT huc12.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj,
huc12.geometry
FROM huc12
LEFT JOIN hucdemand on hucdemand.huc12 = huc12.huc12
ORDER by huc12.huc12, CODE
""")

In [280]:
# Just added urbanNrgy line.  Going a very long time.  Might be a problem.
# Testing unavailHa.still running.  TAking a long time.  Check this line too.

# Specified selection in a cell or two below.  Many don't need geometry at this later point.  Joining is by huc12 so selecting
# only the required columns makes the join go much faster.
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, 
sum(avalNrgy) as tothabitat_kcal, 
athuclevel.geometry
FROM athuclevel
LEFT JOIN wetlands on wetlands.huc12 = athuclevel.huc12
GROUP BY athuclevel.huc12, CODE,LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, athuclevel.geometry
ORDER by athuclevel.huc12
""")

In [281]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, 
sum(urbanHa) as urbanHa,
athuclevel.geometry
FROM athuclevel
LEFT JOIN urban on urban.huc12 = athuclevel.huc12
GROUP BY athuclevel.huc12, CODE,LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, athuclevel.geometry
ORDER by athuclevel.huc12, CODE
""")

In [282]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa,
sum(protNrgy) as protected_kcal,
athuclevel.geometry
FROM athuclevel
LEFT JOIN protwetlands on protwetlands.huc12 = athuclevel.huc12
GROUP BY athuclevel.huc12, CODE,LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, athuclevel.geometry
ORDER by athuclevel.huc12, CODE
""")

In [283]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, protected_kcal,
sum(ProtHabHa) as protectedhabitat_ha,
sum(protNrgy) as protected_kcal,
athuclevel.geometry
FROM athuclevel
LEFT JOIN protwetlands on protwetlands.huc12 = athuclevel.huc12
GROUP BY athuclevel.huc12, CODE,LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, protected_kcal,athuclevel.geometry
ORDER by athuclevel.huc12, CODE
""")

In [284]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, protectedhabitat_ha, protected_kcal,
sum(urbanNrgy) as urbanNrgy,
athuclevel.geometry
FROM athuclevel
LEFT JOIN (SELECT huc12, urbanNrgy FROM urbanwetlands) as urbanwetlands on urbanwetlands.huc12 = athuclevel.huc12
GROUP BY athuclevel.huc12, CODE,LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, protectedhabitat_ha, protected_kcal, athuclevel.geometry
""")

In [285]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, protectedhabitat_ha, protected_kcal,urbanNrgy,
sum(unavailHa) as unavailHa,
athuclevel.geometry
FROM athuclevel
LEFT JOIN (SELECT huc12, unavailHa FROM unavailable) as unavailable on unavailable.huc12 = athuclevel.huc12
GROUP BY athuclevel.huc12, CODE,LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, urbanHa, protectedhabitat_ha, protected_kcal, urbanNrgy, athuclevel.geometry
""")

In [293]:
con.sql("""CREATE OR REPLACE TABLE athuclevel AS
SELECT athuclevel.huc12,
ST_Area(ST_GeomFromWKB(geometry))*0.0001 huc12_ha,
CODE, 
COALESCE(LTADUD, 0) dud_lta,
COALESCE(LTADemand,0) demand_lta_kcal, 
COALESCE(LTAPopObj,0) popobj_lta, 
COALESCE(X80DUD,0) dud_80th, 
COALESCE(X80Demand,0) demand_80th_kcal, 
COALESCE(X80PopObj,0) popobj_80th, 
COALESCE(tothabitat_kcal,0) tothabitat_kcal,
COALESCE(protected_kcal,0) protected_kcal,
COALESCE(protectedhabitat_ha,0) protectedhabitat_ha,
COALESCE(urbanHa,0) urbanHa, 
COALESCE(sum(urbanNrgy),0) urbanNrgy,
COALESCE(sum(unavailHa),0) unavailha,
COALESCE(sum(unavailHa),0) huc12_ha_unavailha,
COALESCE(tothabitat_kcal - demand_lta_kcal,0) surpdef_lta_kcal,
COALESCE(tothabitat_kcal - demand_80th_kcal,0) surpdef_80th_kcal,
athuclevel.geometry
FROM athuclevel
GROUP BY athuclevel.huc12, CODE, LTADUD, LTADemand, LTAPopObj, X80DUD, X80Demand, X80PopObj, tothabitat_kcal, protected_kcal, protectedhabitat_ha,urbanHa, geometry
ORDER BY athuclevel.huc12, CODE
""")

In [294]:
con.sql('''CREATE OR REPLACE TABLE athuclevel AS 
SELECT *,
CASE WHEN 
demand_lta_kcal - protected_kcal > 0
THEN
demand_lta_kcal - protected_kcal
ELSE 0
END
AS nrgprot_lta_kcal,
CASE WHEN
demand_80th_kcal - protected_kcal > 0 
THEN
demand_80th_kcal - protected_kcal
ELSE 0
END
AS nrgprot_80th_kcal
FROM athuclevel
''')

In [295]:
'''
Calculate weighted mean
'''
con.sql('''
CREATE OR REPLACE TABLE wtmean AS 
SELECT huctotal.huc12, name, avalNrgname/avalNrgtot as pct, hucnametotal.avalNrgname * pct as wtmean FROM
((SELECT huc12, sum(avalNrgy) as avalNrgtot from wetlands group by huc12) huctotal
join
(SELECT huc12, name, sum(avalNrgy) as avalNrgname from wetlands group by huc12, name) hucnametotal
on hucnametotal.huc12 = huctotal.huc12)
''')
con.sql('''CREATE OR REPLACE TABLE wtmeanpivot AS
(select * exclude pct FROM
(pivot wtmean
    on name
    USING sum(wtmean)))
''')

In [296]:
cols = con.sql('describe wtmeanpivot').df()['column_name'].tolist()
for cls in ('DeepwaterFresh', 'FreshMarsh', 'FreshShallowOpenWater', 'FreshwaterWoody', 'ManagedFreshMarsh', 'ManagedFreshShallowOpenWater', 'ManagedFreshwaterAquaticBed'):
    if cls not in cols:
        con.sql('''ALTER TABLE wtmeanpivot ADD COLUMN {0} DOUBLE'''.format(cls))

In [297]:

con.sql('''CREATE OR REPLACE TABLE wtmeanpivot AS 
SELECT
huc12,
COALESCE(DeepwaterFresh, 0) DeepwaterFresh,
COALESCE(FreshMarsh, 0) FreshMarsh, 
COALESCE(FreshShallowOpenWater, 0) FreshShallowOpenWater,
COALESCE(FreshwaterWoody, 0) FreshwaterWoody,
COALESCE(ManagedFreshMarsh, 0) ManagedFreshMarsh,
COALESCE(ManagedFreshShallowOpenWater, 0) ManagedFreshShallowOpenWater,
COALESCE(ManagedFreshwaterAquaticBed, 0) ManagedFreshwaterAquaticBed
FROM wtmeanpivot
''')
con.sql('''create or replace table wtmeanbyhuc as
        select huc12, 
        sum(DeepwaterFresh + FreshMarsh + FreshShallowOpenWater + FreshwaterWoody + ManagedFreshMarsh +ManagedFreshShallowOpenWater + ManagedFreshwaterAquaticBed)
        as wtmean from wtmeanpivot group by huc12''')

In [298]:
#########
########
con.sql('''CREATE OR REPLACE TABLE athuclevel AS
SELECT * 
from athuclevel
left join wtmeanbyhuc on athuclevel.huc12=wtmeanbyhuc.huc12
order by athuclevel.huc12
''')
con.sql('ALTER TABLE athuclevel RENAME wtmean TO wtMean_kcal_per_ha')

In [299]:
con.sql('''CREATE OR REPLACE TABLE athuclevel AS
SELECT *,
CASE WHEN 
surpdef_lta_kcal < 0
THEN
abs(surpdef_lta_kcal/wtMean_kcal_per_ha)
ELSE 0
END
AS restoregoal_lta_ha,

CASE WHEN 
surpdef_80th_kcal < 0
THEN
abs(surpdef_80th_kcal/wtMean_kcal_per_ha)
ELSE 0
END
AS restoregoal_80th_ha

FROM athuclevel
''')

In [300]:
## Need to double check huc12_ha and unavailha
con.sql('''CREATE OR REPLACE TABLE athuclevel AS 
        select huc12, huc12_ha, CODE as code, dud_lta, demand_lta_kcal, popobj_lta, dud_80th, demand_80th_kcal, popobj_80th,
        tothabitat_kcal, protected_kcal, protectedhabitat_ha, urbanHa, urbanNrgy, unavailha, surpdef_lta_kcal, surpdef_80th_kcal,
        nrgprot_lta_kcal, nrgprot_80th_kcal, wtMean_kcal_per_ha, restoregoal_lta_ha, restoregoal_80th_ha, 
        CASE WHEN
        huc12_ha - unavailha > 0
        THEN
        huc12_ha - unavailha
        ELSE 0
        END 
        AS available_ha,
        geometry
        FROM athuclevel
        ''')

In [301]:
con.sql('''CREATE OR REPLACE TABLE athuclevel AS
SELECT * EXCLUDE (restoregoal_lta_ha, restoregoal_80th_ha),
CASE WHEN 
restoregoal_lta_ha > available_ha
THEN
available_ha
ELSE restoregoal_lta_ha
END
AS restoregoal_lta_ha,

CASE WHEN 
restoregoal_80th_ha > available_ha
THEN
available_ha
ELSE restoregoal_80th_ha
END
AS restoregoal_80th_ha,

FROM athuclevel
''')

In [302]:
#field='protectgoal_lta_ha', expression="(!nrgprot_lta_kcal!/!wtMean_kcal_per_ha!) if !nrgprot_lta_kcal! > 0 else 0"
#field='protectgoal_80th_ha', expression="(!nrgprot_80th_kcal!/!wtMean_kcal_per_ha!) if !nrgprot_80th_kcal! > 0 else 0"
con.sql('''CREATE OR REPLACE TABLE athuclevel AS
SELECT *,
CASE WHEN 
nrgprot_lta_kcal > 0 
THEN
nrgprot_lta_kcal/wtMean_kcal_per_ha
ELSE 0
END
AS protectgoal_lta_ha,

CASE WHEN 
nrgprot_80th_kcal > 0
THEN
nrgprot_80th_kcal/wtMean_kcal_per_ha
ELSE 0
END
AS protectgoal_80th_ha,
FROM athuclevel
''')

In [303]:
#field='protectgoal_lta_ha', expression="!available_ha! if !protectgoal_lta_ha! > !available_ha! else !protectgoal_lta_ha!"
#field='protectgoal_80th_ha', expression="!available_ha! if !protectgoal_80th_ha! > !available_ha! else !protectgoal_80th_ha!"
con.sql('''CREATE OR REPLACE TABLE athuclevel AS
SELECT * EXCLUDE (protectgoal_lta_ha, protectgoal_80th_ha),
CASE WHEN 
protectgoal_lta_ha > available_ha
THEN
available_ha
ELSE protectgoal_lta_ha
END
AS  protectgoal_lta_ha,

CASE WHEN 
protectgoal_80th_ha > available_ha
THEN
available_ha
ELSE protectgoal_80th_ha
END
AS protectgoal_80th_ha,
FROM athuclevel
''')

In [304]:
'''
Protected wetlands, urban wetlands, and wetland energy all calculated by huc12.  Need to calculate total urban outside of
wetland energy

Calculations:
    Energy supply
        Total habitat energy within huc - THabNrg
        Total habitat hectares within huc - THabHA

    Energy demand
        LTA and X80 DUD by huc - TLTADUD anc X80DUD
        LTA and X80 Demand by huc - TLTADemand and X80Demand
        LTA and X80 Population objective by huc - LTAPopObj and X80PopObj
        
    Protected lands
        Total protected hectares by huc - ProtHA

    Protected habitat hectares and energy
        Total protected hectares - ProtHabHA
        Total protected energy - ProtHabNrg

    Weighted mean and calculations based off of it
        Weighted mean kcal/ha with weight being Total habitat energy
        Energy Protection needed - NrgProtRq
        Restoration HA based off of weighted mean - RstorHA
        Protection HA based off weighted mean - RstorProtHA  

'''
#################################
#################################
#################################


'\nProtected wetlands, urban wetlands, and wetland energy all calculated by huc12.  Need to calculate total urban outside of\nwetland energy\n\nCalculations:\n    Energy supply\n        Total habitat energy within huc - THabNrg\n        Total habitat hectares within huc - THabHA\n\n    Energy demand\n        LTA and X80 DUD by huc - TLTADUD anc X80DUD\n        LTA and X80 Demand by huc - TLTADemand and X80Demand\n        LTA and X80 Population objective by huc - LTAPopObj and X80PopObj\n        \n    Protected lands\n        Total protected hectares by huc - ProtHA\n\n    Protected habitat hectares and energy\n        Total protected hectares - ProtHabHA\n        Total protected energy - ProtHabNrg\n\n    Weighted mean and calculations based off of it\n        Weighted mean kcal/ha with weight being Total habitat energy\n        Energy Protection needed - NrgProtRq\n        Restoration HA based off of weighted mean - RstorHA\n        Protection HA based off weighted mean - RstorProtHA 

In [305]:
con.sql('describe athuclevel').df()

Unnamed: 0,column_name,column_type,null,key,default,extra
0,huc12,VARCHAR,YES,,,
1,huc12_ha,DOUBLE,YES,,,
2,code,VARCHAR,YES,,,
3,dud_lta,DOUBLE,YES,,,
4,demand_lta_kcal,DOUBLE,YES,,,
5,popobj_lta,DOUBLE,YES,,,
6,dud_80th,DOUBLE,YES,,,
7,demand_80th_kcal,DOUBLE,YES,,,
8,popobj_80th,DOUBLE,YES,,,
9,tothabitat_kcal,DOUBLE,YES,,,


In [308]:
print(con.sql('select huc12, sum(urbanHa) from urban where huc12 = 080302020501 group by huc12'))
print(con.sql('select huc12, sum(urbanHa) from athuclevel where huc12 = 080302020501 group by huc12, CODE'))
print(con.sql('select huc12, CODE, sum(LTADUD) from hucdemand where huc12 = 080302020501 group by huc12, CODE'))
print(con.sql('select huc12, CODE, sum(dud_lta) from athuclevel where huc12 = 080302020501 group by huc12, CODE'))

┌──────────────┬───────────────────┐
│    huc12     │   sum(urbanHa)    │
│   varchar    │      double       │
├──────────────┼───────────────────┤
│ 080302020501 │ 438.7220613664415 │
└──────────────┴───────────────────┘

┌──────────────┬───────────────────┐
│    huc12     │   sum(urbanHa)    │
│   varchar    │      double       │
├──────────────┼───────────────────┤
│ 080302020501 │ 438.7220613664415 │
│ 080302020501 │ 438.7220613664415 │
└──────────────┴───────────────────┘

┌──────────────┬─────────┬───────────────────┐
│    huc12     │  CODE   │    sum(LTADUD)    │
│   varchar    │ varchar │      double       │
├──────────────┼─────────┼───────────────────┤
│ 080302020501 │ 4B      │ 444.9738085676442 │
│ 080302020501 │ 4D      │ 508903.6047255656 │
└──────────────┴─────────┴───────────────────┘

┌──────────────┬─────────┬───────────────────┐
│    huc12     │  code   │   sum(dud_lta)    │
│   varchar    │ varchar │      double       │
├──────────────┼─────────┼───────────────────┤

In [309]:
end_time = time.ctime(time.time())
end_time

'Fri Feb 23 09:24:08 2024'

In [311]:
con.sql("""COPY (SELECT * EXCLUDE geometry, ST_AsWKB(ST_GeomFromWKB(geometry)) as geometry, FROM athuclevel) TO './output/{0}.parquet' (FORMAT PARQUET)""".format(inaoifile))
print('Done')

Done
