In [None]:
#Import ee and required packages
import ee
ee.Initialize()
import pandas as pd 
import numpy as np
import glob
import geopandas as gpd

from src.gee_funs import *

In [None]:
# ---- RUN CONFIGURATION ----

# Full state name plus the lookup that turns it into the two-letter code.
STATE = 'Montana'
state_abbrevs = dict(Montana='MT')

# Bounds of the study period (calendar years).
start_year, end_year = 2002, 2018

# Root of the user's Earth Engine asset tree; other cells append to this
# string directly, so the trailing slash is required.
gee_path = 'users/kjchristensen93/'



In [None]:
#Define Modular Variables:

#If you have a spatially thinned data set, start here after initializing ee

#Taxa thinned dataset (lives under the user's asset root, gee_path)
SThin = ee.FeatureCollection(gee_path + 'EBT_data/EBT_SThin')

#Study dates
#Note we are limited to 2002 - 2018 due to the water year covariate

### One ee.Date (January 1st) per year, from start_year up to but NOT including end_year ###
# range() excludes its upper bound, so with 2002/2018 this yields 2002 ... 2017.
years = range(start_year, end_year)
s_dates = ee.List([ee.Date(str(year) + '-01-01') for year in years])

#HUC state geojson file (file name is keyed by the state's two-letter code)
HUC_state = './datasets/hucs/' + state_abbrevs[STATE] + '_HUCS.geojson'

#Define export locations:
#GEE yearly covariate folder
assetId = gee_path + 'covariates/covariates_test'
#User training csv local directory folder
trainingdata = './datasets/training/'
#User decadal image local directory folder
decadalfolder = './datasets/decade/'
#Define export naming convention? Maybe we define a function within code above for naming conventions


#### ML Variables ####

#Training Glob: every CSV inside the training directory defined above
trainingglob = trainingdata + '*.csv'
#decadal CSV directory and naming conventions
decade1 = decadalfolder + 'decade1_filename.csv'
decade2 = decadalfolder + 'decade2_filename.csv'
#decadal predictions
decade1_pred = decadalfolder + 'decade1_pred_filename.csv'
decade2_pred = decadalfolder + 'decade2_pred_filename.csv'

#######################

In [None]:
# Only needed when the spatially thinned asset has not been created yet;
# otherwise the SThin collection loaded in the "Define Modular Variables"
# cell above is all that is required.
# Prerequisite: the formatted CSV (with Lat/Long columns defined) must
# already be uploaded to your GEE assets.

# Distance handed to filter_date_space in the thinning cell.
# NOTE(review): units are not visible here -- presumably metres; confirm in src.gee_funs.
distance = 500

# Raw, unthinned observations.
Taxa_og = ee.FeatureCollection(
    gee_path + 'EBT_data/EBT_mfish_data_presence_heuristic'
)
coll = ee.FeatureCollection(Taxa_og)

In [None]:
# Spatial thinning, applied to one study year at a time: every date in
# s_dates is passed to filter_date_space with the raw collection and the
# thinning distance.
feats = s_dates.map(
    lambda year_start: filter_date_space(year_start, coll, distance)
)

# Fold the per-year results into one collection with merge_coll.
# NOTE(review): the fold is seeded with the full, unthinned Taxa_og
# collection. If merge_coll simply merges its arguments, the output will
# also contain every original point -- confirm against src.gee_funs whether
# an empty ee.FeatureCollection([]) seed was intended.
first = ee.FeatureCollection(Taxa_og)
spatially_thin = ee.FeatureCollection(
    feats.iterate(merge_coll, first)
)

In [None]:
# Create the batch task that writes the thinned collection to a GEE asset,
# then submit it.
export3 = ee.batch.Export.table.toAsset(
    collection=spatially_thin,
    description='EBT_SThin',  # <----- CHANGE NAME FOR DIFFERENT DATA
    assetId=gee_path + 'EBT_data/EBT_SThin',  # <----- CHANGE export location FOR DIFFERENT USER
)

export3.start()

In [None]:
# Years (as strings) intended to drive the yearly covariate-image and
# training-CSV exports.
# NOTE(review): `years` is assigned again in the training-CSV cell further
# down, so check which definition is live before relying on this one.
# Open question from the author: could this become an explicit list so that
# intermittent datasets with missing years are supported? Empty outputs
# cause problems later on.
import time

# Half-open range: 2002, 2003, 2004.
years = list(map(str, range(2002, 2005)))  ##FIXME: hardcoded


In [None]:
# Study-area geometry: pick the state whose TIGER 'NAME' property equals
# STATE (the FULL state name, not the abbreviation) so the whole state can
# be handled at once.
states = ee.FeatureCollection("TIGER/2016/States")
geometry = (
    states
    .filter(ee.Filter.eq('NAME', STATE))
    .geometry()
)

In [None]:
# HUC12 watershed polygons.
HUC = ee.FeatureCollection("USGS/WBD/2017/HUC12")

# Keep only the HUCs whose 'states' property equals the two-letter code of
# the study state (looked up from STATE via state_abbrevs).
# NOTE(review): this is an exact match. If 'states' can hold several codes
# for watersheds that cross a state line, those polygons are dropped here --
# confirm against the dataset's property description.
HUC_clip = HUC.filter(
    ee.Filter.eq('states', state_abbrevs[STATE])
)

In [None]:
# Apply embedd_date (from src.gee_funs, not visible here) to every feature of
# the thinned dataset. Per the original note, this embeds the observation
# year as the feature's timestamp property.
# The year column had to be added to the datasets manually, so make sure
# your dataset has the column headings embedd_date expects.
SThin_map = SThin.map(embedd_date)

In [None]:
# Build Big Raster Image
## Import assets
# Every name below is bound to a public Earth Engine asset; later cells
# select bands from them, mask them and aggregate them per year.

# MODIS collection from which the LST_Day_1km band is taken later on
modusGlobal = ee.ImageCollection("MODIS/006/MYD11A2")

# Primary Productivity (GPP)
GPP = ee.ImageCollection("UMT/NTSG/v2/LANDSAT/GPP")

# Surface water (not referenced again in the visible cells)
pikelSurfaceWater = ee.Image("JRC/GSW1_1/GlobalSurfaceWater")

# Elevation
DEM = ee.Image("USGS/NED")

# MODIS collection from which the EVI and NDVI bands are taken later on
modusVeg = ee.ImageCollection("MODIS/006/MYD13A2")

# Heat Insolation Load
CHILI = ee.Image("CSP/ERGo/1_0/Global/SRTM_CHILI")

# Topographic Diversity
topoDiversity = ee.Image("CSP/ERGo/1_0/Global/ALOS_topoDiversity")

# Vegetation Continuous Field product - percent tree cover, etc
VCF = ee.ImageCollection("MODIS/006/MOD44B")

# Human Modification index
gHM = ee.ImageCollection("CSP/HM/GlobalHumanModification")

# Climate information
NLDAS = ee.ImageCollection("NASA/NLDAS/FORA0125_H002")

# Shape file containing Country Boundaries (not referenced again in the visible cells)
countries = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")

# Shape file containing HUC polygons
# (same asset that the HUC-clipping cell above already binds to HUC)
HUC = ee.FeatureCollection("USGS/WBD/2017/HUC12")

# Dynamic Surface Water metric (monthly history)
pekel_monthly_water = ee.ImageCollection("JRC/GSW1_2/MonthlyHistory")

# Static surface water metric (monthly recurrence)
pekel_static_water = ee.ImageCollection('JRC/GSW1_2/MonthlyRecurrence')



In [None]:
## Band selection and renaming
#========================================================
# Pull the bands of interest out of each asset and give
# them descriptive names
#========================================================

# NLDAS: one single-band collection per climate variable
NLDAS_precip = NLDAS.select("total_precipitation")
NLDAS_temp = NLDAS.select("temperature")
NLDAS_humid = NLDAS.select("specific_humidity")
NLDAS_potEvap = NLDAS.select("potential_evaporation")

# Terrain indices: rename the band, then keep a handle on the renamed band
CHILI = CHILI.rename(['Heat_Insolation_Load'])
srtmChili = CHILI.select('Heat_Insolation_Load')

topoDiversity = topoDiversity.rename(["Topographic_Diversity"])
topoDiv = topoDiversity.select("Topographic_Diversity")

# Human modification: 'gHM' band of the collection's first image
footprint = ee.Image(gHM.first().select("gHM"))

# Surface water occurrence: mean of the monthly recurrence band over the
# whole collection, renamed, with masked pixels unmasked
sw_occurrence = (
    pekel_static_water
    .select('monthly_recurrence')
    .mean()
    .rename(['SurfaceWaterOccurrence'])
    .unmask()
)


In [None]:
## Define helper filters and lists to iterate over
#========================================================
# Build Lists from which to map over
#========================================================
# Server-side list of the study start dates (s_dates wrapped in ee.List);
# build_annual_cube is mapped over it in a later cell.
# Original note: "List from which absences will be built".
ee_dates = ee.List(s_dates)


In [None]:
## Quality-control masking
# Each collection is run through its QC helper (all star-imported from
# src.gee_funs) before any band is selected.
GPP_QC = GPP.map(gpp_qc)

LST = modusGlobal.map(lst_qc).select("LST_Day_1km")

modusVeg_QC = modusVeg.map(modusQC)
EVI = modusVeg_QC.select("EVI")
NDVI = modusVeg_QC.select("NDVI")

VCF_qc = VCF.map(VCFqc)


#========================================================
# Point-join helpers, so that each HUC can carry a list of the
# observations that fall inside it
#========================================================
# Geometry-intersection filter between the two sides of a join
distFilter = ee.Filter.intersects(
    leftField='.geo',
    rightField='.geo',
    maxError=100,
)

# Join that stores all matches under the 'Points' property
pointJoin = ee.Join.saveAll(matchesKey='Points')

In [None]:
## Annual Cube function
#========================================================
# "Builder Function" -- processes each annual variable into a list of images
#========================================================

def build_annual_cube(d):
    """Build one multi-band covariate image for a single aggregation year.

    Parameters
    ----------
    d : anything accepted by ``ee.Date``
        Start-of-year date; in this notebook an element of ``ee_dates``.

    Returns
    -------
    ee.Image
        The static bands (surface water occurrence, elevation, heat
        insolation load, topographic diversity, human modification) followed
        by ``Max_LST_Annual``, ``Mean_GPP``, ``Mean_NDVI``, ``Mean_EVI``,
        ``Percent_Tree_Cover``, the four seasonal precipitation totals and
        ``Flashiness``. The image is unmasked and its ``system:time_start``
        is set to the start of the aggregation window.

    Notes
    -----
    * The aggregation window is the year AFTER ``d`` (see FIXME below).
    * Unlike the original, the land-surface-temperature maximum is now
      restricted to the aggregation window; previously it was taken over
      the whole LST collection, so every year received the same value.
    * The original also computed seasonal temperature/humidity summaries and
      a water-year precipitation total but never added them to the returned
      image; those dead computations were removed. Add them to
      ``banded_image`` explicitly if they are wanted (this changes the
      exported column set).
    """
    # Time window for all time-dependent predictors.
    # FIXME (carried over from the original): the window is advanced by one
    # year "to account for water year", which shifts the nominal 2002-2018
    # range forward by a year. Left unchanged pending a decision.
    startDate = ee.Date(d).advance(1.0, 'year').millis()
    endDate = ee.Date(d).advance(2.0, 'year').millis()

    # Four consecutive three-month windows measured from the window start;
    # the last one runs to the end of the window.
    window_start = ee.Date(startDate)
    season_windows = [
        ('winter', window_start, window_start.advance(3, 'month')),
        ('spring', window_start.advance(3, 'month'), window_start.advance(6, 'month')),
        ('summer', window_start.advance(6, 'month'), window_start.advance(9, 'month')),
        ('fall', window_start.advance(9, 'month'), ee.Date(endDate)),
    ]

    def add_seasonal_info(imgCol, name):
        """Return a 4-band image of per-season sums named '<season>_total<name>'."""
        totals = [imgCol.filterDate(begin, end).sum()
                  for _, begin, end in season_windows]
        names = [season + '_total' + name for season, _, _ in season_windows]
        # First season is the base image; the remaining three are appended.
        return totals[0].addBands(totals[1:]).rename(names)

    seasonal_precip = add_seasonal_info(NLDAS_precip, "Precip")

    #========================================================
    # Aggregate other covariates over the window
    #========================================================

    # Vegetation Continuous Fields (uses the raw VCF collection, not VCF_qc,
    # exactly as the original did).
    meanVCF = VCF.filterDate(startDate, endDate).mean()

    # Mean EVI over the window.
    maxEVI = EVI.filterDate(startDate, endDate) \
                .mean() \
                .rename(['Mean_EVI'])

    # Mean NDVI over the window.
    maxNDVI = NDVI.filterDate(startDate, endDate) \
                  .mean() \
                  .rename(["Mean_NDVI"])

    # Flashiness: per-pixel sample standard deviation of monthly water.
    flashiness_yearly = ee.Image(
        pekel_monthly_water.filterDate(startDate, endDate)
                           .reduce(ee.Reducer.sampleStdDev())
                           .select(["water_stdDev"])
    ).rename("Flashiness")

    # Max LST over the window (BUG FIX: the date filter was missing).
    maxLST = LST.filterDate(startDate, endDate) \
                .max() \
                .rename(["Max_LST_Annual"])

    # Mean GPP over the window; only 'Mean_GPP' is added to the stack below.
    maxGPP = GPP_QC.filterDate(startDate, endDate) \
                   .mean() \
                   .rename(['Mean_GPP', 'QC'])

    # Bands that do not change over time.
    static_input_bands = sw_occurrence.addBands(DEM.select("elevation")) \
                                      .addBands(srtmChili) \
                                      .addBands(topoDiv) \
                                      .addBands(footprint)

    # Stack everything into one image and stamp it with the window start.
    banded_image = static_input_bands \
        .addBands(srcImg=maxLST, names=["Max_LST_Annual"]) \
        .addBands(srcImg=maxGPP, names=["Mean_GPP"]) \
        .addBands(srcImg=maxNDVI, names=["Mean_NDVI"]) \
        .addBands(srcImg=maxEVI, names=["Mean_EVI"]) \
        .addBands(meanVCF.select("Percent_Tree_Cover")) \
        .addBands(seasonal_precip) \
        .addBands(flashiness_yearly) \
        .set("system:time_start", startDate)

    return banded_image.unmask()




In [None]:

#========================================================
# Run the covariate builder once per study date: one image per year,
# one band per covariate
#========================================================
annual_cubes = ee_dates.map(build_annual_cube)

# The same result, exposed both as an image collection and as a list
banded_images = ee.ImageCollection(annual_cubes)
banded_images_list = ee.List(annual_cubes)

# Reduce every annual image with reduce_HUCS, using the date-stamped
# observations and the state's HUC polygons
annual_stacks = ee.FeatureCollection(
    banded_images.map(
        lambda annual_img: reduce_HUCS(annual_img, SThin_map, HUC_clip)
    )
)




In [None]:
#Start Here if you have yearly covariates created

#Export training CSVs
## Reduce Regions from existing images

# COVARIATE IMAGES
# One exported covariate asset per year, addressed as <assetId><year>.
path = assetId
# NOTE: the upper bound is hardcoded and excluded, so this covers
# start_year ... 2016.
years = range(start_year, 2017)
images = [ee.Image(path + str(year)) for year in years]
banded_images_asset_list = ee.List(images)

# Each CSV is named after the year it was built from. The original named
# the file with a hardcoded 2002 + i while logging start_year + i, which
# would mislabel the files as soon as start_year changed.
for year, img in zip(years, images):
    print("Starting", year)

    data = reduce_HUCS(img, SThin_map, HUC_clip)

    ## PYTHON API MAGIC!! LOOK HERE
    # getInfo() pulls the features to the client; keep only their properties.
    my_csv = pd.DataFrame([x['properties'] for x in data.getInfo()['features']])

    # Write each year straight to the training directory; the files are
    # stitched together afterwards.
    my_csv.to_csv(trainingdata + str(year) + '.csv', index=False)
    print("Finished", year)

In [None]:
# Build the image used to project habitat suitability.
# Decades were convenient for RBT, but not for other taxa with less data, so
# change the binning to match your dataset.

# Open question from the author: can this be derived automatically from the
# year range defined above?
# Per-pixel mean of the first seven yearly covariate images in `images`
# (the slice bounds are hardcoded).
first_decade = ee.ImageCollection.fromImages(images[0:7]).mean()

# Disabled: mean of the remaining yearly images.
#second_decade = ee.ImageCollection.fromImages(images[7:]).mean()


In [None]:
# Summarise the averaged image per HUC polygon and save the table as a CSV
first_decade_img = ee.Image(first_decade)

# Mean of every band inside each HUC
first_csv = first_decade_img.reduceRegions(
    collection=HUC_clip,
    reducer=ee.Reducer.mean(),
    crs='EPSG:4326',
    scale=100,
    tileScale=16,
)

#PYTHON API MAGIC!! LOOK HERE
# getInfo() pulls the features to the client; keep only their properties
first_decade_data = pd.DataFrame(
    [feature['properties'] for feature in first_csv.getInfo()['features']]
)

# Written straight to the decade path; stitched together afterwards.
# Idea from the author: consider 2- and 5-year bins, since some taxa have
# limited data and shorter bins may be more useful for managers.
# NOTE(review): unlike the yearly training CSVs, this file is written with
# the DataFrame index column included.
first_decade_data.to_csv(decade1)

