In [None]:
#Import ee and required packages
# NOTE(review): ee.Initialize() presumably relies on Earth Engine credentials that
# were already stored on this machine (e.g. by an earlier ee.Authenticate()) -- confirm.
import ee
ee.Initialize()
import pandas as pd 
import numpy as np
import glob
import geopandas as gpd

# Wildcard import: presumably the source of the helpers that later cells call
# unqualified (filter_date_space, merge_coll, embedd_date, reduce_HUCS) -- verify
# against src/gee_funs.py.
from src.gee_funs import *
# Aliased as `bac`; bac.build_all_cubes() is called later to build the yearly
# covariate images.
import src.build_annual_cube as bac

In [None]:
# ------------------------------------------------------------------
# STUDY CONFIGURATION
# Edit the values in this cell to point the notebook at a different
# state, study period or Earth Engine user folder.
# ------------------------------------------------------------------

# Full state name; matched against the NAME property of the TIGER states layer.
STATE = "Montana"

# Full state name -> abbreviation; used to filter the HUC layer on its
# 'states' property.
state_abbrevs = dict(Montana='MT')

# Bounds of the study period (passed to range() and to bac.build_all_cubes).
start_year, end_year = 2002, 2018

# Root Earth Engine asset folder (keep the trailing slash: paths are built by
# plain string concatenation).
gee_path = 'users/kjchristensen93/'



In [None]:
#Define Modular Variables:
# Every input asset and output location used by the cells that follow.
# All paths are derived from the configuration cell (gee_path, STATE,
# state_abbrevs) and from the folder variables defined here, so switching
# user, state or folder only requires a change in one place.

#If you have a spatially thinned data set, start here after initializing ee

#Taxa thinned dataset (same asset path the 'EBT_SThin' table export writes to)
SThin = ee.FeatureCollection(gee_path + 'EBT_data/EBT_SThin')

#Study dates
#Note we are limited to 2002 - 2018 due to the water year covariate
# range() excludes its stop value, so `years` runs start_year .. end_year - 1
# (2002 - 2017 with the current configuration).
# NOTE(review): confirm whether end_year itself is meant to be included.
years = range(start_year, end_year)
# Server-side list holding one ee.Date (January 1st) per study year.
s_dates = ee.List([ee.Date(str(year) + '-01-01') for year in years])

#HUC state geojson file (named after the state abbreviation)
HUC_state = './datasets/hucs/' + state_abbrevs[STATE] + '_HUCS.geojson'

#Define export locations:
#GEE yearly covariate asset prefix; the year is appended to it by the
#export and training cells (e.g. .../covariates_test2002)
assetId = gee_path + 'covariates/covariates_test'
#User training csv local directory folder
trainingdata = './datasets/training/'
#User decadal image local directory folder
decadalfolder = './datasets/decade/'
#TODO: define the export naming convention in one place (possibly a helper function)


#### ML Variables ####

#Glob pattern matching every training CSV inside `trainingdata`
trainingglob = trainingdata + '*.csv'
#decadal CSV directory and naming conventions
decade1 = decadalfolder + 'decade1_filename.csv'
decade2 = decadalfolder + 'decade2_filename.csv'
#decadal predictions
decade1_pred = decadalfolder + 'decade1_pred_filename.csv'
decade2_pred = decadalfolder + 'decade2_pred_filename.csv'

#######################

In [None]:
# Only needed when the spatially thinned asset does not exist yet; otherwise go
# straight to the "Define Modular Variables" cell.
# Prerequisite: the formatted CSV must already be uploaded to your GEE assets
# with its Lat/Long columns defined.

# Thinning distance handed to filter_date_space.
# NOTE(review): presumably metres -- confirm in src/gee_funs.py.
distance = 500

# Un-thinned occurrence records for the taxon.
Taxa_og = ee.FeatureCollection(
    gee_path + 'EBT_data/EBT_mfish_data_presence_heuristic'
)
# Handle on the same collection; this is the one passed to the thinning helper.
coll = ee.FeatureCollection(Taxa_og)

In [None]:
# Spatially thin the occurrence records, one study year at a time, then fold
# the per-year results into a single collection.

# Initial accumulator for the merge below.
# NOTE(review): the seed is the full un-thinned collection, so depending on how
# merge_coll combines its arguments the result may still contain every original
# record. Confirm this is intended (an empty collection may have been meant).
first = ee.FeatureCollection(Taxa_og)

# Apply the thinning helper once per entry of s_dates (one date per year).
feats = s_dates.map(lambda date: filter_date_space(date, coll, distance))

# Merge the per-year filtered collections, starting from `first`.
spatially_thin = ee.FeatureCollection(
    feats.iterate(merge_coll, first)
)

In [None]:
# Write the thinned collection to a GEE table asset and start the batch task.
# Change the description for a different dataset; the destination follows gee_path.
thinned_asset_path = gee_path + 'EBT_data/EBT_SThin'
export3 = ee.batch.Export.table.toAsset(
    collection=spatially_thin,
    description='EBT_SThin',
    assetId=thinned_asset_path,
)

export3.start()

In [None]:
# Years (as strings) exported by the yearly covariate image loop.
# TODO: support an explicit list for intermittent datasets with missing years;
# empty outputs cause issues later on.
import time
# The list starts at start_year because the export loop pairs the i-th entry
# with the i-th image of banded_images_list, which is built from start_year
# (presumably in ascending year order -- confirm in build_all_cubes).
years = [str(y) for y in range(start_year, 2005)]  ##FIXME: end year still hardcoded


In [None]:
# Study-area geometry, so the whole state can be exported at once.
# The TIGER 2016 layer is filtered on its NAME property, so STATE must be the
# full state name (e.g. "Montana"), not the abbreviation.
states = ee.FeatureCollection("TIGER/2016/States")
state_name_filter = ee.Filter.eq('NAME', STATE)
geometry = states.filter(state_name_filter).geometry()

In [None]:
# HUC12 watershed polygons (USGS WBD 2017).
HUC = ee.FeatureCollection("USGS/WBD/2017/HUC12")

# Keep the HUCs whose 'states' property equals the abbreviation of STATE
# (looked up in state_abbrevs, e.g. 'MT').
# NOTE(review): this is an exact-equality match; if 'states' can list several
# states for a HUC that crosses a border, those HUCs would be dropped -- confirm
# against the dataset.
state_code_filter = ee.Filter.eq('states', state_abbrevs[STATE])
HUC_clip = HUC.filter(state_code_filter)

In [None]:
# Embed the observation year as system:start_time on every feature of the
# thinned dataset by mapping the embedd_date helper over it.
# The "Year" column had to be added manually to the datasets, so make sure your
# dataset has the correct column headings.
# NOTE(review): the exact column name read by embedd_date is not visible here --
# check src/gee_funs.py.
SThin_map = SThin.map(embedd_date)

In [None]:
## Define helper filters and lists to iterate over
#========================================================
# Build lists to map over
#========================================================
# List from which absences will be built.
# s_dates is already wrapped in ee.List where it is defined; this wraps the
# same yearly dates again under a second name.
ee_dates = ee.List(s_dates)


In [None]:

#========================================================
# Run the covariate algorithm and build a list of images,
# with each image corresponding to one year and each band corresponding to one covariate
#========================================================

# The export loop later indexes this result by position via ee.List(...).get(i).
# NOTE(review): whether end_year is included in the list is decided inside
# build_all_cubes -- confirm in src/build_annual_cube.py.
banded_images_list = bac.build_all_cubes(start_year, end_year)



In [None]:
#Skip this step if you already have the yearly covariate images stored in GEE
#Export Yearly Covariate Images
#
# Starts one Earth Engine batch export per entry of `years`. The i-th year is
# paired with the i-th image of banded_images_list. Each image is written to
# `assetId` + year, the same location the training-CSV cell reads from.

# Export each image within the for loop
for i, y in enumerate(years):
    # str() keeps the concatenations below working even if `years` currently
    # holds ints (a later cell rebinds it to a range).
    year_label = str(y)
    print("Starting", y)
    img = ee.Image(ee.List(banded_images_list).get(ee.Number(i)))
    export = ee.batch.Export.image.toAsset(image = img,
                    description = 'covariate_' + year_label,
                    assetId = assetId + year_label,
                    region = ee.Geometry(geometry),
                    scale =  100,
                    maxPixels = 1e13)
    export.start()

    # Status immediately after submission
    print(y,"status:    ", export.status()['state'])

    # Wait 15 seconds so that the reported state gives insightful information
    time.sleep(15)
    print(y,"status:    ", export.status()['state'])

    # If this status is "RUNNING", then there are no egregious syntax errors.
    # However, it is still possible that the export fails after these 30 seconds
    # of polling. In that case a Computation Time Out Error is the likely cause
    # (remember exporting the annual stacks).
    time.sleep(15)
    print(y,"status:    ", export.status()['state'])
    

In [None]:
#Start Here if you have yearly covariates created

#Export training CSVs
## Reduce Regions from existing images
#
# For every year in `years`, load the exported covariate image from
# `assetId` + year, reduce it with reduce_HUCS against the thinned
# observations and the state's HUCs, and write the result to
# <trainingdata>/<year>.csv.

# COVARIATE IMAGES

path = assetId
years = range(start_year, 2005)
images = [ee.Image(path + str(year)) for year in years]
banded_images_asset_list = ee.List(images)

for i, year in enumerate(years):
    print("Starting", year)

    img = ee.Image(banded_images_asset_list.get(i))
    data = reduce_HUCS(img,SThin_map,HUC_clip)

    # getInfo() pulls the features to the client; keep only each feature's
    # 'properties' dict so every feature becomes one DataFrame row.
    my_csv = pd.DataFrame([x['properties'] for x in data.getInfo()['features']])

    # Write it directly to our directory; the yearly files are stitched together
    # afterwards. The file is named after the actual year being processed.
    my_csv.to_csv(trainingdata + str(year) + '.csv', index=False)
    print("Finished", year)

In [None]:
# Build the image used to project habitat suitability.
# Decades were convenient for RBT, but not for other taxa with less data; change
# the slice below to match your dataset.

# TODO: derive the slice bounds automatically from the year range defined above.
# Mean of the first (up to) seven yearly covariate images. `images` comes from
# the training-CSV cell; with its current range (start_year up to 2004) it holds
# only three images, so the slice takes all of them.
first_decade = ee.ImageCollection.fromImages(images[0:7]).mean()

#second_decade = ee.ImageCollection.fromImages(images[7:]).mean()


In [None]:
# Summarise the averaged covariate image per HUC and save the table as a CSV.
first_decade_img = ee.Image(first_decade)

# Mean of every band within each HUC polygon.
first_csv = first_decade_img.reduceRegions(
    collection=HUC_clip,
    reducer=ee.Reducer.mean(),
    scale=100,
    crs='EPSG:4326',
    tileScale=16,
)

# getInfo() brings the features client-side; each feature's 'properties' dict
# becomes one row of the DataFrame.
huc_records = [feature['properties'] for feature in first_csv.getInfo()['features']]
first_decade_data = pd.DataFrame(huc_records)

# Written straight to the decadal folder and stitched together afterwards.
# TODO: consider 2- and 5-year bins for taxa with limited data / to make the
# output more useful for managers.
# NOTE(review): unlike the training CSVs, this file is written with the
# DataFrame index as its first column -- confirm downstream readers expect that.
first_decade_data.to_csv(decade1)



In [None]:
import src.ml_funs