# Import Google Earth Engine python package (ee)
# Connect your service account (found in Google Cloud > IAM & Admin > Service Accounts)
# Load your Service Account Credentials (found in Google Cloud > APIs & Services > Credentials) through a JSON Key.
# https://developers.google.com/earth-engine/guides/service_account
# Note: Never share your personal JSON key, or upload it to github, etc. It should live in a private location.
# You may also need to register your service account with Earth Engine, using the same link as above.

In [1]:
#!/usr/bin/python

# Connect GCE service account to Earth engine API
# Note: Accessing EE Api through Cloud requires connecting your service account through a JSON Key
# https://gis.stackexchange.com/questions/350527/authentication-issue-earth-engine-python-using-ee-serviceaccountcredentials
# 

import ee
# E-mail address identifying the Google Cloud service account.
service_account = 'earthengine-lakex055-oct2022@spurgeeecloudprojectv2oct2022.iam.gserviceaccount.com'

# Build credentials from the private JSON key file, then initialise the
# Earth Engine client with them.
credentials = ee.ServiceAccountCredentials(
    service_account,
    '/home/moeller/lakex055/LeafySpurgeDemography/jsonKeys/spurgeeecloudprojectv2oct2022-557c961f0e42.json',
)
ee.Initialize(credentials)



# If you want to connect to google cloud, or manually connect your google earth engine account to this notebook, reference the code blocks below.

In [None]:
# Connect to google cloud

# import os
# from google.cloud import storage
# import gcloud
# from google.oauth2 import service_account

# # Set environment variables
# # Set environment variable GOOGLE_APPLICATION_CREDENTIALS to the path to a service account credentials file
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/home/moeller/lakex055/LeafySpurgeDemography/jsonKeys/pacific-engine-346519-244161a98ea0.json'

# # Solves issue connecting SSL cert request to google cloud storage bucket
# #https://stackoverflow.com/questions/63177156/tensorflow-dataloading-issue
# os.environ['CURL_CA_BUNDLE'] = "/etc/ssl/certs/ca-bundle.crt"

# SCOPES = ['https://www.googleapis.com/auth/drive']

# SERVICE_ACCOUNT_FILE = '/home/moeller/lakex055/LeafySpurgeDemography/jsonKeys/pacific-engine-346519-244161a98ea0.json'
# credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)

# ee.Initialize(credentials)


In [None]:
# Manually connect to google drive
#import ee
#ee.Authenticate()
#ee.Initialize()

# To automate export to google drive, one must:
# 1. Enable the Google Drive API on their Google Cloud Project Service Account
# 2. Share their target Google Drive folder (in their UMN account) with their google drive service account

# https://stackoverflow.com/questions/45492703/google-drive-api-oauth-and-service-account
# https://stackoverflow.com/questions/55882991/gee-python-api-export-image-to-google-drive-fails?rq=1


In [None]:
# Specify Arguments for file input (to select bounding box and extract training datasets)
# Used in Python script version (here I just specify a value for testing)
#from sys import argv

# Argument (integer) specifies what bounding box to select for sampling data
#input_value = int(argv[1])

# Other Python Library Imports

In [2]:
# Other module imports

# Use Conda environment: earthengine

import os
import pandas as pd
import numpy as np
import datetime
import pprint
import time
from functools import reduce
from pprint import pprint
import geemap #some advanced python functions for GEE
import fsspec # file system specification


In [None]:
# Tensorflow setup v 2.9.1

#import tensorflow as tf
#print(tf.__version__)

Load Model Functions (documentation needed)

In [3]:
# Define a function to transfer feature properties to a dictionary.
def fc_to_dict(fc):
  """Collect the properties of a feature collection into an ee.Dictionary.

  The property names are read from the first feature of ``fc``. One
  ``toList`` reducer per property name is run over the collection with
  ``reduceColumns``, and the resulting lists are paired with the names.

  Args:
    fc: feature collection to convert (anything exposing ``first`` and
      ``reduceColumns``).

  Returns:
    ee.Dictionary built with ``fromLists(property names, value lists)``.
  """
  names = fc.first().propertyNames()
  list_reducer = ee.Reducer.toList().repeat(names.size())
  columns = fc.reduceColumns(reducer=list_reducer, selectors=names)
  return ee.Dictionary.fromLists(names, columns.get('list'))


#Cloud Mask: https://gis.stackexchange.com/questions/274048/apply-cloud-mask-to-landsat-imagery-in-google-earth-engine-python-api
def getQABits(image, start, end, mascara):
    """Extract the bit field ``start..end`` (inclusive) from a QA image.

    Args:
        image: image whose first band holds the packed QA bits.
        start: index of the lowest bit to extract.
        end: index of the highest bit to extract.
        mascara: name given to the single output band.

    Returns:
        The first band of ``image``, renamed to ``mascara``, AND-ed with the
        bit pattern and shifted right so the field starts at bit 0.
    """
    # Integer with exactly the bits start..end set (0 when the range is empty).
    pattern = sum(2**bit for bit in range(start, end + 1))
    qa_band = image.select([0], [mascara])
    return qa_band.bitwiseAnd(pattern).rightShift(start)


#Saturated band Mask: https://gis.stackexchange.com/questions/363929/how-to-apply-a-bitmask-for-radiometric-saturation-qa-in-a-image-collection-eart
def bitwiseExtract(value, fromBit, toBit):
  """Return bits ``fromBit..toBit`` (inclusive) of ``value``, moved to bit 0.

  Args:
    value: object exposing ``rightShift`` and ``bitwiseAnd`` (called here
      with an image band).
    fromBit: index of the lowest bit to keep.
    toBit: index of the highest bit to keep.

  Returns:
    ``value`` shifted right by ``fromBit`` and AND-ed with a mask of
    ``toBit - fromBit + 1`` one-bits.
  """
  field_width = ee.Number(1).add(toBit).subtract(fromBit)
  low_bits = ee.Number(1).leftShift(field_width).subtract(1)
  shifted = value.rightShift(fromBit)
  return shifted.bitwiseAnd(low_bits)


#Function to mask out cloudy and saturated pixels and harmonize between Landsat 5/7/8 imagery 
def maskQuality(image):
    """Scale the optical/thermal bands of a Landsat Collection 2 image and mask bad pixels.

    Pixels are masked when QA_PIXEL flags cirrus, cloud or cloud shadow, or
    when any of QA_RADSAT bits 1-7 is set.

    The bit positions follow the Collection 2 QA_PIXEL layout
    (bit 2 = cirrus, bit 3 = cloud, bit 4 = cloud shadow). The previous
    version used bits 3/5/9, which is the Collection 1 ``pixel_qa`` layout;
    on Collection 2 data those bits mean cloud / snow / cloud-confidence, so
    snow was masked as cloud and cloud shadows were not masked at all.

    Args:
        image: image carrying ``QA_PIXEL`` and ``QA_RADSAT`` bands, ``SR_B*``
            bands, and the ``REFLECTANCE_MULT_BAND_*`` /
            ``REFLECTANCE_ADD_BAND_*`` scaling properties.

    Returns:
        The input image with the ``SR_B.|ST_B10`` bands overwritten by their
        scaled values and the quality masks applied.
    """
    # Select the QA band.
    qa_pixel = image.select('QA_PIXEL')

    # Single-bit flags from the Collection 2 QA_PIXEL band.
    cirrus_detected = getQABits(qa_pixel, 2, 2, 'cirrus_detected')
    cloud = getQABits(qa_pixel, 3, 3, 'cloud')
    cloud_shadow = getQABits(qa_pixel, 4, 4, 'cloud_shadow')

    # Non-zero when any of QA_RADSAT bits 1..7 is set.
    # NOTE(review): bit 0 is not checked - confirm that skipping it is intentional.
    qa_radsat = image.select('QA_RADSAT')
    saturated = bitwiseExtract(qa_radsat, 1, 7)

    # Apply the scaling factors to the appropriate bands.
    def getFactorImg(factorNames):
        # Constant image built from the image properties matching factorNames.
        factorList = image.toDictionary().select(factorNames).values()
        return ee.Image.constant(factorList)

    scaleImg = getFactorImg(['REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10'])
    offsetImg = getFactorImg(['REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10'])

    # Assumes the selected bands and the selected factor properties line up
    # one-to-one in the same order - TODO confirm for each sensor.
    scaled = image.select('SR_B.|ST_B10').multiply(scaleImg).add(offsetImg)

    # Replace original bands with scaled bands and apply each mask in turn.
    # The masks are chained on the image itself (the original nested them
    # inside one another by a misplaced parenthesis).
    return image.addBands(scaled, None, True) \
        .updateMask(cloud_shadow.eq(0)) \
        .updateMask(cloud.eq(0)) \
        .updateMask(cirrus_detected.eq(0)) \
        .updateMask(saturated.eq(0))


# Selects and renames bands of interest for Landsat OLI.
def renameOli(img):
  """Select the OLI bands of interest and give them sensor-independent names.

  Args:
    img: image exposing ``select(band_names, new_names)``.

  Returns:
    The result of ``img.select`` with SR_B2..SR_B7 renamed to
    Blue/Green/Red/NIR/SWIR1/SWIR2 and the two QA bands kept under their
    original names.
  """
  source_bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'QA_PIXEL', 'QA_RADSAT']
  common_names = ['Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'QA_PIXEL', 'QA_RADSAT']
  return img.select(source_bands, common_names)


# Selects and renames bands of interest for TM/ETM+.
def renameEtm(img):
  """Select the TM/ETM+ bands of interest and give them sensor-independent names.

  Args:
    img: image exposing ``select(band_names, new_names)``.

  Returns:
    The result of ``img.select`` with SR_B1..SR_B5 and SR_B7 renamed to
    Blue/Green/Red/NIR/SWIR1/SWIR2 and the two QA bands kept under their
    original names.
  """
  source_bands = ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'QA_PIXEL', 'QA_RADSAT']
  common_names = ['Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'QA_PIXEL', 'QA_RADSAT']
  return img.select(source_bands, common_names)


# Adding a NDVI band
def addNDVI(image):
  """Append an ``NDVI`` band computed from the ``NIR`` and ``Red`` bands.

  Args:
    image: image that already carries bands named ``NIR`` and ``Red``.

  Returns:
    ``image`` with one extra double-precision band named ``NDVI``.
  """
  normalized = image.normalizedDifference(['NIR', 'Red'])
  ndvi_band = normalized.toDouble().rename('NDVI')
  return image.addBands([ndvi_band])


def mapDates(image):
  """Attach the acquisition date of ``image`` as a ``date`` property.

  The date is read from ``system:time_start`` and formatted as
  ``YYYY-MM-dd``. The previous version passed the formatted date (a string)
  to ``addBands``, which expects an image, so the string could never become
  a band; the date is stored as image metadata instead.

  Args:
    image: image with a ``system:time_start`` property.

  Returns:
    ``image`` with an additional ``date`` property holding the formatted date.
  """
  date = ee.Date(image.get('system:time_start')).format("YYYY-MM-dd")
  return image.set('date', date)

# Prepares (renames) OLI images.
def prepOli(img):
  """Prepare an OLI image by selecting and renaming its bands via ``renameOli``."""
  return renameOli(img)


# Prepares (renames) TM/ETM+ images.
def prepEtm(img):
  """Prepare a TM/ETM+ image: rename its bands and re-attach its properties.

  Args:
    img: image to prepare.

  Returns:
    ee.Image with the bands selected by ``renameEtm`` and every property of
    the input image copied onto it.
  """
  renamed = renameEtm(img)
  with_properties = renamed.copyProperties(img, img.propertyNames())
  # copyProperties does not return an ee.Image directly, hence the cast.
  return ee.Image(with_properties)


# Renames the bands of a single median composite (``<band>_median``) to
# ``<band><season><year>``.
def renameImageBands_TM(img, year, season):
  """Rename ``*_median`` bands to carry the season and year instead.

  Args:
    img: image exposing ``select(band_names, new_names)`` with bands
      ``Blue_median`` ... ``NDVI_median``.
    year: year appended (via ``str``) to every output band name.
    season: season label inserted (via ``str``) before the year.

  Returns:
    The result of ``img.select`` with the seven bands renamed, e.g.
    ``Blue_median`` -> ``BlueMayJune2018``.
  """
  suffix = str(season) + str(year)
  base_names = ['Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'NDVI']
  reduced_names = [name + '_median' for name in base_names]
  output_names = [name + suffix for name in base_names]
  return img.select(reduced_names, output_names)

# Renames the bands of a median-of-medians composite (``<band>_median_median``)
# to ``<band><season><year>``.
def renameImageBands_ETMOLI(img, year, season):
  """Rename ``*_median_median`` bands to carry the season and year instead.

  Args:
    img: image exposing ``select(band_names, new_names)`` with bands
      ``Blue_median_median`` ... ``NDVI_median_median``.
    year: year appended (via ``str``) to every output band name.
    season: season label inserted (via ``str``) before the year.

  Returns:
    The result of ``img.select`` with the seven bands renamed, e.g.
    ``Blue_median_median`` -> ``BlueMayJune2018``.
  """
  suffix = str(season) + str(year)
  base_names = ['Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'NDVI']
  reduced_names = [name + '_median_median' for name in base_names]
  output_names = [name + suffix for name in base_names]
  return img.select(reduced_names, output_names)


# Earth Engine collection ids of the Landsat Collection 2 Level-2 products used below.
_LANDSAT5_TM = 'LANDSAT/LT05/C02/T1_L2'
_LANDSAT7_ETM = 'LANDSAT/LE07/C02/T1_L2'
_LANDSAT8_OLI = 'LANDSAT/LC08/C02/T1_L2'

# Seasonal windows as (band-name label, first day, day AFTER the last day).
# filterDate treats its end date as exclusive, so each window ends on the
# first day of the following month; the previous end dates (04-30, 06-30,
# 08-31) silently dropped the last day of every window.
_SEASON_WINDOWS = (
    ('MarchApril', '03-01', '05-01'),
    ('MayJune', '05-01', '07-01'),
    ('JulyAug', '07-01', '09-01'),
)


def _seasonalMedian(collection_id, prep, year, start, end, points):
  # Median composite of one sensor over one seasonal window, restricted to
  # scenes intersecting `points`, after masking, band renaming and NDVI.
  return ee.ImageCollection(collection_id) \
    .filterDate('{}-{}'.format(year, start), '{}-{}'.format(year, end)) \
    .filterBounds(points) \
    .map(maskQuality) \
    .map(prep) \
    .map(addNDVI) \
    .reduce('median')


def getLandsatMosaicFromPoints(year, points):
  '''
  Build the three seasonal (MarchApril, MayJune, JulyAug) median composites
  for one year, using the Landsat sensors available in that year.

  Time-series extraction developed from
  https://developers.google.com/earth-engine/tutorials/community/time-series-visualization-with-altair#combine_dataframes

  Sensor selection:
    1985-1999: Landsat 5 TM only.
    2000-2012: median of the Landsat 7 ETM+ and Landsat 5 TM medians.
    2013-2020: median of the Landsat 7 ETM+ and Landsat 8 OLI medians.

  Args:
    year: integer year between 1985 and 2020 (inclusive).
    points: geometry/collection passed to filterBounds to select scenes.

  Returns:
    List of three images ordered [MarchApril, MayJune, JulyAug], each with
    bands named <band><season><year> (e.g. NDVIMayJune2018).

  Raises:
    ValueError: if year is outside 1985-2020 (previously the function fell
      through and returned None, which only failed later and obscurely).
  '''
  if 1985 <= year <= 1999:
    sensors = [(_LANDSAT5_TM, prepEtm)]
  elif 2000 <= year <= 2012:
    sensors = [(_LANDSAT7_ETM, prepEtm), (_LANDSAT5_TM, prepEtm)]
  elif 2013 <= year <= 2020:
    sensors = [(_LANDSAT7_ETM, prepEtm), (_LANDSAT8_OLI, prepOli)]
  else:
    raise ValueError(
        'No Landsat sensor configured for year {}; expected 1985-2020.'.format(year))

  seasonal_images = []
  for season, start, end in _SEASON_WINDOWS:
    medians = [
        _seasonalMedian(collection_id, prep, year, start, end, points)
        for collection_id, prep in sensors
    ]
    if len(medians) == 1:
      # Single sensor: bands are named <band>_median.
      composite = renameImageBands_TM(medians[0], year, season)
    else:
      # Two sensors: take the median of the per-sensor medians, which
      # yields bands named <band>_median_median.
      combined = ee.ImageCollection(medians).reduce('median')
      composite = renameImageBands_ETMOLI(combined, year, season)
    seasonal_images.append(composite)

  return seasonal_images

# Add lat/long coordinate points to the landsat sample features
def AddPointCoordinates(feature):
    """Store a feature's coordinates as ``lon``/``lat`` properties and drop its geometry.

    Args:
        feature: feature whose geometry is transformed to ``epsg:4326``
            before its coordinates are read.

    Returns:
        The feature with ``lon`` set to coordinate 0, ``lat`` set to
        coordinate 1, and its geometry set to ``None``.
    """
    lon_lat = feature.geometry().transform('epsg:4326').coordinates()
    tagged = feature.set('lon', lon_lat.get(0), 'lat', lon_lat.get(1))
    return tagged.setGeometry(None)

def sampleImagestoDataFrame(listofEEImages, points=None):
    '''
    Sample every image in a list at the training points and merge the samples
    into one pandas dataframe.

    Each image is sampled with sampleRegions (scale=30, keeping the 'class'
    property), downloaded with getInfo, and turned into a dataframe. The
    dataframes are outer-merged on 'system:index' in list order, and rows
    with any missing value are dropped.

    Args:
        listofEEImages: non-empty list of images to sample (the notebook
            passes the three seasonal images of one Landsat year). All
            images in the list are sampled, not only the first three.
        points: feature collection of sample locations. When omitted, the
            module-level ``newpts`` is used, which is what this function
            relied on implicitly before the parameter existed.

    Returns:
        Merged pandas dataframe (rows/samples x bands) containing only
        samples present, with no missing values, in every image.

    Raises:
        ValueError: if listofEEImages is empty.
    '''
    if points is None:
        # Backward-compatible fallback to the global set by the calling script.
        points = newpts

    if not listofEEImages:
        raise ValueError('listofEEImages must contain at least one image.')

    data_frames = []
    for image in listofEEImages:
        samples_fc = image.sampleRegions(collection=points, properties=['class'], scale=30)
        # getInfo() downloads the sampled values to the client.
        samples_dict = fc_to_dict(samples_fc).getInfo()
        data_frames.append(pd.DataFrame(samples_dict))

    df_merged = reduce(
        lambda left, right: pd.merge(left, right, on='system:index', how='outer'),
        data_frames).fillna(np.nan)

    return df_merged.dropna(axis=0, how='any')



In [4]:


#Generate Bounding Box Coordinate List for Study Region ###
#Corners of the starting (first) 1 x 1 degree bounding box, each as [lon, lat]
XY_topLeft = [-116.976099, 48.904682]
XY_topRight = [-115.976099, 48.904682]
XY_bottomLeft = [-116.976099, 47.904682]
XY_bottomRight = [-115.976099, 47.904682]

lon_range = 31 #number of steps in longitude (31 columns of boxes at stepSize 1)
lat_range = 13 #number of steps in latitude (13 rows of boxes at stepSize 1; 31 x 13 = 403 boxes)

stepSize = 1 #step by 1 degree of long/latitude

def sliding_window(longitude_range, latitude_range, stepSize_box):
    """Enumerate every (longitude, latitude) offset of the sliding window.

    Longitude is the outer loop and latitude the inner loop, so the latitude
    offset varies fastest.

    Args:
        longitude_range: exclusive upper bound of the longitude offsets.
        latitude_range: exclusive upper bound of the latitude offsets.
        stepSize_box: step between consecutive offsets.

    Returns:
        Tuple ``(lon_list, lat_list)`` of two equally long lists; entry ``i``
        of each list forms one offset pair.
    """
    offsets = [
        (lon, lat)
        for lon in range(0, longitude_range, stepSize_box)
        for lat in range(0, latitude_range, stepSize_box)
    ]
    lon_list = [lon for lon, _ in offsets]
    lat_list = [lat for _, lat in offsets]
    return (lon_list, lat_list)


def bbox(longitude_range, latitude_range, stepSize_box, topLeft_coord, topRight_coord, bottomLeft_coord, bottomRight_coord, origin_coord=None):
  """Slide a starting box across the study region and list every window.

  Each corner is moved east by adding the longitude offset and south by
  subtracting the latitude offset. The previous ``abs(offset - lat)`` form
  gave the same numbers only while ``lat >= offset``; it mirrored windows
  back north once a window crossed the equator.

  Args:
    longitude_range: exclusive upper bound of the longitude offsets.
    latitude_range: exclusive upper bound of the latitude offsets.
    stepSize_box: step between consecutive offsets.
    topLeft_coord: ``[lon, lat]`` emitted 1st (and 5th) in every window.
    topRight_coord: ``[lon, lat]`` emitted 2nd in every window.
    bottomLeft_coord: ``[lon, lat]`` emitted 3rd in every window.
    bottomRight_coord: ``[lon, lat]`` emitted 4th in every window.
    origin_coord: unused; accepted (and now optional) for backward
      compatibility with existing callers.

  Returns:
    List of windows; each window is a 5-tuple of ``(lon, lat)`` tuples in
    the order (topLeft, topRight, bottomLeft, bottomRight, topLeft).

  Note:
    Because ``bottomLeft_coord`` is emitted before ``bottomRight_coord``, a
    caller that wants a non-self-intersecting ring must pass the
    bottom-right corner as ``bottomLeft_coord`` and vice versa.
  """
  # Generates two lists: one of longitude offsets and one of latitude offsets.
  lon_list, lat_list = sliding_window(longitude_range, latitude_range, stepSize_box)

  def shifted(corner):
    # Positions of one corner for every offset pair, as (lon, lat) tuples.
    return [(dx + corner[0], corner[1] - dy) for dx, dy in zip(lon_list, lat_list)]

  XY_topLeft_list = shifted(topLeft_coord)
  XY_topRight_list = shifted(topRight_coord)
  XY_bottomLeft_list = shifted(bottomLeft_coord)
  XY_bottomRight_list = shifted(bottomRight_coord)

  ### Bounding Box Coordinate List (first corner repeated to close the ring)
  windows = list(zip(XY_topLeft_list, XY_topRight_list, XY_bottomLeft_list, XY_bottomRight_list, XY_topLeft_list))

  return windows


# NOTE: XY_bottomRight is passed in the bottomLeft_coord slot and XY_bottomLeft in the
# bottomRight_coord slot. bbox() emits its bottomLeft_coord argument before its
# bottomRight_coord argument, so this swap makes each window come out as
# top-left, top-right, bottom-right, bottom-left, top-left.
bbox_windows = bbox(lon_range, lat_range, stepSize, XY_topLeft, XY_topRight, XY_bottomRight, XY_bottomLeft, XY_topLeft)

print(len(bbox_windows))

print(bbox_windows[1]) #Second bounding box (index 1: one step south of the first, top-left box at index 0)
print(bbox_windows[-1]) #Last bounding box (bottom right of study region)


403
((-116.976099, 47.904682), (-115.976099, 47.904682), (-115.976099, 46.904682), (-116.976099, 46.904682), (-116.976099, 47.904682))
((-86.976099, 36.904682), (-85.976099, 36.904682), (-85.976099, 35.904682), (-86.976099, 35.904682), (-86.976099, 36.904682))


In [5]:
# Code for making smaller bounding boxes within each larger 1degree lat/long box

# Corners of the first bounding box window, each as a [lon, lat] list
# (window order: top-left, top-right, bottom-right, bottom-left).
topleft_coord = list(bbox_windows[0][0])
topright_coord = list(bbox_windows[0][1])
bottomright_coord = list(bbox_windows[0][2])
bottomleft_coord = list(bbox_windows[0][3])

# 10 evenly spaced longitudes from the left edge to the right edge
row_coords = np.linspace(topleft_coord[0], topright_coord[0], num=10)

# 10 evenly spaced latitudes from the top edge to the bottom edge
col_coords = np.linspace(topleft_coord[1], bottomleft_coord[1], num=10)

# Flatten the 10 x 10 mesh into a list of [lon, lat] points; point (row r, column c)
# sits at index r*10 + c.
g = np.meshgrid(row_coords, col_coords)
grid = np.append(g[0].reshape(-1,1),g[1].reshape(-1,1),axis=1).tolist()

bbox_list = []

# Logic for small tiles within large tile: j is the index of the first point of a
# grid row, i the column. Each tile uses the point itself (+0), its right neighbour
# (+1), the point below-right (+11) and the point below (+10): 9 x 9 = 81 tiles.
for j in range(0, 90, 10):
    for i in range(0, 9, 1):
        origin = list(grid[j+i])
        topright = list(grid[j+i+1])
        bottomright = list(grid[j+i+11])
        bottomleft = list(grid[j+i+10])
        # Named small_bbox (not bbox) so the bbox() function defined above
        # is not overwritten at module level.
        small_bbox = [origin, topright, bottomright, bottomleft, origin]
        bbox_list.append(small_bbox)


bbox_list

[[[-116.976099, 48.904682],
  [-116.86498788888889, 48.904682],
  [-116.86498788888889, 48.79357088888889],
  [-116.976099, 48.79357088888889],
  [-116.976099, 48.904682]],
 [[-116.86498788888889, 48.904682],
  [-116.75387677777778, 48.904682],
  [-116.75387677777778, 48.79357088888889],
  [-116.86498788888889, 48.79357088888889],
  [-116.86498788888889, 48.904682]],
 [[-116.75387677777778, 48.904682],
  [-116.64276566666668, 48.904682],
  [-116.64276566666668, 48.79357088888889],
  [-116.75387677777778, 48.79357088888889],
  [-116.75387677777778, 48.904682]],
 [[-116.64276566666668, 48.904682],
  [-116.53165455555556, 48.904682],
  [-116.53165455555556, 48.79357088888889],
  [-116.64276566666668, 48.79357088888889],
  [-116.64276566666668, 48.904682]],
 [[-116.53165455555556, 48.904682],
  [-116.42054344444445, 48.904682],
  [-116.42054344444445, 48.79357088888889],
  [-116.53165455555556, 48.79357088888889],
  [-116.53165455555556, 48.904682]],
 [[-116.42054344444445, 48.904682],
  [

# Define Variables

This is a set of global variables used throughout the notebook. You must have a Google Cloud Storage bucket into which you can write files. You'll also need to specify your Earth Engine username i.e. users/USER_NAME.

In [7]:

# Define export for feature class assets
#OUTPUT_BUCKET = 'landcover_samples_nlcd2019_tfrecord_june2022_v2'

# Make sure the bucket exists.
#print('Found Cloud Storage bucket.' if tf.io.gfile.exists('gs://' + OUTPUT_BUCKET) 
#  else 'Output Cloud Storage bucket does not exist.')

# File-name prefixes of the exported training / testing / validation datasets.
TRAIN_FILE_PREFIX = 'Training_nlcd2019'
TEST_FILE_PREFIX = 'Testing_nlcd2019'
VALID_FILE_PREFIX = 'Validation_nlcd2019'

file_extension = '.tfrecord.gz'

# File-name prefix of the exported Landsat images.
IMAGE_FILE_PREFIX = 'landsat_image'

#TRAIN_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TRAIN_FILE_PREFIX + file_extension
#TEST_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TEST_FILE_PREFIX + file_extension
#VALID_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + VALID_FILE_PREFIX + file_extension

USER_NAME = 'lakex055'

# File name for the prediction (image) dataset.  The trained model will read
# this dataset and make predictions in each pixel.
#IMAGE_FILE_PREFIX = 'spurge_temporalcnn_demo'

# The output path for the classified image (i.e. predictions) TFRecord file.
#OUTPUT_IMAGE_FILE = 'gs://' + OUTPUT_BUCKET + '/spurge_temporalcnndemo.TFRecord'

# The name of the Earth Engine asset to be created by importing
# the classified image from the TFRecord file in Cloud Storage.
#OUTPUT_ASSET_ID = 'users/' + USER_NAME + '/spurge_temporalcnndemo'


# Band names follow the pattern '<year index>_<band><season><year>',
# e.g. '0_BlueMarchApril2018'. They are ordered by year, then season, then band.
_BAND_YEARS = (2018, 2019, 2020)
_BAND_SEASONS = ('MarchApril', 'MayJune', 'JulyAug')
_BAND_BASE_NAMES = ('Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'NDVI')

BANDS = [
    '{}_{}{}{}'.format(year_index, band, season, year)
    for year_index, year in enumerate(_BAND_YEARS)
    for season in _BAND_SEASONS
    for band in _BAND_BASE_NAMES
]

LABEL = 'class'

# Number of label values, i.e. number of classes in the classification.
N_CLASSES = 10

# These names are used to specify properties in the export of
# training/testing data and to define the mapping between names and data
# when reading into TensorFlow datasets.
FEATURE_NAMES = BANDS + [LABEL]


# List all objects currently in google cloud storage bucket (modify this for google drive)

In [None]:

# # Total number of files
# bucket_files = tf.io.gfile.listdir('gs://' + OUTPUT_BUCKET)
# #print(bucket_files)

# # File prefix for images
# IMAGE_FILE_PREFIX = 'spurge_temporalcnn_demo'

# # File prefix for training data 
# DATA_PREFIX = 'Training_nlcd2019'

# # Get a list of all the files in the output bucket.
# files_list = tf.io.gfile.listdir('gs://' + OUTPUT_BUCKET)

# # Get only the files generated by the image export.
# exported_files_list = [s for s in files_list if IMAGE_FILE_PREFIX in s]

# training_files_list = [t for t in files_list if DATA_PREFIX in t]

# print(training_files_list[0:5])

# # Get the list of image files and the JSON mixer file.
# image_files_list = []
# json_files_list = []
                       
# for f in exported_files_list:
#   if f.endswith('.tfrecord.gz'):
#     image_files_list.append(f)
#   elif f.endswith('.json'):
#     json_files_list.append(f)
    
                       
# training_files = []
# for t in training_files_list:
#     training_files.append(t)

                                  
# # Make sure the files are in the right order.
# image_files_list.sort()
# training_files.sort()


# # Print out the list of training tiles with training data and their corresponding index along the study area (range from 0 to ~370)
# # Note, some numbers may be skipped, if they fall outside of the study area with no data available
# training_files_split = [i.split('_')[2] for i in training_files]
# training_files_split2 = [j.split('.')[0] for j in training_files_split] #Intergers corresponding to tile number for sampling
# training_tile_list_intergers = [int(x) for x in training_files_split2]
# training_tile_list_intergers.sort()
# print(training_tile_list_intergers)

# # One JSON file is exported per image tile
# print(len(training_files))
# print(len(json_files_list))


# Generate Landsat Images from Small Tiles

In [None]:

# Specify tile input value to determine extent of selected Landsat samples
input_value = 302 #This value can be modified from python script via a Slurm job array!

#define years to sample data (corresponds to satellite image year)
years = [2018, 2019, 2020]

#Training points for leafy spurge & land cover classes (defines extent of landsat imagery)

#Load 1m training points sampled from 2019 NLCD and leafy spurge from 2018-2019-2020
pts = ee.FeatureCollection('projects/ee-lakex055/assets/leafyspurge_landcover_points_nlcd2019_sept2022')

print('Tile ' + str(input_value))

# Define Bounding Box
#bbox = bbox_windows[input_value]
#print(bbox)

# Define a custom, small bounding box.
#bbox = ((-93.47017, 44.74670), (-93.43953, 44.74694), (-93.43972, 44.72475), (-93.46871, 44.72493), (-93.47017, 44.74670))

# Define small bounding boxes around larger bbox_windows
# Corners of the selected bounding box window, each as a [lon, lat] list
topleft_coord = list(bbox_windows[input_value][0])
topright_coord = list(bbox_windows[input_value][1])
bottomright_coord = list(bbox_windows[input_value][2])
bottomleft_coord = list(bbox_windows[input_value][3])

# Return evenly spaced numbers between top left and top right coordinate (X dimension)
row_coords = np.linspace(topleft_coord[0], topright_coord[0], num=10)

# Return evenly spaced numbers between top left and bottom left coordinate (Y dimension)
col_coords = np.linspace(topleft_coord[1], bottomleft_coord[1], num=10)

# Create a list of points that would correspond to a grid
# (10 x 10 points flattened row by row: point (row r, column c) is at index r*10 + c)
g = np.meshgrid(row_coords, col_coords)
grid = np.append(g[0].reshape(-1,1),g[1].reshape(-1,1),axis=1).tolist()

bbox_list = []

# Logic for small bbox tiles within large bbox_windows tile
for j in range(0, 90, 10):
    for i in range(0, 9, 1):
        origin = list(grid[j+i])
        topright = list(grid[j+i+1])
        bottomright = list(grid[j+i+11])
        bottomleft = list(grid[j+i+10])
        # Named small_bbox (not bbox) so the bbox() function is not
        # overwritten at module level.
        small_bbox = [origin, topright, bottomright, bottomleft, origin]
        bbox_list.append(small_bbox)

#Sample one of the small bounding boxes
print(bbox_list[0])
#type(bbox_list) list type
#len(bbox_list) #length 81 (9 x 9 small tiles)

for k, small_bbox in enumerate(bbox_list):
    print(k)
    # Filter points based on AOI
    aoi = ee.Geometry.Polygon(small_bbox)
    #print(aoi)

    #Apply Filter
    newpts = pts.filterBounds(aoi)

    #How many points?
    count = newpts.size() #returns an EE.Number object that we need to convert to an integer
    # One getInfo() round trip only; the value is reused for the printout below.
    num_points = int(count.getInfo())
    print('Number of Points within AOI (Count): ', str(num_points)+'\n')

    if num_points > 0:
        # Sample imagery in a year filtered by input points
        # Each entry is a list of 3 EE images, corresponding to three seasons in a year
        # (e.g 2018: MarchApril, MayJune, JulyAug)
        yearly_mosaics = [getLandsatMosaicFromPoints(year, newpts) for year in years]

        LandsatCol_timeseries = ee.ImageCollection(yearly_mosaics)

        LandsatCol_timeseries_image = LandsatCol_timeseries.toBands().multiply(10000)
        #LandsatCol_timeseries_image.bandNames().getInfo()

        # Export imagery in this region.
        EXPORT_REGION = aoi

        # Specify patch and file dimensions.
        #image_export_options = {
        #  'patchDimensions': [512, 512],
        #  'maxFileSize': 104857600,
        #  'compressed': True
        #}

        # Setup the task.
        # https://developers.google.com/earth-engine/apidocs/export-image-todrive
        #image_task = ee.batch.Export.image.toDrive(
        #  image=LandsatCol_timeseries_image,
        #  description='Image_Export',
        #  folder='test_landsat_GEE_image_export',
        #  fileNamePrefix=IMAGE_FILE_PREFIX + "_" + str(input_value) + "_",
        #  scale=30,
        #  fileFormat='TFRecord',
        #  region=EXPORT_REGION.toGeoJSON()['coordinates'],
        #  formatOptions=image_export_options,
        #)

        #Takes 8*120 seconds to complete

        image_task = ee.batch.Export.image.toDrive(
          image=LandsatCol_timeseries_image.toInt16(),
          description='Image_Export',
          folder='test_landsat_GEE_image_export',
          fileNamePrefix=IMAGE_FILE_PREFIX + "_" + str(input_value) + "_" + str(k),
          scale=30,
          fileFormat='GeoTIFF',
          region=EXPORT_REGION.toGeoJSON()['coordinates'],
          shardSize=256, #must be no greater than 1024; larger shardSize --> larger tile
          fileDimensions=10240, #must be a multiple of 1024 and no greater than 131072
        )

        # Start the task.
        image_task.start() #takes around 20 minutes?

        #Wait for export tasks to finish
        while image_task.active():
          print('Polling for image task (state: {}).'.format(ee.data.getTaskStatus(image_task.id)[0].get('state')))
          time.sleep(60)

        # A task that is no longer active may have failed or been cancelled;
        # report that instead of silently moving on to the next tile.
        final_state = ee.data.getTaskStatus(image_task.id)[0].get('state')
        if final_state != 'COMPLETED':
          print('WARNING: image task for subtask {} ended in state {}.'.format(k, final_state))



# Printed once, after the last small tile has been processed (k is the last index).
print("Image Export Finished for Subtask" + str(k)) 
     
     
#EOF


Tile 302
[[-93.976099, 45.904682], [-93.86498788888889, 45.904682], [-93.86498788888889, 45.79357088888889], [-93.976099, 45.79357088888889], [-93.976099, 45.904682]]
0
Number of Points within AOI (Count):  56

Polling for image task (state: READY).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
1
Number of Points within AOI (Count):  72

Polling for image task (state: READY).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
2
Number of Points within AOI (Count):  42

Polling for image task (state: READY).
Polling for image task (state: RUNNING).
3
Number of Points within AOI (Count):  52

Polling for image task (state: READY).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
Polling for image task (state: RUNNING).
Polling f

In [None]:

# Examine the output operations of GEE exports.

# name': 'projects/earthengine-legacy/operations/M3YQYF6RHKXXZY3OOX6C3SJA',
#   'metadata': {'@type': 'type.googleapis.com/google.earthengine.v1alpha.OperationMetadata',
#    'state': 'SUCCEEDED',
#    'description': 'Testing Export',
#    'createTime': '2022-09-12T16:24:36.243155Z',
#    'updateTime': '2022-09-12T16:24:59.677571Z',
#    'startTime': '2022-09-12T16:24:53.680785Z',
#    'endTime': '2022-09-12T16:24:59.677571Z',
#    'type': 'EXPORT_FEATURES',
#    'destinationUris': ['https://drive.google.com/#folders/1RtgXcFgprd2vKt81RFymvQhtJOR_r9FH'],
#    'attempt': 1,
#    'progress': 1,
#    'stages': [{'displayName': 'Create Local Files',
#      'completeWorkUnits': 1,
#      'totalWorkUnits': '1',
#      'description': 'Computation and writing of temporary files.'},
#     {'displayName': 'Write Files to Destination',
#      'completeWorkUnits': 1,
#      'totalWorkUnits': '1',
#      'description': 'Uploading of files to the export destination.'}],
#    'batchEecuUsageSeconds': 6.92227840423584},
#   'done': True,
#   'response': {'@type': 'type.googleapis.com/google.protobuf.Empty'}}
    
#print('Polling for training (id: {}).'.format(ee.data.getTaskStatus(training_task.id)[0].get('state')))
#print('Polling for validation (id: {}).'.format(ee.data.getTaskStatus(validation_task.id)[0].get('state')))
#print('Polling for testing (id: {}).'.format(ee.data.getTaskStatus(testing_task.id)[0].get('state')))

# Fetch the list of Earth Engine operations; as the last expression of the
# notebook cell its return value is displayed.
ee.data.listOperations()

In [None]:

#Watch status of task array for exporting datasets

def getSlurmOutputs(filepath):
    """Read the last line of every ``.out`` file in a directory.

    The previous version overwrote ``filepath`` with a hard-coded path and
    then listed the module-level ``directory`` instead, so the argument was
    ignored; the argument is now the directory that is scanned.

    Args:
        filepath: path of the directory containing the Slurm ``.out`` files.

    Returns:
        List with one entry per ``.out`` file, in ``os.listdir`` order. Each
        entry is a list holding the file's last line (including its line
        ending, if any), or an empty list when the file is empty.
    """
    file_output_list = []
    for filename in os.listdir(filepath):
        if not filename.endswith(".out"):
            continue
        # Stream the file and keep only the most recent line instead of
        # loading every line into memory.
        last_line = None
        with open(os.path.join(filepath, filename), "r") as f:
            for last_line in f:
                pass
        file_output_list.append([] if last_line is None else [last_line])
    return file_output_list




# Directory holding the Slurm .out files to watch.
directory = '/home/moeller/lakex055/LeafySpurgeDemography/slurmScripts'

# Text looked for in the last line of each .out file.
starting = "cuda"
finished = "Data Task Export Finished"

# Report the counts once a minute; runs until the cell/script is interrupted.
while True:
    #File Export States
    file_output_list = getSlurmOutputs(directory)

    # Each entry is a list holding a file's last line; empty files yield an
    # empty list and are skipped (indexing them raised IndexError before).
    last_lines = [entry[0] for entry in file_output_list if entry]

    finished_count = sum(finished in line for line in last_lines)
    starting_count = sum(starting in line for line in last_lines)

    #summarize results
    status = {'finished': finished_count, 'starting': starting_count}
    df = pd.DataFrame(data = status, index = [0])
    print(df)
    
    time.sleep(60)
#print(file_output_list[0][0]) #file status case 1
#print(file_output_list[1]) #file status case 2

#print(file_name_list)
#print(file_name_list[1])

#test_case = ['Polling for image task (state: RUNNING).\n', 'Polling for training (id: COMPLETED).\n', 'Polling for validation (id: COMPLETED).\n', 'Polling for testing (id: COMPLETED).\n']


