<a href="https://colab.research.google.com/github/lake-thomas/spurge-temporal-cnn/blob/main/Temporal_CNN_Leafy_Spurge_Generate_Training_Datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction
This is a working Python notebook to implement Google Earth Engine <> TensorFlow for mapping invasive plant species from a time-series of Landsat imagery. In this example, the inputs are invasive species occurrence records from public databases. The model uses 1D-Conv layers in a temporal CNN framework.

In [3]:
# Cloud Authentication 
# Required When Using Default Google Cloud (i.e. Not Using a Hosted VM Runtime Environment)

#Connect to hosted VM https://console.cloud.google.com/marketplace/product/colab-marketplace-image-public/colab?project=pacific-engine-346519

from google.colab import auth
auth.authenticate_user()

In [4]:
# Import, authenticate and initialize the Earth Engine library.
import ee
ee.Authenticate()
ee.Initialize()


To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=Pj-6MhdhQGcMf_JY8iIU60_9lLbNT-9rR7fO6LbFCaQ&tc=mgxjR-UVCgJZlE0Mbhi3Ik7A0WWkza9QC_p9QSeKb_g&cc=YHDXGeJcgDX2oy93m3OpPEAkUfTDHJLeM1s0c2Ch99A

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: 4/1AX4XfWgABNTWzAgJyxiBq1cSHbvcn9QxpbTS-ujljU-bqq9sssFp8bhRlY4

Successfully saved authorization token.


In [5]:
#Mount Google Drive for CSV reading
from google.colab import drive
drive.mount('/content/drive')

#Used to export to google cloud
from google.cloud import storage
import os

# Only need this if you're running on GCE
#os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'/content/drive/MyDrive/Colab Notebooks/pacific-engine-346519-8368a64310cd.json'



Mounted at /content/drive


In [None]:
#Ignore Warnings and Errors
!pip install geemap
import geemap #advanced python function for GEE
!pip install geopandas
import geopandas #Pandas library to handle geospatial data
!pip install fsspec
import fsspec # file system specification
!pip install gcsfs
import gcsfs #google cloud file system

In [7]:
import pandas as pd
import numpy as np
import datetime
import pprint
import time
from functools import reduce
from pprint import pprint

In [8]:
# Tensorflow setup.
import tensorflow as tf
print(tf.__version__)


2.8.2


#Load Functions
Functions to process Landsat imagery

In [9]:
# Define a function to transfer feature properties to a dictionary.
def fc_to_dict(fc):
  prop_names = fc.first().propertyNames()
  prop_lists = fc.reduceColumns(
      reducer=ee.Reducer.toList().repeat(prop_names.size()),
      selectors=prop_names).get('list')

  return ee.Dictionary.fromLists(prop_names, prop_lists)


#Cloud Mask: https://gis.stackexchange.com/questions/274048/apply-cloud-mask-to-landsat-imagery-in-google-earth-engine-python-api
def getQABits(image, start, end, mascara): 
    # Compute the bits we need to extract.
    pattern = 0
    for i in range(start,end+1):
        pattern += 2**i
    # Return a single band image of the extracted QA bits, giving the     band a new name.
    return image.select([0], [mascara]).bitwiseAnd(pattern).rightShift(start)


#Saturated band Mask: https://gis.stackexchange.com/questions/363929/how-to-apply-a-bitmask-for-radiometric-saturation-qa-in-a-image-collection-eart
def bitwiseExtract(value, fromBit, toBit):
  maskSize = ee.Number(1).add(toBit).subtract(fromBit)
  mask = ee.Number(1).leftShift(maskSize).subtract(1)
  return value.rightShift(fromBit).bitwiseAnd(mask)


#Function to mask out cloudy and saturated pixels and harmonize between Landsat 5/7/8 imagery 
def maskQuality(image):
    # Select the QA band.
    QA = image.select('QA_PIXEL')
    # Get the internal_cloud_algorithm_flag bit.
    sombra = getQABits(QA,3,3,'cloud_shadow')
    nubes = getQABits(QA,5,5,'cloud')
    #  var cloud_confidence = getQABits(QA,6,7,  'cloud_confidence')
    cirrus_detected = getQABits(QA,9,9,'cirrus_detected')
    #var cirrus_detected2 = getQABits(QA,8,8,  'cirrus_detected2')
    #Return an image masking out cloudy areas.
    QA_radsat = image.select('QA_RADSAT')
    saturated = bitwiseExtract(QA_radsat, 1, 7)

    #Apply the scaling factors to the appropriate bands.
    def getFactorImg(factorNames):
      factorList = image.toDictionary().select(factorNames).values()
      return ee.Image.constant(factorList)

    scaleImg = getFactorImg(['REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10'])

    offsetImg = getFactorImg(['REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10'])
    
    scaled = image.select('SR_B.|ST_B10').multiply(scaleImg).add(offsetImg)

    #Replace original bands with scaled bands and apply masks.
    return image.addBands(scaled, None, True).updateMask(sombra.eq(0)).updateMask(nubes.eq(0).updateMask(cirrus_detected.eq(0).updateMask(saturated.eq(0))))


# Selects and renames bands of interest for Landsat OLI.
def renameOli(img):
  return img.select(
    ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'QA_PIXEL', 'QA_RADSAT'],
    ['Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'QA_PIXEL', 'QA_RADSAT'])


# Selects and renames bands of interest for TM/ETM+.
def renameEtm(img):
  return img.select(
    ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'QA_PIXEL', 'QA_RADSAT'],
    ['Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'QA_PIXEL', 'QA_RADSAT'])


# Adding a NDVI band
def addNDVI(image):
  ndvi = image.normalizedDifference(['NIR', 'Red']).rename('NDVI')
  return image.addBands([ndvi])


def mapDates(image):
  date = ee.Date(image.get('system:time_start')).format("YYYY-MM-dd")
  return image.addBands([date])

# Prepares (renames) OLI images.
def prepOli(img):
  img = renameOli(img)
  return img


# Prepares (renames) TM/ETM+ images.
def prepEtm(img):
  orig = img
  img = renameEtm(img)
  return ee.Image(img.copyProperties(orig, orig.propertyNames()))


# Selects and renames bands of interest for TM/ETM+.
def renameImageBands_TM(img, year, season):
  return img.select(
      ['Blue_median', 'Green_median', 'Red_median', 'NIR_median', 
       'SWIR1_median', 'SWIR2_median', 'NDVI_median'],
      ['Blue'+str(season)+str(year), 'Green'+str(season)+str(year), 'Red'+str(season)+str(year), 'NIR'+str(season)+str(year),
       'SWIR1'+str(season)+str(year), 'SWIR2'+str(season)+str(year), 'NDVI'+str(season)+str(year)])

# Selects and renames bands of interest for TM/ETM+.
def renameImageBands_ETMOLI(img, year, season):
  return img.select(
      ['Blue_median_median', 'Green_median_median', 'Red_median_median', 'NIR_median_median', 
       'SWIR1_median_median', 'SWIR2_median_median', 'NDVI_median_median'],
      ['Blue'+str(season)+str(year), 'Green'+str(season)+str(year), 'Red'+str(season)+str(year), 'NIR'+str(season)+str(year),
       'SWIR1'+str(season)+str(year), 'SWIR2'+str(season)+str(year), 'NDVI'+str(season)+str(year)])


def getLandsatMosaicFromPoints(year, points):
  '''
  #Time-series extraction developed from
  #https://developers.google.com/earth-engine/tutorials/community/time-series-visualization-with-altair#combine_dataframes  

  '''

  #if Year is between 1985 and 1999 use Landsat 5 TM imagery
  if 1985 <= year <= 1999:

    tmColMarchApril = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMarchApril = renameImageBands_TM(tmColMarchApril, year, 'MarchApril')

    tmColMayJune = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMayJune = renameImageBands_TM(tmColMayJune, year, 'MayJune')

    tmColJulyAug = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColJulyAug = renameImageBands_TM(tmColJulyAug, year, 'JulyAug')

    landsat5ImageCol = [tmColMarchApril, tmColMayJune, tmColJulyAug]
    return landsat5ImageCol

  #if Year is between 2000 and 2012 use mosaic from Landsat 5 TM and Landsat 7 ETM imagery
  elif 2000 <= year <= 2012:

    etmColMarchApril = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMarchApril = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    MarchApril = ee.ImageCollection([etmColMarchApril, tmColMarchApril])

    etmColMarchApril = MarchApril.reduce('median')

    etmColMarchApril = renameImageBands_ETMOLI(etmColMarchApril, year, 'MarchApril')

    etmColMayJune = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMayJune = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    MayJune = ee.ImageCollection([etmColMayJune, tmColMayJune])

    etmColMayJune = MayJune.reduce('median')

    etmColMayJune = renameImageBands_ETMOLI(etmColMayJune, year, 'MayJune')

    etmColJulyAug = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColJulyAug = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    JulyAug = ee.ImageCollection([etmColJulyAug, tmColJulyAug])

    etmColJulyAug = JulyAug.reduce('median')

    etmColJulyAug = renameImageBands_ETMOLI(etmColJulyAug, year, 'JulyAug')

    landsat5_7ImageCol = [etmColMarchApril, etmColMayJune, etmColJulyAug]
    return landsat5_7ImageCol

  #if Year is between 2013 and 2020 use mosaic from Landsat 7 ETM and Landsat 8 OLI imagery
  elif 2013 <= year <= 2020:

    etmColMarchApril = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    oliColMarchApril = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepOli) \
      .map(addNDVI) \
      .reduce('median')

    MarchApril = ee.ImageCollection([etmColMarchApril, oliColMarchApril])

    etmColMarchApril = MarchApril.reduce('median')

    etmColMarchApril = renameImageBands_ETMOLI(etmColMarchApril, year, 'MarchApril')

    etmColMayJune = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    oliColMayJune = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepOli) \
      .map(addNDVI) \
      .reduce('median')

    MayJune = ee.ImageCollection([etmColMayJune, oliColMayJune])

    etmColMayJune = MayJune.reduce('median')

    etmColMayJune = renameImageBands_ETMOLI(etmColMayJune, year, 'MayJune')

    etmColJulyAug = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median') \

    oliColJulyAug = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepOli) \
      .map(addNDVI) \
      .reduce('median')

    JulyAug = ee.ImageCollection([etmColJulyAug, oliColJulyAug])

    etmColJulyAug = JulyAug.reduce('median')

    etmColJulyAug = renameImageBands_ETMOLI(etmColJulyAug, year, 'JulyAug')

    landsat7_8ImageCol = [etmColMarchApril, etmColMayJune, etmColJulyAug]

    return landsat7_8ImageCol



def sampleImagestoDataFrame(listofEEImages):
    '''
    Function takes in a list of three images from a Landsat imagery year (T1, T2, T3)
    Returns a merged pandas dataframe of dimensions (rows/samples x bands) ordered from t-1, t, t+1
    '''
    image1 = listofEEImages[0]
    image2 = listofEEImages[1]
    image3 = listofEEImages[2]

    image1_fc = image1.sampleRegions(collection=newpts, properties=['class'], scale=30)
    image2_fc = image2.sampleRegions(collection=newpts, properties=['class'], scale=30)
    image3_fc = image3.sampleRegions(collection=newpts, properties=['class'], scale=30)

    image1_db_dict = fc_to_dict(image1_fc).getInfo()
    image2_db_dict = fc_to_dict(image2_fc).getInfo()
    image3_db_dict = fc_to_dict(image3_fc).getInfo()

    image1_df = pd.DataFrame(image1_db_dict)
    image2_df = pd.DataFrame(image2_db_dict)
    image3_df = pd.DataFrame(image3_db_dict)

    data_frames = [image1_df, image2_df, image3_df]

    df_merged = reduce(lambda left,right: pd.merge(left, right, on='system:index', how='outer'), data_frames).fillna(np.nan)

    df_merged_dropna = df_merged.dropna(axis=0, how = 'any')

    return df_merged_dropna



#Create Bounding Box Moving Windows Across Study Region

In [10]:


#Generate Bounding Box Coordinate List for Study Region ###
#Starting position of bounding box
XY_topLeft = [-116.976099, 48.904682]
XY_topRight = [-115.976099, 48.904682]
XY_bottomLeft = [-116.976099, 47.904682]
XY_bottomRight = [-115.976099, 47.904682]

lon_range = 31 #study area spans 31 deg lon
lat_range = 12 #study area spans 12 deg lat

stepSize = 1 #step by 1 degree of long/latitude


def sliding_window(longitude_range, latitude_range, stepSize_box):
    lon_list = []
    lat_list = []
    for lon in range(0, longitude_range, stepSize_box):
      for lat in range(0, latitude_range,stepSize_box):
        lon_list.append(lon)
        lat_list.append(lat)
    
    return(lon_list, lat_list)

def bbox(longitude_range, latitude_range, stepSize_box, topLeft_coord, topRight_coord, bottomLeft_coord, bottomRight_coord):
  #Creates a sliding window across the lat/long range
  #Returns a list of all lat/long boxes to sample 
     
  lon_list, lat_list = sliding_window(longitude_range, latitude_range, stepSize_box) #Generates two lists: one of longitude[0-31] and one of latitude [0-12] defining study region

  #for w in range(len(windows[0])):
  #  w_lon = windows[0][w]
  #  w_lat = windows[1][w]
  #  #print(w_lon, w_lat)

  #Top Left Coordinates for BBox
  lon_list_X_topLeft = [x + topLeft_coord[0] for x in lon_list]
  lat_list_Y_topLeft = [abs(x - topLeft_coord[1]) for x in lat_list]
  XY_topLeft_list = list(zip(lon_list_X_topLeft, lat_list_Y_topLeft))

  #Bottom Left Coordinates for BBox
  lon_list_X_bottomLeft = [x + bottomLeft_coord[0] for x in lon_list]
  lat_list_Y_bottomLeft = [abs(x - bottomLeft_coord[1]) for x in lat_list]
  XY_bottomLeft_list = list(zip(lon_list_X_bottomLeft, lat_list_Y_bottomLeft))

  #Top Right Coordinates for BBox
  lon_list_X_topRight = [x + topRight_coord[0] for x in lon_list]
  lat_list_Y_topRight = [abs(x - topRight_coord[1]) for x in lat_list]
  XY_topRight_list = list(zip(lon_list_X_topRight, lat_list_Y_topRight))

  #Bottom Right Coordinates for BBox
  lon_list_X_bottomRight = [x + bottomRight_coord[0] for x in lon_list]
  lat_list_Y_bottomRight = [abs(x - bottomRight_coord[1]) for x in lat_list]
  XY_bottomRight_list = list(zip(lon_list_X_bottomRight, lat_list_Y_bottomRight))

  ### Bounding Box Coordinate List
  bbox = list(zip(XY_topLeft_list, XY_bottomLeft_list, XY_topRight_list, XY_bottomRight_list))

  return bbox


bbox_windows = bbox(lon_range, lat_range, stepSize, XY_topLeft, XY_topRight, XY_bottomLeft, XY_bottomRight)


# Define Variables

This is a set of global variables used throughout the notebook. You must have a Google Cloud Storage bucket into which you can write files. You'll also need to specify your Earth Engine username i.e. users/USER_NAME.

In [11]:
# Define export for feature class assets
OUTPUT_BUCKET = 'landcover_samples_nlcd2019_tfrecord_june2022'

TRAIN_FILE_PREFIX = 'Training_nlcd2019'
TEST_FILE_PREFIX = 'Testing_nlcd2019'
file_extension = '.tfrecord.gz'
TRAIN_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TRAIN_FILE_PREFIX + file_extension
TEST_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TEST_FILE_PREFIX + file_extension
USER_NAME = 'lakex055'

# File name for the prediction (image) dataset.  The trained model will read
# this dataset and make predictions in each pixel.
IMAGE_FILE_PREFIX = 'spurge_temporalcnn_demo_'

# The output path for the classified image (i.e. predictions) TFRecord file.
OUTPUT_IMAGE_FILE = 'gs://' + OUTPUT_BUCKET + '/spurge_temporalcnndemo.TFRecord'

# The name of the Earth Engine asset to be created by importing
# the classified image from the TFRecord file in Cloud Storage.
OUTPUT_ASSET_ID = 'users/' + USER_NAME + '/spurge_temporalcnndemo'

# Make sure the bucket exists.
print('Found Cloud Storage bucket.' if tf.io.gfile.exists('gs://' + OUTPUT_BUCKET) 
  else 'Output Cloud Storage bucket does not exist.')


BANDS = ['0_BlueMarchApril2018',
 '0_GreenMarchApril2018',
 '0_RedMarchApril2018',
 '0_NIRMarchApril2018',
 '0_SWIR1MarchApril2018',
 '0_SWIR2MarchApril2018',
 '0_NDVIMarchApril2018',
 '0_BlueMayJune2018',
 '0_GreenMayJune2018',
 '0_RedMayJune2018',
 '0_NIRMayJune2018',
 '0_SWIR1MayJune2018',
 '0_SWIR2MayJune2018',
 '0_NDVIMayJune2018',
 '0_BlueJulyAug2018',
 '0_GreenJulyAug2018',
 '0_RedJulyAug2018',
 '0_NIRJulyAug2018',
 '0_SWIR1JulyAug2018',
 '0_SWIR2JulyAug2018',
 '0_NDVIJulyAug2018',
 '1_BlueMarchApril2019',
 '1_GreenMarchApril2019',
 '1_RedMarchApril2019',
 '1_NIRMarchApril2019',
 '1_SWIR1MarchApril2019',
 '1_SWIR2MarchApril2019',
 '1_NDVIMarchApril2019',
 '1_BlueMayJune2019',
 '1_GreenMayJune2019',
 '1_RedMayJune2019',
 '1_NIRMayJune2019',
 '1_SWIR1MayJune2019',
 '1_SWIR2MayJune2019',
 '1_NDVIMayJune2019',
 '1_BlueJulyAug2019',
 '1_GreenJulyAug2019',
 '1_RedJulyAug2019',
 '1_NIRJulyAug2019',
 '1_SWIR1JulyAug2019',
 '1_SWIR2JulyAug2019',
 '1_NDVIJulyAug2019',
 '2_BlueMarchApril2020',
 '2_GreenMarchApril2020',
 '2_RedMarchApril2020',
 '2_NIRMarchApril2020',
 '2_SWIR1MarchApril2020',
 '2_SWIR2MarchApril2020',
 '2_NDVIMarchApril2020',
 '2_BlueMayJune2020',
 '2_GreenMayJune2020',
 '2_RedMayJune2020',
 '2_NIRMayJune2020',
 '2_SWIR1MayJune2020',
 '2_SWIR2MayJune2020',
 '2_NDVIMayJune2020',
 '2_BlueJulyAug2020',
 '2_GreenJulyAug2020',
 '2_RedJulyAug2020',
 '2_NIRJulyAug2020',
 '2_SWIR1JulyAug2020',
 '2_SWIR2JulyAug2020',
 '2_NDVIJulyAug2020']

LABEL = 'class'

# Number of label values, i.e. number of classes in the classification.
N_CLASSES = 10

# These names are used to specify properties in the export of
# training/testing data and to define the mapping between names and data
# when reading into TensorFlow datasets.
FEATURE_NAMES = list(BANDS)
FEATURE_NAMES.append(LABEL)


Found Cloud Storage bucket.


# Basic Workflow to Generate Training Datasets

Iteratively generate bounding box arcross study area. Within each bounding box, extract points with labeled land cover values (including leafy spurge) and Landsat imagery. 

There are several limitations on the size and shape of Earth Engine table assets:

Maximum of 100 million features

Maximum of 1000 properties (columns)

Maximum of 100,000 vertices for each row's geometry

Maximum of 100,000 characters per string value


In [41]:

#define years to sample data (corresponds to satellite image year)
years = [2018, 2019, 2020]

#Training points for leafy spurge & land cover classes (defines extent of landsat imagery)

#Load 1m training points sampled from 2019 NLCD and leafy spurge from 2018-2019-2020
pts = ee.FeatureCollection('projects/pacific-engine-346519/assets/spurge_landcover_nlcd2019_onemillionpoints_april2022')

#Moving Bounding Box Loop to Generate Sample Points
for i in range(0, 2):
#for i in range(3): testing only
    
  # Define Bounding Box
  bbox = bbox_windows[i]
  print(bbox)

    # Filter points based on AOI
  aoi = ee.Geometry.Polygon(bbox)

  #Apply Filter
  newpts = pts.filterBounds(aoi)

  #How many points?
  count = newpts.size() #returns an EE.Number object that we need to convert to an interger
  num_points = int(count.getInfo())
  print('Number of Points within AOI (Count): ', str(count.getInfo())+'\n')

  if num_points > 0:

    # Sample imagery in a year filtered by input points
    # Output is a list of length 3 EEimages, corresponding to three seasons in a year (e.g 2018: MarchApril, MayJune, JulyAug)
    LandsatCol_year0 = getLandsatMosaicFromPoints(years[0], newpts)

    LandsatCol_year1 = getLandsatMosaicFromPoints(years[1], newpts)
    
    LandsatCol_year2 = getLandsatMosaicFromPoints(years[2], newpts)

    LandsatCol_timeseries = ee.ImageCollection([LandsatCol_year0, LandsatCol_year1, LandsatCol_year2])
    
    LandsatCol_timeseries_image = LandsatCol_timeseries.toBands()
    #LandsatCol_timeseries_image.bandNames().getInfo()

    # Sample the image at the points and add a random column.
    sample = LandsatCol_timeseries_image.sampleRegions(collection=newpts, properties=['class'], scale=30).randomColumn()

    # Partition the sample approximately 70-30.
    training = sample.filter(ee.Filter.lt('random', 0.2)) #lt 0.7
    testing = sample.filter(ee.Filter.gt('random', 0.8)) #gte 0.3

    count = training.size() #returns an EE.Number object that we need to convert to an interger
    num_points = int(count.getInfo())
    print('Number of Points within AOI (Count): ', str(count.getInfo())+'\n')

    #from pprint import pprint

    # Print the first couple points to verify.
    #pprint({'training': training.first().getInfo()})
    #pprint({'testing': testing.first().getInfo()})

        
    # Create the tasks.
    training_task = ee.batch.Export.table.toCloudStorage(
      collection=training,
      description='Training Export',
      fileNamePrefix=TRAIN_FILE_PREFIX + "_" + str(i),
      bucket=OUTPUT_BUCKET,
      fileFormat='TFRecord',
      selectors=FEATURE_NAMES)

    testing_task = ee.batch.Export.table.toCloudStorage(
      collection=testing,
      description='Testing Export',
      fileNamePrefix=TEST_FILE_PREFIX + "_" + str(i),
      bucket=OUTPUT_BUCKET,
      fileFormat='TFRecord',
      selectors=FEATURE_NAMES)

    # Start the tasks.
    training_task.start()
    testing_task.start()

    # Export imagery in this region.
    EXPORT_REGION = aoi

    # Specify patch and file dimensions.
    image_export_options = {
      'patchDimensions': [512, 512],
      'maxFileSize': 104857600,
      'compressed': True
    }

    # Setup the task.
    image_task = ee.batch.Export.image.toCloudStorage(
      image=LandsatCol_timeseries_image,
      description='Image Export',
      fileNamePrefix=IMAGE_FILE_PREFIX + "_" + str(i),
      bucket=OUTPUT_BUCKET,
      scale=30,
      fileFormat='TFRecord',
      region=EXPORT_REGION.toGeoJSON()['coordinates'],
      formatOptions=image_export_options,
    )

    # Start the task.
    image_task.start()

    #Wait for export tasks to finish

    while image_task.active():
      print('Polling for image task (state: {}).'.format(ee.data.getTaskStatus(image_task.id)[0].get('state')))
      print('Polling for training (id: {}).'.format(ee.data.getTaskStatus(training_task.id)[0].get('state')))
      print('Polling for testing (id: {}).'.format(ee.data.getTaskStatus(testing_task.id)[0].get('state')))
      time.sleep(15)





 

  


Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPLETED).
Polling for testing (id: COMPLETED).
Polling for image task (state: RUNNING).
Polling for training (id: COMPL

# Data preparation and pre-processing

Read data from the TFRecord file into a `tf.data.Dataset`.  Pre-process the dataset to get it into a suitable format for input to the model.

## Read into a `tf.data.Dataset`

Here we are going to read a file in Cloud Storage into a `tf.data.Dataset`.  ([these TensorFlow docs](https://www.tensorflow.org/guide/data) explain more about reading data into a `Dataset`).  Check that you can read examples from the file.  The purpose here is to ensure that we can read from the file without an error.  The actual content is not necessarily human readable.


In [119]:
# Create a dataset from the TFRecord file in Cloud Storage.
train_dataset = tf.data.TFRecordDataset(TRAIN_FILE_PATH, compression_type='GZIP')
# Print the first record to check.
print(iter(train_dataset).next())

tf.Tensor(b'\n\xfc\x0f\n \n\x140_BlueMarchApril2018\x12\x08\x12\x06\n\x04\x15W\x05=\n!\n\x150_GreenMarchApril2018\x12\x08\x12\x06\n\x04_$4=\n\x1f\n\x130_RedMarchApril2018\x12\x08\x12\x06\n\x04\t\x16\x07=\n\x1f\n\x130_NIRMarchApril2018\x12\x08\x12\x06\n\x04j\xa4\xa5<\n!\n\x150_SWIR1MarchApril2018\x12\x08\x12\x06\n\x04\xc5\xac\x97<\n!\n\x150_SWIR2MarchApril2018\x12\x08\x12\x06\n\x04\xc9v~<\n \n\x140_NDVIMarchApril2018\x12\x08\x12\x06\n\x04c\xf7`\xbe\n\x1d\n\x110_BlueMayJune2018\x12\x08\x12\x06\n\x04j\xfb\xf7<\n\x1e\n\x120_GreenMayJune2018\x12\x08\x12\x06\n\x04\xb4<?=\n\x1c\n\x100_RedMayJune2018\x12\x08\x12\x06\n\x04\xff\x95\x1d=\n\x1c\n\x100_NIRMayJune2018\x12\x08\x12\x06\n\x04\x83\xc0\xda<\n\x1e\n\x120_SWIR1MayJune2018\x12\x08\x12\x06\n\x04=D\xc3<\n\x1e\n\x120_SWIR2MayJune2018\x12\x08\x12\x06\n\x04\xeew\x88<\n\x1d\n\x110_NDVIMayJune2018\x12\x08\x12\x06\n\x04YR+\xbe\n\x1d\n\x110_BlueJulyAug2018\x12\x08\x12\x06\n\x04^\x11\x14=\n\x1e\n\x120_GreenJulyAug2018\x12\x08\x12\x06\n\x04\xd5x!=\n\x

## Define the structure of your data

For parsing the exported TFRecord files, `featuresDict` is a mapping between feature names (recall that `featureNames` contains the band and label names) and `float32` [`tf.io.FixedLenFeature`](https://www.tensorflow.org/api_docs/python/tf/io/FixedLenFeature) objects.  This mapping is necessary for telling TensorFlow how to read data in a TFRecord file into tensors.  Specifically, **all numeric data exported from Earth Engine is exported as `float32`**.

(Note: *features* in the TensorFlow context (i.e. [`tf.train.Feature`](https://www.tensorflow.org/api_docs/python/tf/train/Feature)) are not to be confused with Earth Engine features (i.e. [`ee.Feature`](https://developers.google.com/earth-engine/api_docs#eefeature)), where the former is a protocol message type for serialized data input to the model and the latter is a geometry-based geographic data structure.)

In [121]:
# List of fixed-length features, all of which are float32.
columns = [
  tf.io.FixedLenFeature(shape=[1], dtype=tf.float32) for k in FEATURE_NAMES
]

# Dictionary with names as keys, features as values.
features_dict = dict(zip(FEATURE_NAMES, columns))

pprint(features_dict)

{'0_BlueJulyAug2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_BlueMarchApril2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_BlueMayJune2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_GreenJulyAug2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_GreenMarchApril2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_GreenMayJune2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_NDVIJulyAug2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_NDVIMarchApril2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_NDVIMayJune2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_NIRJulyAug2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_NIRMarchApril2018': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 '0_NIRMayJune2018'

## Parse the dataset

Now we need to make a parsing function for the data in the TFRecord files.  The data comes in flattened 2D arrays per record and we want to use the first part of the array for input to the model and the last element of the array as the class label.  The parsing function reads data from a serialized [`Example` proto](https://www.tensorflow.org/api_docs/python/tf/train/Example) into a dictionary in which the keys are the feature names and the values are the tensors storing the value of the features for that example.  ([These TensorFlow docs](https://www.tensorflow.org/tutorials/load_data/tfrecord) explain more about reading `Example` protos from TFRecord files).

Note that each record of the parsed dataset contains a tuple.  The first element of the tuple is a dictionary with bands for keys and the numeric value of the bands for values.  The second element of the tuple is a class label.

In [122]:
def parse_tfrecord(example_proto):
  """The parsing function.

  Read a serialized example into the structure defined by featuresDict.

  Args:
    example_proto: a serialized Example.

  Returns:
    A tuple of the predictors dictionary and the label, cast to an `int32`.
  """
  parsed_features = tf.io.parse_single_example(example_proto, features_dict)
  labels = parsed_features.pop(LABEL)
  return parsed_features, tf.cast(labels, tf.int32)

# Map the function over the dataset.
parsed_dataset = train_dataset.map(parse_tfrecord, num_parallel_calls=5)

# Print the first parsed record to check.
pprint(iter(parsed_dataset).next())

({'0_BlueJulyAug2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.03614938], dtype=float32)>,
  '0_BlueMarchApril2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.03255375], dtype=float32)>,
  '0_BlueMayJune2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.03027125], dtype=float32)>,
  '0_GreenJulyAug2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.03942188], dtype=float32)>,
  '0_GreenMarchApril2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.04398], dtype=float32)>,
  '0_GreenMayJune2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.04668875], dtype=float32)>,
  '0_NDVIJulyAug2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.06392911], dtype=float32)>,
  '0_NDVIMarchApril2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.2196937], dtype=float32)>,
  '0_NDVIMayJune2018': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.16730632], dtype=float32)>,
  '0_NIRJulyAug2018': <tf.Tensor: shape=(1,),

## Create the Keras model

Before we create the model, there's still a wee bit of pre-processing to get the data into the right input shape and a format that can be used with cross-entropy loss.  Specifically, Keras expects a list of inputs and a one-hot vector for the class. (See [the Keras loss function docs](https://keras.io/losses/), [the TensorFlow categorical identity docs](https://www.tensorflow.org/guide/feature_columns#categorical_identity_column) and [the `tf.one_hot` docs](https://www.tensorflow.org/api_docs/python/tf/one_hot) for details).  

Here we will use a simple neural network model with a 64 node hidden layer, a dropout layer and an output layer.  Once the dataset has been prepared, define the model, compile it, fit it to the training data.  See [the Keras `Sequential` model guide](https://keras.io/getting-started/sequential-model-guide/) for more details.

In [124]:
from tensorflow import keras

# Keras requires inputs as a tuple.  Note that the inputs must be in the
# right shape.  Also note that to use the categorical_crossentropy loss,
# the label needs to be turned into a one-hot vector.
def to_tuple(inputs, label):
  return (tf.transpose(list(inputs.values())),
          tf.one_hot(indices=label, depth=N_CLASSES))

# Map the to_tuple function, shuffle and batch.
input_dataset = parsed_dataset.map(to_tuple).batch(8)

# Define the layers in the model.
model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(64, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(N_CLASSES, activation=tf.nn.softmax)
])

# Compile the model with the specified loss function.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fit the model to the training data.
model.fit(x=input_dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0a4cf81510>

## Check model accuracy on the test set

Now that we have a trained model, we can evaluate it using the test dataset.  To do that, read and prepare the test dataset in the same way as the training dataset.  Here we specify a batch size of 1 so that each example in the test set is used exactly once to compute model accuracy.  For model steps, just specify a number larger than the test dataset size (ignore the warning).

In [125]:
test_dataset = (
  tf.data.TFRecordDataset(TEST_FILE_PATH, compression_type='GZIP')
    .map(parse_tfrecord, num_parallel_calls=5)
    .map(to_tuple)
    .batch(1))

model.evaluate(test_dataset)



[1.2486305236816406, 0.5583405494689941]

# Use the trained model to classify an image from Earth Engine

Now it's time to classify the image that was exported from Earth Engine.  If the exported image is large, it will be split into multiple TFRecord files in its destination folder.  There will also be a JSON sidecar file called "the mixer" that describes the format and georeferencing of the image.  Here we will find the image files and the mixer file, getting some info out of the mixer that will be useful during model inference.

## Find the image files and JSON mixer file in Cloud Storage

Use `gsutil` to locate the files of interest in the output Cloud Storage bucket.  Check to make sure your image export task finished before running the following.

In [127]:
# Get a list of all the files in the output bucket.
files_list = !gsutil ls 'gs://'{OUTPUT_BUCKET}
# Get only the files generated by the image export.
exported_files_list = [s for s in files_list if IMAGE_FILE_PREFIX in s]

# Get the list of image files and the JSON mixer file.
image_files_list = []
json_file = None
for f in exported_files_list:
  if f.endswith('.tfrecord.gz'):
    image_files_list.append(f)
  elif f.endswith('.json'):
    json_file = f

# Make sure the files are in the right order.
image_files_list.sort()

pprint(image_files_list)
print(json_file)

['gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00000.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00001.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00002.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00003.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00004.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00005.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00006.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00007.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00008.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnn_demo_00009.tfrecord.gz',
 'gs://landcover_samples_nlcd2019_tfreco

## Read the JSON mixer file

The mixer contains metadata and georeferencing information for the exported patches, each of which is in a different file.  Read the mixer to get some information needed for prediction.

In [128]:
import json

# Load the contents of the mixer file to a JSON object.
json_text = !gsutil cat {json_file}
# Get a single string w/ newlines from the IPython.utils.text.SList
mixer = json.loads(json_text.nlstr)
pprint(mixer)

{'patchDimensions': [512, 512],
 'patchesPerRow': 7,
 'projection': {'affine': {'doubleMatrix': [0.00026949458523585647,
                                            0.0,
                                            -116.97628067830539,
                                            0.0,
                                            -0.00026949458523585647,
                                            47.904818482355374]},
                'crs': 'EPSG:4326'},
 'totalPatches': 49}


## Read the image files into a dataset

You can feed the list of files (`imageFilesList`) directly to the `TFRecordDataset` constructor to make a combined dataset on which to perform inference.  The input needs to be preprocessed differently than the training and testing.  Mainly, this is because the pixels are written into records as patches, we need to read the patches in as one big tensor (one patch for each band), then flatten them into lots of little tensors.

In [133]:
# Get relevant info from the JSON mixer file.
patch_width = mixer['patchDimensions'][0]
patch_height = mixer['patchDimensions'][1]
patches = mixer['totalPatches']
patch_dimensions_flat = [patch_width * patch_height, 1]

# Note that the tensors are in the shape of a patch, one patch for each band.
image_columns = [
  tf.io.FixedLenFeature(shape=patch_dimensions_flat, dtype=tf.float32) 
    for k in BANDS
]

# Parsing dictionary.
image_features_dict = dict(zip(BANDS, image_columns))

# Note that you can make one dataset from many files by specifying a list.
image_dataset = tf.data.TFRecordDataset(image_files_list, compression_type='GZIP')

# Parsing function.
def parse_image(example_proto):
  return tf.io.parse_single_example(example_proto, image_features_dict)

# Parse the data into tensors, one long tensor per patch.
image_dataset = image_dataset.map(parse_image, num_parallel_calls=5)

# Break our long tensors into many little ones.
image_dataset = image_dataset.flat_map(
  lambda features: tf.data.Dataset.from_tensor_slices(features)
)

# Turn the dictionary in each record into a tuple without a label.
image_dataset = image_dataset.map(
  lambda data_dict: (tf.transpose(list(data_dict.values())), )
)

# Turn each patch into a batch.
image_dataset = image_dataset.batch(patch_width * patch_height)

## Generate predictions for the image pixels

To get predictions in each pixel, run the image dataset through the trained model using `model.predict()`.  Print the first prediction to see that the output is a list of the three class probabilities for each pixel.  Running all predictions might take a while.

In [141]:
# Run prediction in batches, with as many steps as there are patches.
predictions = model.predict(image_dataset, steps=patches, verbose=1)

# Note that the predictions come as a numpy array.  Check the first one.
print(predictions[0])

predictions.shape

(12845056, 1, 10)

## Write the predictions to a TFRecord file

Now that there's a list of class probabilities in `predictions`, it's time to write them back into a file, optionally including a class label which is simply the index of the maximum probability.  We'll write directly from TensorFlow to a file in the output Cloud Storage bucket.

Iterate over the list, compute class label and write the class and the probabilities in patches.  Specifically, we need to write the pixels into the file as patches in the same order they came out.  The records are written as serialized `tf.train.Example` protos.  This might take a while.

In [146]:
print('Writing to file ' + OUTPUT_IMAGE_FILE)

# Instantiate the writer.
writer = tf.io.TFRecordWriter(OUTPUT_IMAGE_FILE)

# Every patch-worth of predictions we'll dump an example into the output
# file with a single feature that holds our predictions. Since our predictions
# are already in the order of the exported data, the patches we create here
# will also be in the right order.
patch = [[], [], [], [], [], [], [], [], [], []]
cur_patch = 1
for prediction in predictions:
  patch[0].append(tf.argmax(prediction, 1))
  patch[1].append(prediction[0][0])
  patch[2].append(prediction[0][1])
  patch[3].append(prediction[0][2])
  patch[4].append(prediction[0][3])
  patch[5].append(prediction[0][4])
  patch[6].append(prediction[0][5])
  patch[7].append(prediction[0][6])
  patch[8].append(prediction[0][7])
  patch[9].append(prediction[0][8])
  # Once we've seen a patches-worth of class_ids...
  if (len(patch[0]) == patch_width * patch_height):
    print('Done with patch ' + str(cur_patch) + ' of ' + str(patches) + '...')
    # Create an example
    example = tf.train.Example(
      features=tf.train.Features(
        feature={
          'prediction': tf.train.Feature(
              int64_list=tf.train.Int64List(
                  value=patch[0])),
          'class1': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[1])),
          'class2': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[2])),
          'class3': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[3])),
          'class4': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[4])),
          'class5': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[5])),
          'class6': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[6])),
          'class7': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[7])),
          'class8': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[8])),
          'class9': tf.train.Feature(
              float_list=tf.train.FloatList(
                  value=patch[9])),
        }
      )
    )
    # Write the example to the file and clear our patch array so it's ready for
    # another batch of class ids
    writer.write(example.SerializeToString())
    patch = [[], [], [], [], [], [], [], [], [], []]
    cur_patch += 1

writer.close()

Writing to file gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnndemo.TFRecord
Done with patch 1 of 49...
Done with patch 2 of 49...
Done with patch 3 of 49...
Done with patch 4 of 49...
Done with patch 5 of 49...
Done with patch 6 of 49...
Done with patch 7 of 49...
Done with patch 8 of 49...
Done with patch 9 of 49...
Done with patch 10 of 49...
Done with patch 11 of 49...
Done with patch 12 of 49...
Done with patch 13 of 49...
Done with patch 14 of 49...
Done with patch 15 of 49...
Done with patch 16 of 49...
Done with patch 17 of 49...
Done with patch 18 of 49...
Done with patch 19 of 49...
Done with patch 20 of 49...
Done with patch 21 of 49...
Done with patch 22 of 49...
Done with patch 23 of 49...
Done with patch 24 of 49...
Done with patch 25 of 49...
Done with patch 26 of 49...
Done with patch 27 of 49...
Done with patch 28 of 49...
Done with patch 29 of 49...
Done with patch 30 of 49...
Done with patch 31 of 49...
Done with patch 32 of 49...
Done with patch

# Upload the classifications to an Earth Engine asset

## Verify the existence of the predictions file

At this stage, there should be a predictions TFRecord file sitting in the output Cloud Storage bucket.  Use the `gsutil` command to verify that the predictions image (and associated mixer JSON) exist and have non-zero size.



In [148]:
!gsutil ls -l {OUTPUT_IMAGE_FILE}

print('Uploading to ' + OUTPUT_ASSET_ID)

# Start the upload.
!earthengine upload image --asset_id={OUTPUT_ASSET_ID} --pyramiding_policy=mode {OUTPUT_IMAGE_FILE} {json_file}


 475280057  2022-06-06T17:31:55Z  gs://landcover_samples_nlcd2019_tfrecord_june2022/spurge_temporalcnndemo.TFRecord
TOTAL: 1 objects, 475280057 bytes (453.26 MiB)
Uploading to users/lakex055/spurge_temporalcnndemo
Started upload task with ID: FKKMRPQ44JPMS3YXF5PQNPAO


## Map the Prediction!

In [None]:
import folium

predictions_image = ee.Image(OUTPUT_ASSET_ID)

prediction_vis = {
  'bands': 'prediction',
  'min': 0,
  'max': 2,
  'palette': ['red', 'green', 'blue']
}
probability_vis = {'bands': ['class8', 'class1', 'class6'], 'max': 0.5}

prediction_map_id = predictions_image.getMapId(prediction_vis)
probability_map_id = predictions_image.getMapId(probability_vis)

map = folium.Map(location=[37.6413, -122.2582])
folium.TileLayer(
  tiles=prediction_map_id['tile_fetcher'].url_format,
  attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
  overlay=True,
  name='prediction',
).add_to(map)
folium.TileLayer(
  tiles=probability_map_id['tile_fetcher'].url_format,
  attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
  overlay=True,
  name='probability',
).add_to(map)
map.add_child(folium.LayerControl())
map

# Function to Gather Landsat ImageCollection Mosaic from Date/Points Input


In [None]:


#Function to Gather Landsat ImageCollection Mosaic from Date/Points Input


# Selects and renames bands of interest for TM/ETM+.
def renameImageBands(img, year, season):
  return img.select(
      ['Blue_median', 'Green_median', 'Red_median', 'NIR_median', 
       'SWIR1_median', 'SWIR2_median', 'QA_PIXEL_median', 'QA_RADSAT_median', 'NDVI_median'],
      ['Blue'+str(season)+str(year), 'Green'+str(season)+str(year), 'Red'+str(season)+str(year), 'NIR'+str(season)+str(year),
       'SWIR1'+str(season)+str(year), 'SWIR2'+str(season)+str(year), 'QA_PIXEL'+str(season)+str(year), 'QA_RADSAT'+str(season)+str(year), 'NDVI'+str(season)+str(year)])


def getLandsatMosaicFromPoints(year, points):
  '''
  #Time-series extraction developed from
  #https://developers.google.com/earth-engine/tutorials/community/time-series-visualization-with-altair#combine_dataframes  

  '''

  #if Year is between 1985 and 1999 use Landsat 5 TM imagery
  if 1985 <= year <= 1999:

    tmColMarchApril = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMarchApril = renameImageBands(tmColMarchApril, year, 'MarchApril')

    tmColMayJune = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMayJune = renameImageBands(tmColMayJune, year, 'MayJune')

    tmColJulyAug = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColJulyAug = renameImageBands(tmColJulyAug, year, 'JulyAug')

    landsat5ImageCol = [tmColMarchApril, tmColMayJune, tmColJulyAug]
    return landsat5ImageCol

  #if Year is between 2000 and 2012 use mosaic from Landsat 5 TM and Landsat 7 ETM imagery
  if 2000 <= year <= 2012:

    etmColMarchApril = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMarchApril = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    MarchApril = ee.ImageCollection([etmColMarchApril, tmColMarchApril])

    etmColMarchApril = MarchApril.reduce('median')

    etmColMayJune = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColMayJune = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    MayJune = ee.ImageCollection([etmColMayJune, tmColMayJune])

    etmColMayJune = MayJune.reduce('median')

    etmColJulyAug = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    tmColJulyAug = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    JulyAug = ee.ImageCollection([etmColJulyAug, tmColJulyAug])

    etmColJulyAug = JulyAug.reduce('median')

    landsat5_7ImageCol = ee.ImageCollection([etmColMarchApril, etmColMayJune, etmColJulyAug])
    return landsat5_7ImageCol

  #if Year is between 2013 and 2020 use mosaic from Landsat 7 ETM and Landsat 8 OLI imagery
  if 2013 <= year <= 2020:

    etmColMarchApril = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    oliColMarchApril = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
      .filterDate('{}-03-01'.format(year), '{}-04-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepOli) \
      .map(addNDVI) \
      .reduce('median')

    MarchApril = ee.ImageCollection([etmColMarchApril, oliColMarchApril])

    etmColMarchApril = MarchApril.reduce('median')

    etmColMayJune = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median')

    oliColMayJune = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
      .filterDate('{}-05-01'.format(year), '{}-06-30'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepOli) \
      .map(addNDVI) \
      .reduce('median')

    MayJune = ee.ImageCollection([etmColMayJune, oliColMayJune])

    etmColMayJune = MayJune.reduce('median')

    etmColJulyAug = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepEtm) \
      .map(addNDVI) \
      .reduce('median') \

    oliColJulyAug = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
      .filterDate('{}-07-01'.format(year), '{}-08-31'.format(year)) \
      .filterBounds(points) \
      .map(maskQuality) \
      .map(prepOli) \
      .map(addNDVI) \
      .reduce('median')

    JulyAug = ee.ImageCollection([etmColJulyAug, oliColJulyAug])

    etmColJulyAug = JulyAug.reduce('median')

    landsat7_8ImageCol = ee.ImageCollection([etmColMarchApril, etmColMayJune, etmColJulyAug])
    return landsat7_8ImageCol


year = 1987

landsatCol = getLandsatMosaicFromPoints(year, newpts)

image1 = landsatCol[0]
image2 = landsatCol[1]
image3 = landsatCol[2]

image1_fc = image1.sampleRegions(collection=newpts, properties=['class'], scale=30)
image2_fc = image2.sampleRegions(collection=newpts, properties=['class'], scale=30)
image3_fc = image3.sampleRegions(collection=newpts, properties=['class'], scale=30)


# Define a function to transfer feature properties to a dictionary.
def fc_to_dict(fc):
  prop_names = fc.first().propertyNames()
  prop_lists = fc.reduceColumns(
      reducer=ee.Reducer.toList().repeat(prop_names.size()),
      selectors=prop_names).get('list')

  return ee.Dictionary.fromLists(prop_names, prop_lists)

image1_db_dict = fc_to_dict(image1_fc).getInfo()
image2_db_dict = fc_to_dict(image2_fc).getInfo()
image3_db_dict = fc_to_dict(image3_fc).getInfo()

image1_df = pd.DataFrame(image1_db_dict)
image2_df = pd.DataFrame(image2_db_dict)
image3_df = pd.DataFrame(image3_db_dict)

display(image1_df)

data_frames = [image1_df, image2_df, image3_df]


from functools import reduce
df_merged = reduce(lambda left,right: pd.merge(left, right, on='system:index', how='outer'), data_frames).fillna(np.nan)
display(df_merged)

df_merged_dropna = df_merged.dropna(axis=0, how = 'any')
#display(df_merged_dropna)


df_merged_removeQA = df_merged_dropna.drop(['QA_PIXEL_mean_1', 'QA_RADSAT_mean_1', 'QA_PIXEL_mean_2', 'QA_RADSAT_mean_2', 'QA_PIXEL_mean_3', 'QA_RADSAT_mean_3',
                                     'QA_PIXEL_mean_4', 'QA_RADSAT_mean_4', 'QA_PIXEL_mean_5', 'QA_RADSAT_mean_5', 'QA_PIXEL_mean_6', 'QA_RADSAT_mean_6',
                                     'QA_PIXEL_mean_7', 'QA_RADSAT_mean_7', 'QA_PIXEL_mean_8', 'QA_RADSAT_mean_8', 'QA_PIXEL_mean_9', 'QA_RADSAT_mean_9',
                                     'class_x', 'class_y', '.geo_x', '.geo_y', '.geo', 'system:index'], 1)
display(df_merged_removeQA)










data_frames = [db_MarchApril2018_df, db_MayJune2018_df, db_JulyAug2018_df,
               db_MarchApril2019_df, db_MayJune2019_df, db_JulyAug2019_df,
               db_MarchApril2020_df, db_MayJune2020_df, db_JulyAug2020_df]

from functools import reduce
df_merged = reduce(lambda left,right: pd.merge(left, right, on='system:index', how='outer'), data_frames).fillna(np.nan)
#display(df_merged)

df_merged_dropna = df_merged.dropna(axis=0, how = 'any')
#display(df_merged_dropna)


df_merged_removeQA = df_merged_dropna.drop(['QA_PIXEL_mean_1', 'QA_RADSAT_mean_1', 'QA_PIXEL_mean_2', 'QA_RADSAT_mean_2', 'QA_PIXEL_mean_3', 'QA_RADSAT_mean_3',
                                     'QA_PIXEL_mean_4', 'QA_RADSAT_mean_4', 'QA_PIXEL_mean_5', 'QA_RADSAT_mean_5', 'QA_PIXEL_mean_6', 'QA_RADSAT_mean_6',
                                     'QA_PIXEL_mean_7', 'QA_RADSAT_mean_7', 'QA_PIXEL_mean_8', 'QA_RADSAT_mean_8', 'QA_PIXEL_mean_9', 'QA_RADSAT_mean_9',
                                     'class_x', 'class_y', '.geo_x', '.geo_y', '.geo', 'system:index'], 1)
display(df_merged_removeQA)



# Define a function to transfer feature properties to a dictionary.
def fc_to_dict(fc):
  prop_names = fc.first().propertyNames()
  prop_lists = fc.reduceColumns(
      reducer=ee.Reducer.toList().repeat(prop_names.size()),
      selectors=prop_names).get('list')

  return ee.Dictionary.fromLists(prop_names, prop_lists)

train_db_dict = fc_to_dict(subset_train_db).getInfo()
train_df = pd.DataFrame(train_db_dict)
display(train_df)
#print(nbr_df.dtypes)




# Get Landsat-8 Imagery from 2018, 2019, and 2020 (Centered on 2019 - corresponding to NLCD 2019 land cover classes in the training points)

In [None]:

##########
## 2018 ##
##########

etmColMarchApril2018 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2018-03-01', '2018-04-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean')

oliColMarchApril2018 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2018-03-01', '2018-04-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

MarchApril2018 = ee.ImageCollection([etmColMarchApril2018, oliColMarchApril2018])

etmColMarchApril2018 = MarchApril2018.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_1(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_1', 'Green_mean_1', 'Red_mean_1', 'NIR_mean_1','SWIR1_mean_1', 'SWIR2_mean_1', 'QA_PIXEL_mean_1', 'QA_RADSAT_mean_1', 'NDVI_mean_1'])

etmColMarchApril2018 = renameEtm_mean_1(etmColMarchApril2018)

etmColMayJune2018 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2018-05-01', '2018-06-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean')

oliColMayJune2018 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2018-05-01', '2018-06-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

MayJune2018 = ee.ImageCollection([etmColMayJune2018, oliColMayJune2018])

etmColMayJune2018 = MayJune2018.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_2(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_2', 'Green_mean_2', 'Red_mean_2', 'NIR_mean_2','SWIR1_mean_2', 'SWIR2_mean_2', 'QA_PIXEL_mean_2', 'QA_RADSAT_mean_2', 'NDVI_mean_2'])

etmColMayJune2018 = renameEtm_mean_2(etmColMayJune2018)

etmColJulyAug2018 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2018-07-01', '2018-08-31') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean') \

oliColJulyAug2018 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2018-07-01', '2018-08-31') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

JulyAug2018 = ee.ImageCollection([etmColJulyAug2018, oliColJulyAug2018])

etmColJulyAug2018 = JulyAug2018.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_3(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_3', 'Green_mean_3', 'Red_mean_3', 'NIR_mean_3','SWIR1_mean_3', 'SWIR2_mean_3', 'QA_PIXEL_mean_3', 'QA_RADSAT_mean_3', 'NDVI_mean_3'])

etmColJulyAug2018 = renameEtm_mean_3(etmColJulyAug2018)





##########
## 2019 ##
##########

etmColMarchApril2019 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2019-03-01', '2019-04-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean')

oliColMarchApril2019 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2019-03-01', '2019-04-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

MarchApril2019 = ee.ImageCollection([etmColMarchApril2019, oliColMarchApril2019])

etmColMarchApril2019 = MarchApril2019.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_4(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_4', 'Green_mean_4', 'Red_mean_4', 'NIR_mean_4','SWIR1_mean_4', 'SWIR2_mean_4', 'QA_PIXEL_mean_4', 'QA_RADSAT_mean_4', 'NDVI_mean_4'])

etmColMarchApril2019 = renameEtm_mean_4(etmColMarchApril2019)

etmColMayJune2019 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2019-05-01', '2019-06-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean')

oliColMayJune2019 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2019-05-01', '2019-06-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

MayJune2019 = ee.ImageCollection([etmColMayJune2019, oliColMayJune2019])

etmColMayJune2019 = MayJune2019.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_5(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_5', 'Green_mean_5', 'Red_mean_5', 'NIR_mean_5','SWIR1_mean_5', 'SWIR2_mean_5', 'QA_PIXEL_mean_5', 'QA_RADSAT_mean_5', 'NDVI_mean_5'])

etmColMayJune2019 = renameEtm_mean_5(etmColMayJune2019)

etmColJulyAug2019 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2019-07-01', '2019-08-31') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean') \

oliColJulyAug2019 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2019-07-01', '2019-08-31') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

JulyAug2019 = ee.ImageCollection([etmColJulyAug2019, oliColJulyAug2019])

etmColJulyAug2019 = JulyAug2019.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_6(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_6', 'Green_mean_6', 'Red_mean_6', 'NIR_mean_6','SWIR1_mean_6', 'SWIR2_mean_6', 'QA_PIXEL_mean_6', 'QA_RADSAT_mean_6', 'NDVI_mean_6'])

etmColJulyAug2019 = renameEtm_mean_6(etmColJulyAug2019)


##########
## 2020 ##
##########

etmColMarchApril2020 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2020-03-01', '2020-04-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean')

oliColMarchApril2020 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2020-03-01', '2020-04-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

MarchApril2020 = ee.ImageCollection([etmColMarchApril2020, oliColMarchApril2020])

etmColMarchApril2020 = MarchApril2020.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_7(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_7', 'Green_mean_7', 'Red_mean_7', 'NIR_mean_7','SWIR1_mean_7', 'SWIR2_mean_7', 'QA_PIXEL_mean_7', 'QA_RADSAT_mean_7', 'NDVI_mean_7'])

etmColMarchApril2020 = renameEtm_mean_7(etmColMarchApril2020)

etmColMayJune2020 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2020-05-01', '2020-06-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean')

oliColMayJune2020 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2020-05-01', '2020-06-30') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

MayJune2020 = ee.ImageCollection([etmColMayJune2020, oliColMayJune2020])

etmColMayJune2020 = MayJune2020.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_8(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_8', 'Green_mean_8', 'Red_mean_8', 'NIR_mean_8','SWIR1_mean_8', 'SWIR2_mean_8', 'QA_PIXEL_mean_8', 'QA_RADSAT_mean_8', 'NDVI_mean_8'])

etmColMayJune2020 = renameEtm_mean_8(etmColMayJune2020)

etmColJulyAug2020 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
  .filterDate('2020-07-01', '2020-08-31') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepEtm) \
  .map(addNDVI) \
  .reduce('mean') \

oliColJulyAug2020 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
  .filterDate('2020-07-01', '2020-08-31') \
  .filterBounds(pts) \
  .map(maskQuality) \
  .map(prepOli) \
  .map(addNDVI) \
  .reduce('mean')

JulyAug2020 = ee.ImageCollection([etmColJulyAug2020, oliColJulyAug2020])

etmColJulyAug2020 = JulyAug2020.reduce('mean')

# Selects and renames bands of interest for TM/ETM+.
def renameEtm_mean_9(img):
  return img.select(
      ['Blue_mean_mean', 'Green_mean_mean', 'Red_mean_mean', 'NIR_mean_mean', 'SWIR1_mean_mean', 'SWIR2_mean_mean', 'QA_PIXEL_mean_mean', 'QA_RADSAT_mean_mean', 'NDVI_mean_mean'],
      ['Blue_mean_9', 'Green_mean_9', 'Red_mean_9', 'NIR_mean_9','SWIR1_mean_9', 'SWIR2_mean_9', 'QA_PIXEL_mean_9', 'QA_RADSAT_mean_9', 'NDVI_mean_9'])

etmColJulyAug2020 = renameEtm_mean_9(etmColJulyAug2020)

etmColJulyAug2020.getInfo()


#Extract Landsat-8 Bands from Points, Format into Pandas DataFrame

In [None]:

# Sample Regions

# 2018
etmColMarchApril2018_fc = etmColMarchApril2018.sampleRegions(collection=pts, properties=['class'], scale=30)
#print(etmColMarchApril2018_fc.getInfo())
etmColMayJune2018_fc = etmColMayJune2018.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColMayJune2018_fc.getInfo())
etmColJulyAug2018_fc = etmColJulyAug2018.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColJulyAug2018_fc.getInfo())

# 2019
etmColMarchApril2019_fc = etmColMarchApril2019.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColMarchApril2019_fc.getInfo())
etmColMayJune2019_fc = etmColMayJune2019.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColMayJune2019_fc.getInfo())
etmColJulyAug2019_fc = etmColJulyAug2019.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColJulyAug2019_fc.getInfo())

# 2020
etmColMarchApril2020_fc = etmColMarchApril2020.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColMarchApril2020_fc.getInfo())
etmColMayJune2020_fc = etmColMayJune2020.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColMayJune2020_fc.getInfo())
etmColJulyAug2020_fc = etmColJulyAug2020.sampleRegions(collection=pts, properties=['class'], scale=30, geometries=True)
#print(etmColJulyAug2020_fc.getInfo())



# Export feature class assets
outputBucket = 'landcover_samples_nlcd2019_onemillionpoints'
# Make sure the bucket exists.
print('Found Cloud Storage bucket.' if tf.io.gfile.exists('gs://' + outputBucket) 
    else 'Output Cloud Storage bucket does not exist.')


# 2018
task1 = ee.batch.Export.table.toCloudStorage(
    collection=etmColMarchApril2018_fc,
    description='etmColMarchApril2018_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColMarchApril2018_fc_landcover',
    fileFormat='CSV')

task2 = ee.batch.Export.table.toCloudStorage(
    collection=etmColMayJune2018_fc,
    description='etmColMayJune2018_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColMayJune2018_fc_landcover',
    fileFormat='CSV')

task3 = ee.batch.Export.table.toCloudStorage(
    collection=etmColJulyAug2018_fc,
    description='etmColJulyAug2018_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColJulyAug2018_fc_landcover',
    fileFormat='CSV')

# 2019
task4 = ee.batch.Export.table.toCloudStorage(
    collection=etmColMarchApril2019_fc,
    description='etmColMarchApril2019_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColMarchApril2019_fc_landcover',
    fileFormat='CSV')

task5 = ee.batch.Export.table.toCloudStorage(
    collection=etmColMayJune2019_fc,
    description='etmColMayJune2019_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColMayJune2019_fc_landcover',
    fileFormat='CSV')

task6 = ee.batch.Export.table.toCloudStorage(
    collection=etmColJulyAug2019_fc,
    description='etmColJulyAug2019_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColJulyAug2019_fc_landcover',
    fileFormat='CSV')

# 2020
task7 = ee.batch.Export.table.toCloudStorage(
    collection=etmColMarchApril2020_fc,
    description='etmColMarchApril2020_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColMarchApril2020_fc_landcover',
    fileFormat='CSV')

task8 = ee.batch.Export.table.toCloudStorage(
    collection=etmColMayJune2020_fc,
    description='etmColMayJune2020_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColMayJune2020_fc_landcover',
    fileFormat='CSV')

task9 = ee.batch.Export.table.toCloudStorage(
    collection=etmColJulyAug2020_fc,
    description='etmColJulyAug2020_fc_landcover export',
    bucket = outputBucket,
    fileNamePrefix='etmColJulyAug2020_fc_landcover',
    fileFormat='CSV')

#Export/Start Tasks

task1.start()
task2.start()
task3.start()
task4.start()
task5.start()
task6.start()
task7.start()
task8.start()
task9.start()




In [None]:

# Re-Import Feature Class Assets (no longer hosted on EE and thus not restricted to size limits) and format to pandas DataFrame
# Depends on import gcsfs

#2018
db_MarchApril2018_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColMarchApril2018_fc_landcover.csv')
#display(db_MarchApril2018_df)
db_MayJune2018_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColMayJune2018_fc_landcover.csv')
#display(db_MayJune2018_df)
db_JulyAug2018_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColJulyAug2018_fc_landcover.csv')
#display(db_JulyAug2018_df)


#2019
db_MarchApril2019_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColMarchApril2019_fc_landcover.csv')
#display(db_MarchApril2019_df)
db_MayJune2019_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColMayJune2019_fc_landcover.csv')
#display(db_MayJune2019_df)
db_JulyAug2019_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColJulyAug2019_fc_landcover.csv')
#display(db_JulyAug2019_df)


#2020
db_MarchApril2020_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColMarchApril2020_fc_landcover.csv')
#display(db_MarchApril2020_df)
db_MayJune2020_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColMayJune2020_fc_landcover.csv')
#display(db_MayJune2020_df)
db_JulyAug2020_df = pd.read_csv('gs://landcover_samples_nlcd2019_onemillionpoints/etmColJulyAug2020_fc_landcover.csv')
#display(db_JulyAug2020_df)

#Merge Landsat-8 <> LandCover Points into one DataFrame

In [None]:

data_frames = [db_MarchApril2018_df, db_MayJune2018_df, db_JulyAug2018_df,
               db_MarchApril2019_df, db_MayJune2019_df, db_JulyAug2019_df,
               db_MarchApril2020_df, db_MayJune2020_df, db_JulyAug2020_df]

from functools import reduce
df_merged = reduce(lambda left,right: pd.merge(left, right, on='system:index', how='outer'), data_frames).fillna(np.nan)
#display(df_merged)

df_merged_dropna = df_merged.dropna(axis=0, how = 'any')
#display(df_merged_dropna)


df_merged_removeQA = df_merged_dropna.drop(['QA_PIXEL_mean_1', 'QA_RADSAT_mean_1', 'QA_PIXEL_mean_2', 'QA_RADSAT_mean_2', 'QA_PIXEL_mean_3', 'QA_RADSAT_mean_3',
                                     'QA_PIXEL_mean_4', 'QA_RADSAT_mean_4', 'QA_PIXEL_mean_5', 'QA_RADSAT_mean_5', 'QA_PIXEL_mean_6', 'QA_RADSAT_mean_6',
                                     'QA_PIXEL_mean_7', 'QA_RADSAT_mean_7', 'QA_PIXEL_mean_8', 'QA_RADSAT_mean_8', 'QA_PIXEL_mean_9', 'QA_RADSAT_mean_9',
                                     'class_x', 'class_y', '.geo_x', '.geo_y', '.geo', 'system:index'], 1)
display(df_merged_removeQA)



#Export DataFrame to CSV

In [None]:
df_merged_removeQA.to_csv("example2_1dcnn_april2022.csv")

#Functions to read and compute spectral features on SITS 


In [None]:

""" 
	Some functions to read and compute spectral features on SITS
"""


import sys, os
import numpy as np
import pandas as pd
import math
import random
import itertools

import csv

#-----------------------------------------------------------------------
#---------------------- SATELLITE MODULE
#-----------------------------------------------------------------------
#final_class_label = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'c12']


#-----------------------------------------------------------------------
def readSITSData(name_file):
	"""
		Read the data contained in name_file
		INPUT:
			- name_file: file where to read the data
		OUTPUT:
			- X: variable vectors for each example
			- polygon_ids: id polygon (use e.g. for validation set)
			- Y: label for each example
	"""
	
	data = pd.read_table(name_file, sep=',', header=None)
	
	y_data = data.iloc[:,0]
	y = np.asarray(y_data.values, dtype='uint8')
	
	polygonID_data = data.iloc[:,1]
	polygon_ids = polygonID_data.values
	polygon_ids = np.asarray(polygon_ids, dtype='uint16')
		
	X_data = data.iloc[:,2:]
	X = X_data.values
	X = np.asarray(X, dtype='float32')

	return  X, polygon_ids, y


#-----------------------------------------------------------------------
def addFeatures(X):
	"""
		Read the data contained in name_file
		INPUT:
			- X: orginal X features composed of threes bands (NIR-R-G)
				in the following order 
					[date1.NIR, date1.R, date1.G, ..., dateD.NIR, dateD.R, dateD.G]
		OUTPUT:
			- X_features: orginal_X with the addition of NDVI, NDWI and Brilliance
				in the following order	
					[X, date1.NDVI, ..., dateD.NDVI, date1.NDWI, ..., dateD.NDWI, date1.Brilliance, ..., dateD.Brilliance]
	"""
	n_channels = 3
	
	NIR = X[:,0::n_channels]
	NIR = np.array(NIR)
	NIR = NIR.astype(np.float)
	R = X[:,1::n_channels]
	R = np.array(R)
	R = R.astype(np.float)
	G = X[:,2::n_channels]
	G = np.array(G)
	G = G.astype(np.float)	
	
	NDVI = np.where(NIR+R!=0., (NIR-R)/(NIR+R), 0.)
	NDVI = NDVI.astype(float)
	
	
	NDWI = np.where(G+NIR!=0., (G-NIR)/(G+NIR), 0.)
	NDWI = NDWI.astype(float)
	
	Brilliance = np.sqrt((NIR*NIR + R*R + G*G)/3.0)
	Brilliance = Brilliance.astype(float)
	
	return NDVI, NDWI, Brilliance

#-----------------------------------------------------------------------
def computeNDVI(X, n_channels):
	"""
		Read the data contained in name_file
		INPUT:
			- X: orginal X features composed of threes bands (NIR-R-G)
				in the following order 
					[date1.NIR, date1.R, date1.G, ..., dateD.NIR, dateD.R, dateD.G]
		OUTPUT:
			- X_features: orginal_X with the addition of NDVI, NDWI and Brilliance
				in the following order	
					[X, date1.NDVI, ..., dateD.NDVI, date1.NDWI, ..., dateD.NDWI, date1.Brilliance, ..., dateD.Brilliance]
	"""
	
	NIR = X[:,0::n_channels]
	NIR = np.array(NIR)
	NIR = NIR.astype(np.float)
	R = X[:,1::n_channels]
	R = np.array(R)
	R = R.astype(np.float)
	
	NDVI = np.where(NIR+R!=0., (NIR-R)/(NIR+R), 0.)
	return NDVI

#-----------------------------------------------------------------------
def addingfeat_reshape_data(X, feature_strategy, nchannels):
	"""
		Reshaping (feature format (3 bands): d1.b1 d1.b2 d1.b3 d2.b1 d2.b2 d2.b3 ...)
		INPUT:
			-X: original feature vector ()
			-feature_strategy: used features (options: SB, NDVI, SB3feat)
			-nchannels: number of channels
		OUTPUT:
			-new_X: data in the good format for Keras models
	"""
			
	if feature_strategy=='SB':
		print("SPECTRAL BANDS-----------------------------------------")
		return X.reshape(X.shape[0],int(X.shape[1]/nchannels),nchannels)
								
	elif feature_strategy=='NDVI':
		print("NDVI only----------------------------------------------")
		new_X = computeNDVI(X, nchannels)
		return np.expand_dims(new_X, axis=2)
							
	elif feature_strategy=='SB3feat':
		print("SB + NDVI + NDWI + IB----------------------------------")
		NDVI, NDWI, IB = addFeatures(X)		
		new_X = X.reshape(X.shape[0],int(X.shape[1]/nchannels),nchannels)		
		new_X = np.dstack((new_X, NDVI))
		new_X = np.dstack((new_X, NDWI))
		new_X = np.dstack((new_X, IB))
		return new_X
	else:
		print("Not referenced!!!-------------------------------------------")
		return -1

#-----------------------------------------------------------------------
def computingMinMax(X, per=2):
	min_per = np.percentile(X, per, axis=(0,1))
	max_per = np.percentile(X, 100-per, axis=(0,1))
	return min_per, max_per

#-----------------------------------------------------------------------
def read_minMaxVal(minmax_file):	
	with open(minmax_file, 'r') as f:
		reader = csv.reader(f, delimiter=',')
		min_per = next(reader)
		max_per = next(reader)
	min_per = [float(k) for k in min_per]
	min_per = np.array(min_per)
	max_per = [float(k) for k in max_per]
	max_per = np.array(max_per)
	return min_per, max_per

#-----------------------------------------------------------------------
def save_minMaxVal(minmax_file, min_per, max_per):	
	with open(minmax_file, 'w') as f:
		writer = csv.writer(f, delimiter=',')
		writer.writerow(min_per)
		writer.writerow(max_per)

#-----------------------------------------------------------------------
def normalizingData(X, min_per, max_per):
	return (X-min_per)/(max_per-min_per)

#-----------------------------------------------------------------------	
def extractValSet(X_train, polygon_ids_train, y_train, val_rate=0.1):
	unique_pol_ids_train, indices = np.unique(polygon_ids_train, return_inverse=True) #-- pold_ids_train = unique_pol_ids_train[indices]
	nb_pols = len(unique_pol_ids_train)
	
	ind_shuffle = list(range(nb_pols))
	random.shuffle(ind_shuffle)
	list_indices = [[] for i in range(nb_pols)]
	shuffle_indices = [[] for i in range(nb_pols)]
	[ list_indices[ind_shuffle[val]].append(idx) for idx, val in enumerate(indices)]					
		
	final_ind = list(itertools.chain.from_iterable(list_indices))
	m = len(final_ind)
	final_train = int(math.ceil(m*(1.0-val_rate)))
	
	shuffle_polygon_ids_train = polygon_ids_train[final_ind]
	id_final_train = shuffle_polygon_ids_train[final_train]
	
	while shuffle_polygon_ids_train[final_train-1]==id_final_train:
		final_train = final_train-1
	
	
	new_X_train = X_train[final_ind[:final_train],:,:]
	new_y_train = y_train[final_ind[:final_train]]
	new_X_val = X_train[final_ind[final_train:],:,:]
	new_y_val = y_train[final_ind[final_train:]]
	
	return new_X_train, new_y_train, new_X_val, new_y_val
	

In [None]:
from tensorflow.keras.utils import to_categorical

open("/content/drive/My Drive/Invasives Research UMN/Remote Sensing Master/Leafy Spurge Demography/temporalCNN-master/example/train_dataset.csv").read()

res_path = '/content/drive/My Drive/Invasives Research UMN/Remote Sensing Master/Leafy Spurge Demography'
sits_path = '/content/drive/My Drive/Invasives Research UMN/Remote Sensing Master/Leafy Spurge Demography/temporalCNN-master/example'
feature = "SB"
noarchi = 2
norun = 0


#-- Creating output path if does not exist
if not os.path.exists(res_path):
  print("ResPath DNE")
  os.makedirs(res_path)
	
	#---- Parameters to set
n_channels = 7 #-- B G NDVI NIR Red SWIR1 SWIR2
val_rate = 0.00

	#---- Evaluated metrics
eval_label = ['OA', 'train_loss', 'train_time', 'test_time']	
	
	#---- String variables
train_str = 'train_dataset'
test_str = 'test_dataset'					
	#---- Get filenames
train_file = sits_path + '/' + train_str + '.csv'
test_file = sits_path + '/' + test_str + '.csv'
print("train_file: ", train_file)
print("test_file: ", test_file)
	
	#---- output files			
res_path = res_path + '/Archi' + str(noarchi) + '/'
if not os.path.exists(res_path):
  os.makedirs(res_path)
  print("noarchi: ", noarchi)
	
str_result = feature + '-' + train_str + '-noarchi' + str(noarchi) + '-norun' + str(norun) 
res_file = res_path + '/resultOA-' + str_result + '.csv'
res_mat = np.zeros((len(eval_label),1))
traintest_loss_file = res_path + '/trainingHistory-' + str_result + '.csv'
conf_file = res_path + '/confMatrix-' + str_result + '.csv'
out_model_file = res_path + '/bestmodel-' + str_result + '.h5'


	#---- Downloading
X_train, polygon_ids_train, y_train = readSITSData(train_file)

print(X_train.shape) #13336, 63
X_test,  polygon_ids_test, y_test = readSITSData(test_file)
print(X_train)
print(polygon_ids_train)
print(y_train.shape) #13336

n_classes_test = len(np.unique(y_test))
print(n_classes_test)
n_classes_train = len(np.unique(y_train))
print(n_classes_train)
if(n_classes_test != n_classes_train):
  print("WARNING: different number of classes in train and test")
n_classes = max(n_classes_train, n_classes_test)
y_train_one_hot = to_categorical(y_train) #specify number of classes explicity - may need to recode classes sequentially (1-9) to work correctly
y_test_one_hot = to_categorical(y_test)

print(y_train_one_hot)
print(y_test_one_hot)
	
	#---- Adding the features and reshaping the data if necessary
X_train = addingfeat_reshape_data(X_train, feature, n_channels) #Feature = "SB" (spectral bands)

print(X_train[0, :, :])
print(X_train.shape)
X_test = addingfeat_reshape_data(X_test, feature, n_channels)		
print(X_test.shape)

#---- Normalizing the data per band (Do we want to normalize across years or within one year?)
minMaxVal_file = '.'.join(out_model_file.split('.')[0:-1])
minMaxVal_file = minMaxVal_file + '_minMax.txt'

if not os.path.exists(minMaxVal_file): 
  min_per, max_per = computingMinMax(X_train) #compute 98% min/max (per = 2) on bands
  save_minMaxVal(minMaxVal_file, min_per, max_per)
else:
  min_per, max_per = read_minMaxVal(minMaxVal_file)

print(min_per, max_per)

X_train =  normalizingData(X_train, min_per, max_per)
X_test =  normalizingData(X_test, min_per, max_per)

print(X_train) #verify normalization worked as intended






#Define Keras Model Architectures

https://github.com/charlotte-pel/temporalCNN/


In [None]:

""" 
	Defining keras architecre, and training the models
"""

import sys, os
import numpy as np
import time

import keras
from keras import layers
from keras import optimizers
from keras.regularizers import l2
from keras.layers import Input, Dense, Activation, BatchNormalization, Dropout, Flatten, Lambda, SpatialDropout1D, Concatenate
from keras.layers import Conv1D, Conv2D, AveragePooling1D, MaxPooling1D, GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.callbacks import Callback, ModelCheckpoint, History, EarlyStopping
from keras.models import Model, load_model
from keras.optimizers import *
from keras.utils.np_utils import to_categorical
from keras import backend as K



#-----------------------------------------------------------------------
#---------------------- Modules
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------		
def conv_bn(X, **conv_params):	
	nbunits = conv_params["nbunits"];
	kernel_size = conv_params["kernel_size"];

	strides = conv_params.setdefault("strides", 1)
	padding = conv_params.setdefault("padding", "same")
	kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-6))
	kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")

	Z = Conv1D(nbunits, kernel_size=kernel_size, 
			strides = strides, padding=padding,
			kernel_initializer=kernel_initializer,
			kernel_regularizer=kernel_regularizer)(X)

	return BatchNormalization(axis=-1)(Z) #-- CHANNEL_AXIS (-1)

#-----------------------------------------------------------------------		
def conv_bn_relu(X, **conv_params):
	Znorm = conv_bn(X, **conv_params)
	return Activation('relu')(Znorm)
	
#-----------------------------------------------------------------------		
def conv_bn_relu_drop(X, **conv_params):	
	dropout_rate = conv_params.setdefault("dropout_rate", 0.5)
	A = conv_bn_relu(X, **conv_params)
	return Dropout(dropout_rate)(A)

#-----------------------------------------------------------------------		
def conv_bn_relu_spadrop(X, **conv_params):	
	dropout_rate = conv_params.setdefault("dropout_rate", 0.5)
	A = conv_bn_relu(X, **conv_params)
	return SpatialDropout1D(dropout_rate)(A)

#-----------------------------------------------------------------------		
def conv2d_bn(X, **conv_params):	
	nbunits = conv_params["nbunits"];
	kernel_size = conv_params["kernel_size"];

	strides = conv_params.setdefault("strides", 1)
	padding = conv_params.setdefault("padding", "same")
	kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-6))
	kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")

	Z = Conv2D(nbunits, kernel_size=kernel_size, 
			strides = strides, padding=padding,
			kernel_initializer=kernel_initializer,
			kernel_regularizer=kernel_regularizer)(X)

	return BatchNormalization(axis=-1)(Z) #-- CHANNEL_AXIS (-1)

#-----------------------------------------------------------------------		
def conv2d_bn_relu(X, **conv_params):
	Znorm = conv2d_bn(X, **conv_params)
	return Activation('relu')(Znorm)
	
#-----------------------------------------------------------------------		
def conv2d_bn_relu_drop(X, **conv_params):	
	dropout_rate = conv_params.setdefault("dropout_rate", 0.5)
	A = conv2d_bn_relu(X, **conv_params)
	return Dropout(dropout_rate)(A)

#-----------------------------------------------------------------------		
def conv2d_bn_relu_spadrop(X, **conv_params):	
	dropout_rate = conv_params.setdefault("dropout_rate", 0.5)
	A = conv2d_bn_relu(X, **conv_params)
	return SpatialDropout1D(dropout_rate)(A)

	
#-----------------------------------------------------------------------		
def relu_drop(X, **conv_params):	
	dropout_rate = conv_params.setdefault("dropout_rate", 0.5)
	A = Activation('relu')(X)
	return Dropout(dropout_rate)(A)

#-----------------------------------------------------------------------		
def fc_bn(X, **fc_params):
	nbunits = fc_params["nbunits"];
	
	kernel_regularizer = fc_params.setdefault("kernel_regularizer", l2(1.e-6))
	kernel_initializer = fc_params.setdefault("kernel_initializer", "he_normal")
		
	Z = Dense(nbunits, kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(X)
	return BatchNormalization(axis=-1)(Z) #-- CHANNEL_AXIS (-1)
	
#-----------------------------------------------------------------------		
def fc_bn_relu(X, **fc_params):	
	Znorm = fc_bn(X, **fc_params)
	return Activation('relu')(Znorm)

#-----------------------------------------------------------------------		
def fc_bn_relu_drop(X, **fc_params):
	dropout_rate = fc_params.setdefault("dropout_rate", 0.5)
	A = fc_bn_relu(X, **fc_params)
	return Dropout(dropout_rate)(A)

#-----------------------------------------------------------------------		
def softmax(X, nbclasses, **params):
	kernel_regularizer = params.setdefault("kernel_regularizer", l2(1.e-6))
	kernel_initializer = params.setdefault("kernel_initializer", "glorot_uniform")
	return Dense(nbclasses, activation='softmax', 
			kernel_initializer=kernel_initializer,
			kernel_regularizer=kernel_regularizer)(X)

#-----------------------------------------------------------------------		
def getNoClasses(model_path):
	model = load_model(model_path)
	last_weight = model.get_weights()[-1]
	nclasses = last_weight.shape[0] #--- get size of the bias in the Softmax
	return nclasses

#-----------------------------------------------------------------------
#---------------------- Training models
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
def trainTestModel(model, X_train, Y_train_onehot, X_test, Y_test_onehot, out_model_file, **train_params):
	#---- variables
	n_epochs = train_params.setdefault("n_epochs", 20)
	batch_size = train_params.setdefault("batch_size", 32)
	
	lr = train_params.setdefault("lr", 0.001)
	beta_1 = train_params.setdefault("beta_1", 0.9)
	beta_2 = train_params.setdefault("beta_2", 0.999)
	decay = train_params.setdefault("decay", 0.0)

	#---- optimizer
	opt = tf.keras.optimizers.Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, decay=decay)
	model.compile(optimizer = opt, loss = "categorical_crossentropy",
			metrics = ["accuracy"])
	
	#---- monitoring the minimum loss
	checkpoint = ModelCheckpoint(out_model_file, monitor='loss',
			verbose=0, save_best_only=True, mode='min')
	callback_list = [checkpoint]
		
	start_train_time = time.time()
	hist = model.fit(x = X_train, y = Y_train_onehot, epochs = n_epochs, 
		batch_size = batch_size, shuffle=True,
		validation_data=(X_test, Y_test_onehot),
		verbose=1, callbacks=callback_list)
	train_time = round(time.time()-start_train_time, 2)
		
	#-- download the best model
	del model	
	model = load_model(out_model_file)
	start_test_time = time.time()
	test_loss, test_acc = model.evaluate(x=X_test, y=Y_test_onehot, 
		batch_size = 128, verbose=0)
	test_time = round(time.time()-start_test_time, 2)
	
	return test_acc, np.min(hist.history['loss']), model, hist.history, train_time, test_time

#-----------------------------------------------------------------------
def trainTestModel_EarlyAbandon(model, X_train, Y_train_onehot, X_test, Y_test_onehot, out_model_file, **train_params):
	#---- variables
	n_epochs = train_params.setdefault("n_epochs", 20)
	batch_size = train_params.setdefault("batch_size", 32)
	
	lr = train_params.setdefault("lr", 0.001)
	beta_1 = train_params.setdefault("beta_1", 0.9)
	beta_2 = train_params.setdefault("beta_2", 0.999)
	decay = train_params.setdefault("decay", 0.0)

	#---- optimizer
	opt = tf.keras.optimizers.Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, decay=decay)
	model.compile(optimizer = opt, loss = "categorical_crossentropy",
			metrics = ["accuracy"])
	
	#---- monitoring the minimum loss
	checkpoint = ModelCheckpoint(out_model_file, monitor='loss',
			verbose=0, save_best_only=True, mode='min')
	#early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=0, verbose=0, mode='auto')
  #callback_list = [checkpoint, early_stop]
	callback_list = [checkpoint]
			
	start_train_time = time.time()
	hist = model.fit(x = X_train, y = Y_train_onehot, epochs = n_epochs, 
		batch_size = batch_size, shuffle=True,
		validation_data=(X_test, Y_test_onehot),
		verbose=1, callbacks=callback_list)
	train_time = round(time.time()-start_train_time, 2)
		
	#-- download the best model
	del model	
	model = load_model(out_model_file)
	start_test_time = time.time()
	test_loss, test_acc = model.evaluate(x=X_test, y=Y_test_onehot, 
		batch_size = 128, verbose=0)
	test_time = round(time.time()-start_test_time, 2)
	
	return test_acc, np.min(hist.history['loss']), model, hist.history, train_time, test_time
		
#-----------------------------------------------------------------------
def trainValTestModel(model, X_train, Y_train_onehot, X_val, Y_val_onehot, X_test, Y_test_onehot, out_model_file, **train_params):
	#---- variables
	n_epochs = train_params.setdefault("n_epochs", 20)
	batch_size = train_params.setdefault("batch_size", 32)
	
	lr = train_params.setdefault("lr", 0.001)
	beta_1 = train_params.setdefault("beta_1", 0.9)
	beta_2 = train_params.setdefault("beta_2", 0.999)
	decay = train_params.setdefault("decay", 0.0)

	#---- optimizer
	opt = tf.keras.optimizers.Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, decay=decay)
	model.compile(optimizer = opt, loss = "categorical_crossentropy",
			metrics = ["accuracy"])
	
	#---- monitoring the minimum validation loss
	checkpoint = ModelCheckpoint(out_model_file, monitor='val_loss',
			verbose=0, save_best_only=True, mode='min')
	callback_list = [checkpoint]
		
	start_train_time = time.time()
	hist = model.fit(x = X_train, y = Y_train_onehot, epochs = n_epochs, 
		batch_size = batch_size, shuffle=True,
		validation_data=(X_val, Y_val_onehot),
		verbose=1, callbacks=callback_list)
	train_time = round(time.time()-start_train_time, 2)
		
	#-- download the best model
	del model	
	model = load_model(out_model_file)
	start_test_time = time.time()
	test_loss, test_acc = model.evaluate(x=X_test, y=Y_test_onehot, 
		batch_size = 128, verbose=0)
	test_time = round(time.time()-start_test_time, 2)
	
	return test_acc, np.min(hist.history['val_loss']), model, hist.history, train_time, test_time
	
#-----------------------------------------------------------------------
def trainValTestModel_EarlyAbandon(model, X_train, Y_train_onehot, X_val, Y_val_onehot, X_test, Y_test_onehot, out_model_file, **train_params):
	#---- variables
	n_epochs = train_params.setdefault("n_epochs", 20)
	batch_size = train_params.setdefault("batch_size", 32)
	
	lr = train_params.setdefault("lr", 0.001)
	beta_1 = train_params.setdefault("beta_1", 0.9)
	beta_2 = train_params.setdefault("beta_2", 0.999)
	decay = train_params.setdefault("decay", 0.0)

	#---- optimizer
	opt = tf.keras.optimizers.Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, decay=decay)
	model.compile(optimizer = opt, loss = "categorical_crossentropy",
			metrics = ["accuracy"])
	
	#---- monitoring the minimum validation loss
	checkpoint = ModelCheckpoint(out_model_file, monitor='val_loss',
			verbose=0, save_best_only=True, mode='min')
	early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto')
	callback_list = [checkpoint, early_stop]
		
	start_train_time = time.time()
	hist = model.fit(x = X_train, y = Y_train_onehot, epochs = n_epochs, 
		batch_size = batch_size, shuffle=True,
		validation_data=(X_val, Y_val_onehot),
		verbose=1, callbacks=callback_list)
	train_time = round(time.time()-start_train_time, 2)
		
	#-- download the best model
	del model	
	model = load_model(out_model_file)
	start_test_time = time.time()
	test_loss, test_acc = model.evaluate(x=X_test, y=Y_test_onehot, 
		batch_size = 128, verbose=0)
	test_time = round(time.time()-start_test_time, 2)
	
	return test_acc, np.min(hist.history['val_loss']), model, hist.history, train_time, test_time


In [None]:

""" 
	Defining keras architecture.
	4.4. How big and deep model for our data?
	4.4.1. Width influence or the bias-variance trade-off
"""

import sys, os

import keras
from keras import layers
from keras.layers import Flatten
from keras import backend as K

#-----------------------------------------------------------------------
#---------------------- ARCHITECTURES
#------------------------------------------------------------------------	

#-----------------------------------------------------------------------		
def Archi_3CONV16_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 16 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV16_1FC256')	
	
	
#-----------------------------------------------------------------------		
def Archi_3CONV32_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 32 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV32_1FC256')	


#-----------------------------------------------------------------------		
def Archi_3CONV64_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 64 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV64_1FC256')	


#-----------------------------------------------------------------------		
def Archi_3CONV128_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 128 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV128_1FC256')	


#-----------------------------------------------------------------------		
def Archi_3CONV256_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 256 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV256_1FC256')	


#-----------------------------------------------------------------------		
def Archi_3CONV512_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 512 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV512_1FC256')	


#-----------------------------------------------------------------------		
def Archi_3CONV1024_1FC256(X, nbclasses):
	
	#-- get the input sizes
	m, L, depth = X.shape
	input_shape = (L,depth)
	
	#-- parameters of the architecture
	l2_rate = 1.e-6
	dropout_rate = 0.5
	nb_conv = 3
	nb_fc= 1
	nbunits_conv = 1024 #-- will be double
	nbunits_fc = 256 #-- will be double
	
	# Define the input placeholder.
	X_input = Input(input_shape)
		
	#-- nb_conv CONV layers
	X = X_input
	for add in range(nb_conv):
		X = conv_bn_relu_drop(X, nbunits=nbunits_conv, kernel_size=5, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
	#-- Flatten + 	1 FC layers
	X = Flatten()(X)
	for add in range(nb_fc):	
		X = fc_bn_relu_drop(X, nbunits=nbunits_fc, kernel_regularizer=l2(l2_rate), dropout_rate=dropout_rate)
		
	#-- SOFTMAX layer
	out = softmax(X, nbclasses, kernel_regularizer=l2(l2_rate))
		
	# Create model.
	return Model(inputs = X_input, outputs = out, name='Archi_3CONV1024_1FC256')	


#--------------------- Switcher for running the architectures
def runArchi(noarchi, *args):
	#---- variables
	n_epochs = 20
	batch_size = 32
	
	switcher = {		
		0: Archi_3CONV16_1FC256,
		1: Archi_3CONV32_1FC256,
		2: Archi_3CONV64_1FC256,
		3: Archi_3CONV128_1FC256,
		3: Archi_3CONV256_1FC256,
		4: Archi_3CONV512_1FC256,
		5: Archi_3CONV1024_1FC256,
	}
	func = switcher.get(noarchi, lambda: 0)
	model = func(args[0], args[1].shape[1])
	
	if len(args)==5:
		return trainTestModel_EarlyAbandon(model, *args, n_epochs=n_epochs, batch_size=batch_size)
	elif len(args)==7:
		return trainValTestModel_EarlyAbandon(model, *args, n_epochs=n_epochs, batch_size=batch_size)


In [None]:

#---- Extracting a validation set (if necesary)
if val_rate > 0:
  X_train, y_train, X_val, y_val = extractValSet(X_train, polygon_ids_train, y_train, val_rate)
  #--- Computing the one-hot encoding (recomputing it for train)
  y_train_one_hot = to_categorical(y_train, n_classes)
  y_val_one_hot = to_categorical(y_val, n_classes)

if not os.path.isfile(res_file):
  if val_rate==0:
    res_mat[0,norun], res_mat[1,norun], model, model_hist, res_mat[2,norun], res_mat[3,norun] = runArchi(noarchi, X_train, y_train_one_hot, X_test, y_test_one_hot, out_model_file)
  else:
    res_mat[0,norun], res_mat[1,norun], model, model_hist, res_mat[2,norun], res_mat[3,norun] = runArchi(noarchi, X_train, y_train_one_hot, X_val, y_val_one_hot, X_test, y_test_one_hot, out_model_file)






In [None]:

train_test_path = "/content/drive/My\ Drive/Invasives\ Research\ UMN/Remote\ Sensing\ Master/Leafy\ Spurge\ Demography/temporalCNN-master/example"

results_path = "/content/drive/My\ Drive/Invasives\ Research\ UMN/Remote\ Sensing\ Master/Leafy\ Spurge\ Demography/results"

!python3 '/content/drive/My Drive/Invasives Research UMN/Remote Sensing Master/Leafy Spurge Demography/temporalCNN-master/run_archi.py' '--sits_path' {train_test_path} '--res_path' {results_path} '--noarchi' 2

