# Google Earth Engine Component

## Initialize

In [2]:
#Import required libraries
import ee
import geemap
import math

import numpy as np
import matplotlib.pyplot as plt

import itertools
# from varname import nameof

import pandas as pd
from time import time

In [3]:
# Initialize the Google Earth Engine client for this kernel session.
# ee.Authenticate()  # only needed the first time, before ee.Initialize()
ee.Initialize()

In [4]:
# Create the interactive geemap map that the later cells add layers to.
# Displaying it (next cell) is the check that geemap works as intended.
Map = geemap.Map()

In [5]:
# Render the (still empty) map widget.
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

## Load Feature Collection - Shapefiles

In [6]:
# Data loads

# County boundaries come from the TIGER 2018 counties feature collection hosted
# on Google Earth Engine (other feature collections could be uploaded as well).
counties = ee.FeatureCollection("TIGER/2018/Counties")

# Keep only the two study counties, selected by their NAME property.
la_county, sc_county = (
    counties.filter(ee.Filter.eq('NAME', county_name))
    for county_name in ('Los Angeles', 'Santa Clara')
)

In [7]:
# Show the two filtered county collections (client-side proxies; nothing is fetched here).
la_county, sc_county

(<ee.featurecollection.FeatureCollection at 0x7f95c22655b0>,
 <ee.featurecollection.FeatureCollection at 0x7f95c2265250>)

## Load NAIP Imagery

In [8]:
def apply_3bands(image, band):
    """Append three neighborhood texture bands derived from one band of `image`.

    Args:
        image: ee.Image that contains `band`.
        band: name of the band the textures are computed from (e.g. 'R').

    Returns:
        `image` with three additional bands named '<band>_Entropy',
        '<band>_Contrast' and '<band>_Gearys'.
    """
    # All texture operators below run on an unsigned 8-bit copy of the band.
    band_u8 = image.select(band).toUint8()

    # Entropy inside a square window of radius 4.
    entropy_band = band_u8.entropy(ee.Kernel.square(radius=4))

    # GLCM texture metrics with size 4; only the contrast metric is kept.
    contrast_band = band_u8.glcmTexture(size=4).select(str(band) + '_contrast')

    # 9x9 weight matrix: ones everywhere except a zero at the center, so the
    # non-zero weights describe the spatial neighborhood of each pixel.
    kernel_width = 9
    ones_row = [1] * kernel_width
    center_row = [1, 1, 1, 1, 0, 1, 1, 1, 1]
    weights = [ones_row] * 4 + [center_row] + [ones_row] * 4
    neighbor_kernel = ee.Kernel.fixed(kernel_width, kernel_width, weights, -4, -4, False)

    # One band per neighbor, then the summed squared difference between the
    # pixel and its neighbors, divided by 9^2.
    neighbor_bands = band_u8.neighborhoodToBands(neighbor_kernel)
    gearys_band = (
        band_u8.subtract(neighbor_bands)
               .pow(2)
               .reduce(ee.Reducer.sum())
               .divide(math.pow(kernel_width, 2))
    )

    return (
        image.addBands(entropy_band.rename(str(band) + '_Entropy'))
             .addBands(contrast_band.rename(str(band) + '_Contrast'))
             .addBands(gearys_band.rename(str(band) + '_Gearys'))
    )

def add_neighborhood_bands(image):
    """Add the texture bands produced by `apply_3bands` for R, G, B and N.

    Args:
        image: ee.Image with bands 'R', 'G', 'B' and 'N'.

    Returns:
        The image after `apply_3bands` has been applied once per band, in the
        order R, G, B, N.
    """
    enriched = image
    for band_name in ('R', 'G', 'B', 'N'):
        enriched = apply_3bands(enriched, band_name)
    return enriched
    
def add_NDVI(image):
    """Return `image` with an extra 'NDVI' band.

    The band is the normalized difference of the 'N' and 'R' bands, in that
    order, renamed to 'NDVI'.
    """
    ndvi_band = image.normalizedDifference(['N', 'R']).rename('NDVI')
    return image.addBands(ndvi_band)
     

In [9]:
def get_images(param_dict):
    """Build one feature-enriched composite image per (year, county) pair.

    Args:
        param_dict: dict with the keys
            'source_image_collection': Earth Engine image collection id,
            'years': iterable of years,
            'counties': dict mapping a county label to the geometry /
                feature collection the composite is clipped to.

    Returns:
        dict mapping '<year>_<county label>' to an ee.Image holding the
        R, G, B, N bands, the texture bands added by `add_neighborhood_bands`
        and the NDVI band added by `add_NDVI`. An empty dict is returned when
        there are no (year, county) combinations.
    """
    # Read every setting from the argument. The previous version read the
    # notebook-level `params` here, so callers passing a different dict
    # silently got the global's image collection.
    source_image_collection = param_dict['source_image_collection']
    years = param_dict['years']
    counties = param_dict['counties']

    images_by_name = {}
    for year, county in itertools.product(years, counties.keys()):
        # Median composite of that calendar year's imagery, clipped to the county.
        # NOTE(review): filterDate's end date is exclusive per the EE docs, so
        # imagery dated Dec 31 is not included - confirm this is intended.
        image = (
            ee.ImageCollection(source_image_collection)
              .filterDate(f'{year}-01-01', f'{year}-12-31')
              .select(['R', 'G', 'B', 'N'])
              .median()
              .clip(counties[county])
        )
        # Derive the features once per image instead of re-mapping the whole
        # accumulated list on every loop iteration.
        images_by_name[f'{year}_{county}'] = add_NDVI(add_neighborhood_bands(image))
    return images_by_name

    
    

In [10]:
# Settings for the multi-year, two-county image set: NAIP imagery for every
# second year from 2010 to 2020.
params = dict(
    source_image_collection='USDA/NAIP/DOQQ',
    years=list(range(2010, 2021, 2)),
    counties={'la_county': la_county, 'sc_county': sc_county},
)

# One image per '<year>_<county>' key; display the resulting dict.
images = get_images(params)
images

{'2010_la_county': <ee.image.Image at 0x7f95c23a1cd0>,
 '2010_sc_county': <ee.image.Image at 0x7f95c23a1eb0>,
 '2012_la_county': <ee.image.Image at 0x7f95c23a40d0>,
 '2012_sc_county': <ee.image.Image at 0x7f95c23a42b0>,
 '2014_la_county': <ee.image.Image at 0x7f95c23a4490>,
 '2014_sc_county': <ee.image.Image at 0x7f95c23a4670>,
 '2016_la_county': <ee.image.Image at 0x7f95c23a4850>,
 '2016_sc_county': <ee.image.Image at 0x7f95c23a4a30>,
 '2018_la_county': <ee.image.Image at 0x7f95c23a4c10>,
 '2018_sc_county': <ee.image.Image at 0x7f95c23a4df0>,
 '2020_la_county': <ee.image.Image at 0x7f95c23a4fd0>,
 '2020_sc_county': <ee.image.Image at 0x7f95c23a71f0>}

In [11]:
# List the band names of one image (getInfo() fetches the value from Earth Engine)
# to confirm that the texture and NDVI bands were added.
images['2020_la_county'].bandNames().getInfo()

['R',
 'G',
 'B',
 'N',
 'R_Entropy',
 'R_Contrast',
 'R_Gearys',
 'G_Entropy',
 'G_Contrast',
 'G_Gearys',
 'B_Entropy',
 'B_Contrast',
 'B_Gearys',
 'N_Entropy',
 'N_Contrast',
 'N_Gearys',
 'NDVI']

In [12]:
# Add the 2020 LA County image to the map with empty (default) visualization
# parameters to check that the NAIP imagery looks correct.
Map.addLayer(images['2020_la_county'], {}, 'NAIP Test')

In [13]:
# Four corner coordinates of a test polygon (presumably [longitude, latitude] pairs - verify).
poly = [[-118.180413,33.912948],[-118.193457,33.875639],[-118.285876,33.872960],[-118.280523,33.928753]]

In [14]:
# Turn the coordinate list into an Earth Engine polygon geometry.
p = ee.Geometry.Polygon(poly)

In [15]:
# Short alias for the 2020 LA County image used in the clipping test below.
im = images['2020_la_county']

In [16]:
# Clip the image to the test polygon; the result is only displayed, not stored.
im.clip(p)

<ee.image.Image at 0x7f95c22650a0>

In [17]:
# Draw the test polygon on the map with default styling.
Map.addLayer(p, {}, 'POLYGON TEST')

In [18]:
# Re-display the map, now with the NAIP and polygon test layers.
Map

Map(bottom=754.0, center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(…

In [19]:
# Test whether an NDVI plot looks correct: recompute the normalized difference
# of the N and R bands for the 2020 Santa Clara image as a standalone band.
source_image = images['2020_sc_county']

test_ndvi = source_image.normalizedDifference(['N', 'R']).rename('2020_NDVI')
test_ndvi

<ee.image.Image at 0x7f95c21fd490>

In [20]:
# Show the test NDVI band stretched over [-1, 1] with a blue-white-green palette.
# The visualization keys must be the strings 'min' and 'max'; written unquoted
# they are Python's builtin min/max functions used as dict keys, so the intended
# stretch is never passed under the names Earth Engine looks for.
Map.addLayer(test_ndvi, {'min': -1, 'max': 1, 'palette': ['blue', 'white', 'green']}, 'NDVI Test')
Map

Map(bottom=754.0, center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(…

## Load Labeled Data

In [21]:
# Root of the project's Earth Engine assets that hold the hand-labeled polygons.
PROJECT_DIR = 'projects/california-lawn-detection/assets/'

# One feature collection per labeled asset. Some collections are capped with
# .limit() (grass: 400 features; each impervious asset and soil: 40 features).
water = ee.FeatureCollection(PROJECT_DIR + "water_torrance_0610")
vegetation_trees = ee.FeatureCollection(PROJECT_DIR + "trees_torrance")
vegetation_grass = ee.FeatureCollection(PROJECT_DIR + "grass_torrance").limit(400)
turf_1 = ee.FeatureCollection(PROJECT_DIR + "turf_torrance1")
turf_2 = ee.FeatureCollection(PROJECT_DIR + "turf_torrance2")
pv = ee.FeatureCollection(PROJECT_DIR + "pv_torrance")
impervious_1 = ee.FeatureCollection(PROJECT_DIR + "impervious_torrance1").limit(40)
impervious_2 = ee.FeatureCollection(PROJECT_DIR + "impervious_torrance2").limit(40)
soil = ee.FeatureCollection(PROJECT_DIR + "soil_torrance").limit(40)

# Classes that were labeled in two separate assets are combined into one collection.
turf = turf_1.merge(turf_2)
impervious = impervious_1.merge(impervious_2)

# Full labeled set used for sampling.
# NOTE(review): `pv` is loaded above but is not merged in here - confirm that
# leaving it out of the labeled set is intended.
LABELED_SET = (
    water.merge(vegetation_trees)
         .merge(vegetation_grass)
         .merge(turf)
         .merge(impervious)
         .merge(soil)
)

Map.addLayer(LABELED_SET, {}, 'labeled_set')

In [22]:
# Re-display the map with the labeled polygons layer.
Map

Map(bottom=754.0, center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(…

## Build Training Set

In [23]:
# The training image is the 2020 NAIP composite for LA County, built with the
# same pipeline (get_images) as the analysis images.
training_image_params = dict(
    source_image_collection='USDA/NAIP/DOQQ',
    years=[2020],
    counties={'lacounty': la_county},
)

# get_images keys its result by '<year>_<county label>'.
TRAINING_IMAGE = get_images(training_image_params)['2020_lacounty']

In [24]:
# Overlay the labeled polygons on the imagery to get the training samples.

# Property of the labeled features that holds the class id.
LABEL = 'landcover'

# Predictor bands: the four raw bands, NDVI, and three texture bands per raw band.
BANDS = ['R', 'G', 'B', 'N', 'NDVI'] + [
    f'{band}_{texture}'
    for band in ('R', 'G', 'B', 'N')
    for texture in ('Entropy', 'Contrast', 'Gearys')
]

# Sample the predictor bands inside every labeled feature at a scale of 1,
# carrying the class label over to each sample.
training_set = TRAINING_IMAGE.select(BANDS).sampleRegions(
    collection=LABELED_SET,
    properties=[LABEL],
    scale=1,
)

In [25]:
# Sanity check: BANDS lists exactly the bands present on the training image (order ignored).
set(BANDS)==set(TRAINING_IMAGE.bandNames().getInfo())

True

In [26]:
def training_area(image, training_class):
    """Count the pixels of `image` that fall inside a labeled class.

    Args:
        image: ee.Image to count pixels on; it must have a band named 'B'.
        training_class: feature collection whose combined geometry bounds the count.

    Returns:
        The pixel count reported for band 'B' at a scale of 2, fetched from
        Earth Engine with getInfo().
    """
    pixel_counts = image.reduceRegion(
        reducer=ee.Reducer.count(),
        geometry=training_class.geometry(),
        scale=2,
        maxPixels=1e13,
    ).getInfo()

    # Only the count for band 'B' is reported.
    return pixel_counts.get('B')

In [27]:
def training_polygons(training_class):
    """Return the count of the 'label' property over `training_class`.

    The value is computed with aggregate_count('label') and fetched with getInfo().
    """
    label_count = training_class.aggregate_count('label')
    return label_count.getInfo()

In [28]:
# Training information: pixel and polygon counts per labeled class.
training_classes = [water,
                    vegetation_trees,
                    vegetation_grass,
                    turf,
                    pv,
                    impervious,
                    soil]

class_names = ['water',
               'vegetation_trees',
               'vegetation_grass',
               'turf',
               'pv',
               'impervious',
               'soil']

# Check the one condition the error message is about explicitly. The previous
# bare `except:` reported every failure (including Earth Engine errors) as a
# list-size mismatch; other errors now surface with their real traceback.
if len(training_classes) != len(class_names):
    print('ERROR. POSSIBLE MISMATCH IN CLASSES LIST AND NAMES LIST SIZES')
else:
    for class_name, training_class in zip(class_names, training_classes):
        area_i = training_area(TRAINING_IMAGE, training_class)
        polygons_i = training_polygons(training_class)
        print(class_name, "pixels:", area_i, ", polygons", polygons_i)



water pixels: 486 , polygons 199
vegetation_trees pixels: 17125 , polygons 772
vegetation_grass pixels: 23681 , polygons 400
turf pixels: 857 , polygons 215
pv pixels: 1723 , polygons 344
impervious pixels: 20360 , polygons 80
soil pixels: 8761 , polygons 40


## Separate Training Data into Train/Test

In [29]:
# Split the sampled pixels randomly into training and test sets - there might
# be a better way to do this.
# randomColumn() adds the 'random' column that the two filters below split on:
# values <= 0.8 go to training, values > 0.8 go to validation.
sample = training_set.randomColumn()
trainingSample = sample.filter('random <= 0.8')
validationSample = sample.filter('random > 0.8')

# Report the size of each set (count of the 'R' property, fetched from Earth Engine).
for _title, _subset in (
    ("Labeled Set Size in Pixels", training_set),
    ("Training Set Size in Pixels", trainingSample),
    ("Test Set Size in Pixels", validationSample),
):
    print(_title, _subset.aggregate_count('R').getInfo())

Labeled Set Size in Pixels 284710
Training Set Size in Pixels 227871
Test Set Size in Pixels 56839


## Machine Learning Model

In [30]:
# Train a 100-tree random forest on the training sample, predicting LABEL from BANDS.
clf = ee.Classifier.smileRandomForest(numberOfTrees = 100).train(trainingSample, LABEL, BANDS)
clf

<ee.Classifier at 0x7f95c23a7b20>

In [31]:
# Classify every pixel of the training image with the trained classifier.
training_image_classified = TRAINING_IMAGE.select(BANDS).classify(clf)

In [32]:
# Legend entries for the six classes shown on the map (pv is not listed).
legend_keys = ['water', 'vegetation_trees', 'vegetation_grass', 'turf','impervious','soil']
# Removed the stray leading space from ' #A1A8AF', which made it an invalid color string.
legend_colors = ['#0B6AEF', '#097407', '#0CE708', '#8C46D2', '#A1A8AF', '#D47911']

# The layer is stretched over class ids 1..7, so the palette needs one color
# per id for each class to receive its own legend color. The area calculation
# in this notebook uses ids 1-4 for water/trees/grass/turf and 6-7 for
# impervious/soil, leaving id 5 (pv) without a legend entry; a placeholder
# color is inserted at that position so the remaining colors line up.
classification_palette = legend_colors[:4] + ['#000000'] + legend_colors[4:]

Map.addLayer(training_image_classified,
             {'min': 1, 'max': 7, 'palette': classification_palette},
             'Classification')

In [33]:
# Re-display the map with the classification layer.
Map

Map(bottom=754.0, center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(…

## Evaluation Metrics

### Train Accuracy

In [30]:
# Classify the training sample itself and build the error (confusion) matrix of
# the 'landcover' labels against the predicted 'classification' values.
train_accuracy = trainingSample.classify(clf).errorMatrix('landcover', 'classification')
train_accuracy.getInfo()

[[0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1473, 0, 0, 0, 0, 0, 0],
 [0, 0, 54688, 21, 0, 0, 0, 0],
 [0, 0, 147, 75601, 0, 0, 0, 2],
 [0, 0, 0, 0, 2637, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 65382, 0],
 [0, 0, 0, 0, 0, 0, 1, 27918]]

In [32]:
# Accuracy computed from the training error matrix.
train_accuracy.accuracy().getInfo()

0.9992451869698207

In [31]:
# Sum of all matrix cells, i.e. the number of classified training samples
# (should match the training set size printed above).
np.array(train_accuracy.getInfo()).sum()

227871

### Test Accuracy

In [33]:
# Same error matrix, computed on the held-out validation sample.
test_accuracy = validationSample.classify(clf).errorMatrix('landcover', 'classification')

In [35]:
# Accuracy computed from the validation error matrix.
test_accuracy.accuracy().getInfo()

0.9786238322278717

In [34]:
# Sum of all matrix cells, i.e. the number of classified validation samples
# (should match the test set size printed above).
np.array(test_accuracy.getInfo()).sum()

56839

## Binary Classification and Area Calculation

In [34]:
def area_calculation(image, class_number, shape,  pixel_scale = 20):
    """Return the area, in square kilometers, classified as one class inside a region.

    Args:
        image: classified ee.Image with a band named 'classification'.
        class_number: class id whose area is measured.
        shape: geometry / feature collection that bounds the calculation.
        pixel_scale: scale passed to reduceRegion (default 20).

    Returns:
        Sum of the pixel areas where `image` equals `class_number`, divided by
        1e6 to convert square meters to square kilometers. The value is fetched
        from Earth Engine with getInfo().
    """
    # 1 where the pixel belongs to the class, 0 elsewhere, weighted by pixel area.
    class_mask = image.eq(class_number)
    class_area_image = class_mask.multiply(ee.Image.pixelArea())

    totals = class_area_image.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=shape,
        scale=pixel_scale,
        maxPixels=1e13,
    ).getInfo()

    area_sq_m = totals.get('classification')
    return area_sq_m / 1e6

In [63]:
# Zip codes (as strings) whose land-cover areas are calculated below.
Zipcode_list = ['90802','90732','90744','90704','91362','91316']

In [61]:
# Per-zip-code land-cover areas, collected column-wise for a DataFrame.

# Column names are kept in a list (not a set) so the DataFrame column order is
# deterministic and readable.
keys = ['ImageYear', 'polygon', 'water_area', 'vegetation_trees_area', 'vegetation_grass_area',
        'turf_area', 'impervious_area', 'soil_area', 'total_area']
dictionary = {key: [] for key in keys}

imagery = training_image_classified
# `nameof` is not imported (its import is commented out at the top of the
# notebook), so the label that nameof(imagery) produced is written out explicitly.
image_label = 'imagery'

# Output column -> class id in the classified image (id 5 is not reported).
class_ids = {
    'water_area': 1,
    'vegetation_trees_area': 2,
    'vegetation_grass_area': 3,
    'turf_area': 4,
    'impervious_area': 6,
    'soil_area': 7,
}

# NOTE(review): `la_county_income_zipcode` is not defined in the visible part of
# this notebook; it must be loaded before this cell can run on a fresh kernel.
for i in Zipcode_list:
    start = time()
    polygon = la_county_income_zipcode.filter(ee.Filter.eq('ZipCode', i))

    dictionary['ImageYear'].append(image_label)
    dictionary['polygon'].append(i)

    # One Earth Engine reduction per class; the total is the sum of the six
    # reported classes, added in the same order as the columns above.
    total_area = 0
    for column, class_id in class_ids.items():
        class_area = area_calculation(imagery, class_id, polygon, 20)
        dictionary[column].append(class_area)
        total_area += class_area
    dictionary['total_area'].append(total_area)

    end = time()
    print(i, f'completed in {end-start} seconds.')

90802 completed in 10.099151134490967 seconds.
90732 completed in 8.45103120803833 seconds.
90744 completed in 8.424124956130981 seconds.
90704 completed in 98.49644899368286 seconds.
91362 completed in 21.4758038520813 seconds.
91316 completed in 49.545308113098145 seconds.


In [71]:
# Inspect the feature collection selected for a single zip code.
# NOTE(review): `la_county_income_zipcode` is not defined in the visible part of this notebook.
la_county_income_zipcode.filter(ee.Filter.eq('ZipCode', '90802'))

<ee.featurecollection.FeatureCollection at 0x7fa15d294820>

In [72]:
# Show the LA County feature collection object for comparison.
la_county

<ee.featurecollection.FeatureCollection at 0x7fa15d094430>

In [62]:
# Tabulate the per-zip-code areas (square kilometers, as returned by area_calculation).
pd.DataFrame(dictionary)

Unnamed: 0,turf_area,vegetation_trees_area,polygon,water_area,impervious_area,total_area,soil_area,ImageYear,vegetation_grass_area
0,0.164438,0.647374,90802,0.0,12.688006,14.756617,0.760027,imagery,0.496773
1,0.159726,2.635687,90732,0.0,2.946216,8.231549,1.869716,imagery,0.620204
2,0.190158,2.017314,90744,0.0,15.849339,22.599279,3.527528,imagery,1.01494
3,9.193009,66.103575,90704,5.3337,35.695794,341.338173,138.000843,imagery,87.01125
4,0.023005,0.129558,91362,0.057541,1.716988,2.632023,0.202211,imagery,0.50272
5,0.763596,4.772787,91316,0.004292,7.122218,14.764068,0.461717,imagery,1.639458


In [45]:
# classified = training_image_classified

# water_area = area_calculation(classified, 1, la_county, 20)
# vegetation_trees_area = area_calculation(classified, 2, la_county, 20)
# vegetation_grass_area = area_calculation(classified, 3, la_county, 20)
# turf_area = area_calculation(classified, 4, la_county, 20)
# impervious_area = area_calculation(classified, 6, la_county, 20)
# soil_area = area_calculation(classified, 7, la_county, 20)

# total_area = water_area + vegetation_trees_area + vegetation_grass_area + turf_area + impervious_area + soil_area

# water_percentage = water_area / total_area
# vegetation_trees_percentage = vegetation_trees_area / total_area
# vegetation_grass_percentage = vegetation_grass_area / total_area
# turf_percentage = turf_area / total_area
# impervious_percentage = impervious_area / total_area
# soil_percentage = soil_area / total_area