# Google Earth Engine Panel Data Creation

## Initialize

In [4]:
!pip install geemap
#!pip install ee

Collecting geemap
  Downloading geemap-0.14.0-py2.py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 25.4 MB/s eta 0:00:01
[?25hCollecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 17.3 MB/s eta 0:00:01
[?25hCollecting xyzservices
  Downloading xyzservices-2022.6.0-py3-none-any.whl (36 kB)
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Collecting mapclassify>=2.4.0
  Downloading mapclassify-2.4.3-py3-none-any.whl (38 kB)
Collecting pyshp>=2.1.3
  Downloading pyshp-2.3.0-py2.py3-none-any.whl (46 kB)
[K     |████████████████████████████████| 46 kB 9.1 MB/s  eta 0:00:01
[?25hCollecting sankee
  Downloading sankee-0.0.7.tar.gz (29 kB)
Collecting folium>=0.11.0
  Downloading folium-0.12.1.post1-py2.py3-none-any.whl (95 kB)
[K     |████████████████████████████████| 95 kB 7.9 MB/s  eta 0:00:01
[?25hCollecting python-box
  Downloading python_box-6.0.2-c

Collecting pyasn1<0.5.0,>=0.4.6
  Downloading pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 13.5 MB/s eta 0:00:01
Collecting whitebox
  Downloading whitebox-2.1.2-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 10.2 MB/s eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Collecting google-cloud-core<3.0dev,>=2.3.0
  Downloading google_cloud_core-2.3.1-py2.py3-none-any.whl (29 kB)
Collecting google-resumable-media>=2.3.2
  Downloading google_resumable_media-2.3.3-py2.py3-none-any.whl (76 kB)
[K     |████████████████████████████████| 76 kB 10.6 MB/s eta 0:00:01
[?25hCollecting google-crc32c<2.0dev,>=1.0
  Downloading google_crc32c-1.3.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (37 kB)


Collecting plotly>=5.2.2
  Downloading plotly-5.9.0-py2.py3-none-any.whl (15.2 MB)
[K     |████████████████████████████████| 15.2 MB 77.3 MB/s eta 0:00:01
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Building wheels for collected packages: earthengine-api, ee-extra, gdown, httplib2shim, pycrs, sankee
  Building wheel for earthengine-api (setup.py) ... [?25ldone
[?25h  Created wheel for earthengine-api: filename=earthengine_api-0.1.316-py3-none-any.whl size=270124 sha256=6a153ec4faee3c0e90040840342d0838443670a2af623fd98943019d3a59dd7b
  Stored in directory: /root/.cache/pip/wheels/e0/9d/5d/2846b359e2ad28adb3104502507cb33e20e502b26fd22f7dd5
  Building wheel for ee-extra (setup.py) ... [?25ldone
[?25h  Created wheel for ee-extra: filename=ee_extra-0.0.13-py3-none-any.whl size=198382 sha256=1348fd0d79ab47b9583f51784005953237c8789a3b1f47ae4733fc360fb5256d
  Stored in directory: /root/.cache/pip/wheels/99/95/78/eeee7313080552ade64452acc7f7acfe9d

In [5]:
#GEE specific
import ee
import geemap
import math

#plotting and functions
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from time import time

In [7]:
#Initialize Google Earth Engine
ee.Authenticate() #just needed the 1st time
ee.Initialize()

Enter verification code: 4/1AX4XfWhmHg5xP5-0HUFkNfuobhU1hp8EPnpqyupo6L0lX8cxTceC45wEsU8

Successfully saved authorization token.


In [8]:
# Check if geemap is working as intended - plot the leaflet map
Map = geemap.Map()

## Load Feature Collection - Shapefiles

In [9]:
#Data loads

#loads feature collection data from Google Earth Engine - We can also upload other feature collections
counties = ee.FeatureCollection("TIGER/2018/Counties")

#filter LA County
la_county = counties.filter(ee.Filter.eq('NAME', 'Los Angeles'))
sc_county = counties.filter(ee.Filter.eq('NAME', 'Santa Clara'))

In [10]:
la_county, sc_county

(<ee.featurecollection.FeatureCollection at 0x7f49551e93d0>,
 <ee.featurecollection.FeatureCollection at 0x7f49551e95e0>)

In [11]:
#Income Data
la_county_income = ee.FeatureCollection("projects/california-lawn-detection/assets/lacountyincome-final")

## Load NAIP Imagery

In [12]:
def apply_3bands(image, band):
    i_8_bit = image.select(band).toUint8()
    square = ee.Kernel.square(**{'radius': 4})
    entropy = i_8_bit.entropy(square)
    glcm = i_8_bit.glcmTexture(**{'size': 4})
    contrast = glcm.select(str(band)+'_contrast')
    
    # Create a list of weights for a 9x9 kernel.
    list = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    # The center of the kernel is zero.
    centerList = [1, 1, 1, 1, 0, 1, 1, 1, 1]
    # Assemble a list of lists: the 9x9 kernel weights as a 2-D matrix.
    lists = [list, list, list, list, centerList, list, list, list, list]
    # Create the kernel from the weights.
    # Non-zero weights represent the spatial neighborhood.
    kernel = ee.Kernel.fixed(9, 9, lists, -4, -4, False)
    neighs = i_8_bit.neighborhoodToBands(kernel)
    gearys = i_8_bit.subtract(neighs).pow(2).reduce(ee.Reducer.sum()).divide(math.pow(9, 2))
    image = image.addBands(entropy.rename(str(band)+'_Entropy')).addBands(contrast.rename(str(band)+'_Contrast')).addBands(gearys.rename(str(band)+'_Gearys'))   
    return image

def add_neighborhood_bands(image):
    bands = ['R', 'G', 'B', 'N']
    for band in bands:
        image = apply_3bands(image, band)
    return image
    
def add_NDVI(image):
    image = image.addBands(image.normalizedDifference(['N','R']).rename('NDVI'))
    return image
     

In [13]:
def get_images(param_dict):
    source_image_collection = params['source_image_collection']
    years = param_dict['years']
    counties = param_dict['counties']

    image_names = []
    images = []

    combos = list(itertools.product(years, counties.keys()))
    for i in combos:
        year = str(i[0])
        county = i[1]

        image_name = str(i[0])+'_'+i[1]
        image_names.append(image_name)

        image = ee.ImageCollection(source_image_collection)\
                                .filterDate(f'{year}-01-01', f'{year}-12-31')\
                                .select(['R','G','B','N'])\
                                .median().clip(counties[county])
        images.append(image)
        images_with_3band = list(map(add_neighborhood_bands, images))
        images_with_NDVI = list(map(add_NDVI, images_with_3band))
    return dict(zip(image_names, images_with_NDVI))

    
    

In [14]:
params = {
        'source_image_collection' : 'USDA/NAIP/DOQQ',
        'years' : [2010,2012,2014,2016,2018,2020],
        'counties': {'la_county': la_county, 'sc_county': sc_county}
         }

images = get_images(params)
images

{'2010_la_county': <ee.image.Image at 0x7f4954f072e0>,
 '2010_sc_county': <ee.image.Image at 0x7f4954f074c0>,
 '2012_la_county': <ee.image.Image at 0x7f4954f076a0>,
 '2012_sc_county': <ee.image.Image at 0x7f4954f07880>,
 '2014_la_county': <ee.image.Image at 0x7f4954f07a60>,
 '2014_sc_county': <ee.image.Image at 0x7f4954f07c40>,
 '2016_la_county': <ee.image.Image at 0x7f4954f07e20>,
 '2016_sc_county': <ee.image.Image at 0x7f4954f0a040>,
 '2018_la_county': <ee.image.Image at 0x7f4954f0a220>,
 '2018_sc_county': <ee.image.Image at 0x7f4954f0a400>,
 '2020_la_county': <ee.image.Image at 0x7f4954f0a5e0>,
 '2020_sc_county': <ee.image.Image at 0x7f4954f0a7c0>}

In [15]:
#see what bands each image has:
images['2020_la_county'].bandNames().getInfo()

['R',
 'G',
 'B',
 'N',
 'R_Entropy',
 'R_Contrast',
 'R_Gearys',
 'G_Entropy',
 'G_Contrast',
 'G_Gearys',
 'B_Entropy',
 'B_Contrast',
 'B_Gearys',
 'N_Entropy',
 'N_Contrast',
 'N_Gearys',
 'NDVI']

In [16]:
#test to see if NAIP plot looks correct
Map.addLayer(images['2020_la_county'], {}, 'NAIP Test')

In [17]:
#test to see if NDVI plot looks correct (define a different location in santa clara county just for visibility)
source_image = images['2020_sc_county']

test_ndvi = source_image.normalizedDifference(['N', 'R']).rename('2020_NDVI')
test_ndvi

<ee.image.Image at 0x7f4954f91280>

In [18]:
Map.addLayer(test_ndvi,{min : -1, max : 1, 'palette' : ['blue', 'white', 'green']}, 'NDVI Test')
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

## Load Labeled Data

In [19]:
## Loading feature collections from Google Earth Engine

#water = ee.FeatureCollection("projects/california-lawn-detection/assets/water_torrance")
water_training = ee.FeatureCollection("projects/california-lawn-detection/assets/water_training")
trees_training = ee.FeatureCollection("projects/california-lawn-detection/assets/trees_training")
grass_training = ee.FeatureCollection("projects/california-lawn-detection/assets/grass_training")
turf_training = ee.FeatureCollection("projects/california-lawn-detection/assets/turf_training")
#pv_training = ee.FeatureCollection("projects/california-lawn-detection/assets/pv_training")
impervious_training = ee.FeatureCollection("projects/california-lawn-detection/assets/impervious_training").limit(50)
soil_training = ee.FeatureCollection("projects/california-lawn-detection/assets/soil_training").limit(50)

LABELED_SET = water_training.merge(trees_training).merge(grass_training).merge(turf_training).merge(impervious_training).merge(soil_training)

In [20]:
water_test = ee.FeatureCollection("projects/california-lawn-detection/assets/water_test")
vegetation_trees_test = ee.FeatureCollection("projects/california-lawn-detection/assets/trees_test")
vegetation_grass_test  = ee.FeatureCollection("projects/california-lawn-detection/assets/grass_test")
turf_test  = ee.FeatureCollection("projects/california-lawn-detection/assets/turf_test")
#pv_test  = ee.FeatureCollection("projects/california-lawn-detection/assets/pv_test")
impervious_test  = ee.FeatureCollection("projects/california-lawn-detection/assets/impervious_test")
soil_test  = ee.FeatureCollection("projects/california-lawn-detection/assets/soil_test")

TEST_SET = water_test.merge(vegetation_trees_test).merge(vegetation_grass_test).merge(turf_test).merge(impervious_test).merge(soil_test)

## Build Training Set

In [21]:
training_image_params = {
        'source_image_collection' : 'USDA/NAIP/DOQQ',
        'years' : [2020],
        'counties': {'lacounty': la_county}
         }

TRAINING_IMAGE = get_images(training_image_params)['2020_lacounty']

In [22]:
# Overlay the points on the imagery to get training.
LABEL = 'landcover'
BANDS = ['R', 'G', 'B', 'N', 'NDVI',
         'R_Entropy',
         'R_Contrast',
         'R_Gearys',
         'G_Entropy',
         'G_Contrast',
         'G_Gearys',
         'B_Entropy',
         'B_Contrast',
         'B_Gearys',
         'N_Entropy', 
         'N_Contrast', 
         'N_Gearys']

train_data = TRAINING_IMAGE.select(BANDS).sampleRegions(**{
  'collection': LABELED_SET,
  'properties': [LABEL],
  'scale': 1
})

test_data = TRAINING_IMAGE.select(BANDS).sampleRegions(**{
  'collection': TEST_SET,
  'properties': [LABEL],
  'scale': 1
})

In [23]:
set(BANDS)==set(TRAINING_IMAGE.bandNames().getInfo())

True

In [24]:
print("Training Set Size in Pixels", train_data.aggregate_count('R').getInfo())

Training Set Size in Pixels 169069


In [25]:
print("Test Set Size in Pixels", test_data.aggregate_count('R').getInfo())

Test Set Size in Pixels 97964


## Machine Learning Model

In [26]:
clf = ee.Classifier.smileRandomForest(numberOfTrees = 200, minLeafPopulation = 5, bagFraction= 0.7)\
                   .train(train_data, LABEL, BANDS)
clf

<ee.Classifier at 0x7f49551aeeb0>

In [27]:
training_image_classified = TRAINING_IMAGE.select(BANDS)\
                                          .classify(clf)


In [28]:
legend_keys = ['water', 'vegetation_trees', 'vegetation_grass', 'turf','impervious','soil']
legend_colors = ['#0B6AEF', '#097407', '#0CE708', '#8C46D2' ,' #A1A8AF','#D47911']

Map.addLayer(training_image_classified, {'min': 1, 'max': 7, 'palette': legend_colors}, 'Classification')

In [29]:
training_image_classified.bandNames().getInfo()

['classification']

In [30]:
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

## Evaluation Metrics

### Train Accuracy

In [31]:
train_accuracy = train_data.classify(clf).errorMatrix('landcover', 'classification')
train_conf_matrix = train_accuracy.getInfo()

In [32]:
print(f'Spot check to confirm pixel numbers: {np.array(train_conf_matrix).sum()}')
train_conf_matrix

Spot check to confirm pixel numbers: 169069


[[1331, 1, 0, 1, 1, 0],
 [0, 9475, 40, 8, 12, 11],
 [0, 93, 6591, 2, 0, 38],
 [0, 15, 1, 2398, 5, 0],
 [0, 3, 0, 4, 21239, 60],
 [0, 12, 1, 2, 37, 127688]]

### Test Accuracy

In [33]:
test_accuracy = test_data.classify(clf).errorMatrix('landcover', 'classification')
test_conf_matrix = test_accuracy.getInfo()

In [34]:
print(f'Spot check to confirm pixel numbers: {np.array(test_conf_matrix).sum()}')
test_conf_matrix

Spot check to confirm pixel numbers: 97964


[[275, 0, 0, 0, 0, 0],
 [0, 2620, 141, 16, 12, 12],
 [0, 164, 1327, 3, 0, 38],
 [0, 29, 0, 602, 222, 7],
 [9, 3, 4, 847, 70443, 1647],
 [0, 36, 151, 23, 387, 18946]]

## Binary Classification and Area Calculation

In [35]:
def area_calculation(image, class_number, shape, pixel_scale = 20):

    if type(shape) == str:
        shape = la_county_income_zipcode.filter(ee.Filter.eq('ZipCode', shape))

    areaImage = image.eq(class_number).multiply(ee.Image.pixelArea())

    area = areaImage.reduceRegion(
        reducer = ee.Reducer.sum(),
        geometry = shape,
        scale = pixel_scale,
        maxPixels = 1e13)


    area_sq_m = area.getInfo().get('classification')

    area_sq_km = area_sq_m / 1e6

    return area_sq_km

In [36]:
def ndvi_calculation(image, class_number, shape, ref_image, pixel_scale=1):
    
    if type(shape) == str:
        shape = la_county_income_zipcode.filter(ee.Filter.eq('ZipCode', shape))
        
    ndvi = ref_image.normalizedDifference(['N', 'R'])
    image_clipped = image.clip(shape)
    
    NDVI_for_class = ndvi.updateMask(image_clipped.select('classification').eq(class_number))
    
    reducer = ee.Reducer.mean()\
                        .combine(ee.Reducer.max(),sharedInputs=True)\
                        .combine(ee.Reducer.min(),sharedInputs=True)
    
    
    qty = NDVI_for_class.reduceRegion(
        reducer = reducer, 
        geometry = shape, 
        scale = pixel_scale, 
        maxPixels = 1e13)
    return qty



### Create Panel Data

In [37]:
la_county_income_zipcode2 = ee.FeatureCollection("projects/california-lawn-detection/assets/income_zipcode2019")
la_county_income_zipcode = la_county_income_zipcode2.select(ee.List(['zipcode', '2019zipcod','shape_area']), ee.List(['ZipCode', 'Median_Income','Area_sqft']))

In [38]:
year_list = [2010,2012, 2014, 2016, 2018,2020]
zipcode_list = ['90802','90732','90744']

In [39]:
test_image_params = {
        'source_image_collection' : 'USDA/NAIP/DOQQ',
        'years' : year_list,
        'counties': {'lacounty': la_county}
         }

inference_images = get_images(test_image_params)
inference_images

{'2010_lacounty': <ee.image.Image at 0x7f4954ebc610>,
 '2012_lacounty': <ee.image.Image at 0x7f4954ebc7f0>,
 '2014_lacounty': <ee.image.Image at 0x7f4954ebc9d0>,
 '2016_lacounty': <ee.image.Image at 0x7f4954ebcbb0>,
 '2018_lacounty': <ee.image.Image at 0x7f4954ebcd90>,
 '2020_lacounty': <ee.image.Image at 0x7f4954ebcf70>}

In [40]:
keys = {'year','polygon','water_area','vegetation_trees_area', 
        'vegetation_grass_area', 'turf_area', 'impervious_area',
        'soil_area', 'total_area','tree_ndvi_mean', 'tree_ndvi_max','tree_ndvi_min',
       'grass_ndvi_mean', 'grass_ndvi_max','grass_ndvi_min'}


dictionary = {}
for i in keys:
    dictionary[i] = []

for i in zipcode_list:
    for j in list(inference_images.items()):
        im = j[1]
        imagery = im.select(BANDS).classify(clf)
        name = j[0]
        
        start = time()
        polygon = la_county_income_zipcode.filter(ee.Filter.eq('ZipCode', i))

        dictionary['year'].append(j[0][:4]) 
        dictionary['polygon'].append(i)

        water_area = area_calculation(imagery, 0, polygon, 20)
        dictionary['water_area'].append(water_area)

        vegetation_trees_area = area_calculation(imagery, 1, polygon, 20)
        dictionary['vegetation_trees_area'].append(vegetation_trees_area)

        vegetation_grass_area = area_calculation(imagery, 2, polygon, 20)
        dictionary['vegetation_grass_area'].append(vegetation_grass_area)

        turf_area = area_calculation(imagery, 3, polygon, 20)
        dictionary['turf_area'].append(turf_area)

        impervious_area = area_calculation(imagery, 4, polygon, 20)
        dictionary['impervious_area'].append(impervious_area)

        soil_area = area_calculation(imagery, 5, polygon, 20)
        dictionary['soil_area'].append(soil_area)

        total_area = water_area + vegetation_trees_area + vegetation_grass_area + turf_area + impervious_area + soil_area
        dictionary['total_area'].append(total_area)
        
        tree_ndvi_mean, tree_ndvi_max, tree_ndvi_min = ndvi_calculation(imagery, 1, polygon, ref = im).getInfo().values()
        dictionary['tree_ndvi_mean'].append(tree_ndvi_mean)
        dictionary['tree_ndvi_max'].append(tree_ndvi_max)
        dictionary['tree_ndvi_min'].append(tree_ndvi_min)
        
        grass_ndvi_mean, grass_ndvi_max, grass_ndvi_min = ndvi_calculation(imagery, 2, polygon, ref = im).getInfo().values()
        dictionary['grass_ndvi_mean'].append(grass_ndvi_mean)
        dictionary['grass_ndvi_max'].append(grass_ndvi_max)
        dictionary['grass_ndvi_min'].append(grass_ndvi_min)
        
        
        
        end = time()
        print(f'Zip Code: {i}, Year: {j[0][:4]} ::: completed in {end-start} seconds.')
              
              
              

TypeError: ndvi_calculation() got an unexpected keyword argument 'ref'

In [None]:
#extract just necessary columns from dictionary dataframe
df = pd.DataFrame(dictionary) #[['polygon','soil_area','vegetation_grass_area','water_area','vegetation_trees_area','impervious_area','turf_area']]
df

### Plot Area Results

In [41]:
years= [2010, 2012, 2014, 2016, 2018, 2020]
zips = df.polygon.unique()
fig, ax = plt.subplots(1,3, figsize = (20,5))

labels = [ 'soil_area',
 'vegetation_grass_area',
 'water_area',
 'vegetation_trees_area',
 'turf_area']

colors = ['brown','#66CC00','#3399FF','#FF8000','#9933FF']


for i in range(3):
    for j in range(len(labels)):
        if j == 'polygon':
            continue
        else:
            d = df[df.polygon == zips[i]]
            ax[i].plot(years, d[labels[j]], color = colors[j], label = labels[j])
    ax[i].set_title(f'Zip Code: {zips[i]}', size = 15)
    ax[i].set_xlabel('Year', size = 13)
    ax[i].set_ylabel('Area (Square Kilometers)', size = 13)
    ax[i].set_ylim(0,11)
    ax[0].legend()
    
# plt.savefig(fname = 'test.png', dpi = 200)

NameError: name 'df' is not defined