# Creation of Land Use Change Tables from Generated Google Earth Assets


In [20]:
import ee
import geemap
import geemap.ml as ml
from ipygee import chart as chart
# import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import pymannkendall as mk
import xarray as xr
import os
# Import date class from datetime module
from datetime import datetime as dt
import datetime
import pytz

# Record the (naive, local) wall-clock time at which this notebook run started.
today = dt.today()
print("Today is: ", today)

Today is:  2023-05-18 12:30:29.054359


# GEE Authentication 
 
 ### Paste the Authentication code into the box below if prompted to save the token
 
 
 (press enter to save token)


In [21]:
# ee.Authenticate()

In [22]:
# Initialise the Earth Engine session through geemap.
# NOTE(review): presumably this reuses a stored token and only prompts when none is found — confirm.
geemap.ee_initialize()

### New version control of inputs and outputs

* best to check that catchment hydroclimatic information is indeed the most reliable/latest available

In [23]:
# Root folder and version tag of the input data set.
p = '..'
version = 'Version_3_20230303'

# The "ID" column of the versioned GB.csv holds the catchment IDs.
catchment_csv = f"{p}/Inputs/{version}/GB.csv"
l = pd.read_csv(catchment_csv)['ID']

# Plain Python list of the IDs; used later to filter the Earth Engine asset.
names = l.tolist()

# To process only a subset, override the list, e.g.:
# names = [22001, 23004, 71004, 79002, 94001]

# Report how many catchments were read, followed by the IDs themselves.
summary = f'{len(names)} catchments processed for hydroclimatic variables:\n \n{names}'
print(summary)

95 catchments processed for hydroclimatic variables:
 
[17005, 18001, 20007, 21017, 21023, 21024, 22001, 23004, 24004, 25006, 26003, 27035, 27042, 27047, 27051, 27071, 28046, 28072, 29003, 29009, 30004, 30012, 30015, 31023, 32003, 33018, 33019, 33029, 34011, 36003, 36009, 36010, 37005, 38026, 39017, 39019, 39020, 39025, 39034, 40005, 40011, 41022, 41025, 41027, 41029, 42003, 43014, 45005, 46003, 46005, 47009, 48003, 48004, 49004, 50002, 52010, 52016, 53006, 53008, 53009, 53017, 54008, 54018, 54025, 54034, 54036, 55008, 55014, 55016, 55026, 55029, 56013, 57004, 60002, 60003, 62001, 64001, 65005, 67010, 67018, 68005, 71001, 71004, 72005, 73005, 73011, 75017, 76014, 77004, 78004, 79002, 79004, 8009, 93001, 94001]



### Load the JS Module

The custom JS module takes the difficult javascript functions that do not translate well to python, and makes them callable in the notebook environment. 

Current version 6 improves the classifier by using a weighted training strategy.

This custom module borrows some functions from the LandTrendr module developed by Justin Braaten (Google), which is released under an Apache license (i.e. free for use). The adaptation begins from LandTrendr version 0.2, which incorporated Landsat Collection 2, removing the need for the regression coefficients between sensors developed by Roy et al.

In [24]:
# NOTE(review): requireJS is called here without a library path, and `oeel` is not
# used in the visible part of the notebook — confirm this call is still needed.
oeel = geemap.requireJS()

# Interactive map widget (a fresh one is created again before layers are added).
Map = geemap.Map()

# Load the adapted LandTrendr JavaScript module so its functions are callable from Python.
ltgee = geemap.requireJS(r'../JS_module/Adapted_LT_v8.2.js')

# Bare expression: the cell output lists every name exposed by the module and its type.
ltgee.availability  #all functions within the javascript module

{'version': 'string',
 'buildSensorYearCollection': 'function',
 'getSRcollection': 'function',
 'getCombinedSRcollection': 'function',
 'buildSRcollection': 'function',
 'getCollectionIDlist': 'function',
 'countClearViewPixels': 'function',
 'buildClearPixelCountCollection': 'function',
 'removeImages': 'function',
 'LAIcol': 'function',
 'calcIndex': 'function',
 'standardize': 'function',
 'transformSRcollection': 'function',
 'createTrainingImage': 'function',
 'addTerrainBand': 'function',
 'genGCP': 'function',
 'classifier': 'function',
 'classArea': 'function',
 'imcolFromAsset': 'function',
 'imcolFromAssetHILDA': 'function',
 'unionCollections': 'function'}

## Initiate With a Shapefile

This notebook assumes the user has a shapefile saved as an asset in their GEE account. The assets used in the CATAPUCII project will be made publicly available in the @mohanrahan repository.


In [25]:
# Earth Engine folder in which the project's assets are stored
asset_dir = 'projects/mohanrahan/assets'

# Asset path (relative to asset_dir) of the catchment boundary feature collection
catchment_asset = 'CATAPUCII_Catchments/CAMELS_GB_catchment_boundaries'

# Name of the dataset; also used to build the output folder path
dataset = 'CAMELS_GB'

# Name of the column/property that identifies each catchment
col_string  = 'ID'

# Coordinate reference system used for the pixel-area reduction (British National Grid)
crs = 'EPSG:27700'

# Figure number for plotting
# NOTE(review): not used in the visible part of the notebook
fignum = 0

# RGB visualization settings for Landsat imagery
# NOTE(review): not used in the visible part of the notebook
RGB_VIS = {'bands':['B3','B2','B1'], 'min':0, 'max':1.5e3}

# Visualisation settings for a classified image with a 'landcover' band (values 1-5)
lc_vis = {'bands':['landcover'], 'min':1, 'max':5, 'palette':['#E6004D', '#FFFFA8', '#80FF00', '#A6A6FF', '#00CCF2']}

# First and last year requested when the image collection is built from the asset.
# NOTE(review): the original comment said "Landsat data collection", but these values are
# passed to imcolFromAssetHILDA and 1899 predates Landsat — confirm the intended meaning.
startYear = 1899

endYear = 2019


# Month-day strings (MM-DD) appended to a year to build the date-filter window
startDay = '06-20'
endDay = '08-31'

## The Table Data


In [11]:
# Catchment boundaries, loaded as a server-side feature collection from the configured asset path.
table = ee.FeatureCollection(f"{asset_dir}/{catchment_asset}")

def set_area_km2(feature):
    '''
    Attach the geometry's area, in square kilometres, to a feature.

    The value returned by ``feature.geometry().area()`` is divided by
    1000*1000 and stored under the 'area_km2' property.

    Returns the feature produced by ``feature.set``.
    '''
    divisor = 1000*1000
    return feature.set('area_km2', feature.geometry().area().divide(divisor))

def set_area_pixel(feature):
    '''
    Attach the pixel-summed area of a feature to it as 'pixel_area'.

    The per-pixel area image is divided by 1e6, clipped to the feature's
    geometry and summed over that geometry at a 30 m scale in the
    notebook-level ``crs``.

    Returns the feature produced by ``feature.set``.
    '''
    aoi = feature.geometry()

    # Per-pixel area, scaled by 1e6 and restricted to the feature.
    area_image = ee.Image.pixelArea().divide(1e6).clip(aoi).select('area')

    summed = area_image.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=aoi,
        scale=30,
        crs=crs,
        maxPixels=1e13,
        bestEffort=True,
    )

    return feature.set('pixel_area', summed.get('area'))

def set_id(feature):
    '''
    Copy the feature's 'system:index' into a 'system_index' property.

    Returns the feature produced by ``feature.set``.
    '''
    index_string = ee.String(feature.get('system:index'))
    return feature.set({'system_index': index_string})

# Define a function to set the zone for a feature
def set_zone(feature):
    '''
    Label a feature with the first zone geometry it intersects.

    The zones are tested in the order north (``gn``), middle (``gm``),
    south (``gs``); the matching label 'N', 'M' or 'S' is stored under the
    'zone' property, or 'Unknown' when none of them intersects.

    ``gn``, ``gm`` and ``gs`` must be defined at notebook level before this
    function is mapped over a collection.

    Returns the feature produced by ``feature.set``.
    '''
    geometry = feature.geometry()

    # `intersects` returns a server-side object, which is always truthy in a
    # Python `if`, so a client-side if/elif chain would label every feature 'N'.
    # The branching therefore has to be expressed with ee.Algorithms.If.
    zone = ee.Algorithms.If(
        geometry.intersects(gn, 1),
        'N',
        ee.Algorithms.If(
            geometry.intersects(gm, 1),
            'M',
            ee.Algorithms.If(geometry.intersects(gs, 1), 'S', 'Unknown'),
        ),
    )

    # Add the 'zone' property to the feature
    return feature.set('zone', zone)

# Add 'area_km2', 'system_index' and 'pixel_area' properties to every catchment feature.
table_area = table.map(set_area_km2).map(set_id).map(set_area_pixel)

# Keep features with a positive area and sort by 'pixel_area'; passing False sorts in descending order.
Filtered_Sorted = table_area.filter(ee.Filter.gt('area_km2', 0)).sort('pixel_area', False) #.map(set_zone) # true ranks from smallest to largest

# Disabled zone-labelling workflow, kept for reference (gn, gm and gs are not defined in the visible notebook).
# # Set the 'zone' property for features in the north zone
# fc_north = Filtered_Sorted.filterBounds(gn).map(lambda f: f.set('zone', 'N'))

# # Set the 'zone' property for features in the middle zone
# fc_middle = Filtered_Sorted.filterBounds(gm).map(lambda f: f.set('zone', 'M'))

# # Set the 'zone' property for features in the south zone
# fc_south = Filtered_Sorted.filterBounds(gs).map(lambda f: f.set('zone', 'S'))

# Merge the three feature collections
# fc_with_zones = ee.FeatureCollection(fc_north.merge(fc_middle).merge(fc_south)).sort('pixel_area', False)

# Download the collection's properties into a DataFrame indexed by the Earth Engine system index.
down = geemap.ee_to_pandas(Filtered_Sorted).set_index(['system_index'])

# Keep only the catchments whose ID appears in the input list `names`.
df1 = down.loc[down[col_string].isin(names)]

#df1.to_excel(f'../Outputs/{dataset}/{dataset}_catchment_table.xlsx')

print(f'The length of the dataframe generated from the EE asset {len(df1)}')

# System indices of the retained catchments, in the sorted order; the processing loop iterates over these.
sys_index = df1.index.to_list()

display(df1)

The length of the dataframe generated from the EE asset 95


Unnamed: 0_level_0,pixel_area,area_km2,SOURCE,VERSION,ID,EXPORTED,ID_STRING
system_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00000000000000000088,1355.679450,1349.764903,National River Flow Archive,1.3,27071,1518422400000,27071
00000000000000000235,1145.767150,1140.885961,National River Flow Archive,1.3,71001,1518422400000,71001
000000000000000001c0,1125.603290,1121.172318,National River Flow Archive,1.3,54008,1518422400000,54008
00000000000000000207,897.948531,894.478359,National River Flow Archive,1.3,62001,1518422400000,62001
0000000000000000025a,798.157955,794.507522,National River Flow Archive,1.3,79002,1518422400000,79002
...,...,...,...,...,...,...,...
0000000000000000021b,12.768404,12.719149,National River Flow Archive,1.3,67010,1518422400000,67010
00000000000000000082,11.353532,11.304831,National River Flow Archive,1.3,27047,1518422400000,27047
000000000000000001df,10.506362,10.466687,National River Flow Archive,1.3,55008,1518422400000,55008
00000000000000000084,8.177400,8.143164,National River Flow Archive,1.3,27051,1518422400000,27051


In [13]:
# Cross-check the catchments found in the EE asset against the input list.
# Lengths alone are not enough: a duplicated ID in the asset can offset a
# missing one, so the ID sets themselves are compared as well.
len1 = len(df1[col_string].values)
len2 = len(names)

# geom_tup_ls = [('north_zone', geom_north), ('middle_zone', geom_middle), ('south_zone',geom_south)]

# file_prefix = ['GB_Middle_HC_tuned_', 'GB_North_HC_tuned_', 'GB_South_HC_tuned_'] # f'{file_prefix}{year}'

asset_ids = set(df1[col_string].values)
requested_ids = set(names)

missing_from_sets = asset_ids - requested_ids    # in the asset table, not in the input list
missing_from_asset = requested_ids - asset_ids   # in the input list, not in the asset table
n_duplicate_rows = len1 - len(asset_ids)         # rows of df1 that repeat an ID

if missing_from_sets:
    print(f'catchment{ missing_from_sets} is/are missing from the catchment sets')
if missing_from_asset:
    print(f'catchment{ missing_from_asset} is/are missing from the EE asset')
if n_duplicate_rows:
    print(f'{n_duplicate_rows} row(s) of the EE asset table repeat a catchment ID')
if not (missing_from_sets or missing_from_asset or n_duplicate_rows):
    print('The number of catchments with Hydroclimatic indices calculated match the length filtered EE asset\nThere seems to be no mismatch\nContinue... ')

The number of catchments with Hydroclimatic indices calculated match the length filtered EE asset
There seems to be no mismatch
Continue... 


In [16]:
# Bounding box around all retained catchment geometries.
aoi = Filtered_Sorted.geometry().bounds()

# Image collection built by the JS module from the HILDA asset folder for the configured years.
# NOTE(review): the band name 'b1' and the class value range are assumed from the visualisation below — confirm against the asset.
imCol = ltgee.imcolFromAssetHILDA(startYear, endYear, aoi, dataset, "projects/mohanrahan/assets/HILDA")

# A fresh map replaces the widget created when the JS module was loaded.
Map = geemap.Map()
# Map.setOptions('')
# Per-pixel mode of the class band across the whole collection.
Map.addLayer(imCol.mode(), {'bands':['b1'], 'min':1, 'max':99, 'palette':['#E6004D', '#FFFFA8', '#80FF00', '#A6A6FF', '#00CCF2']}, 'Mode of Classes')
Map.addLayer(aoi, {'color': 'green'}, 'green: Included')
# Map.addLayer(naoi, {'color':'red'}, 'red: Not Included')
Map.centerObject(aoi, 7)


# Bare expression so the map widget is rendered as the cell output.
Map

Map(center=[54.4666618785552, -2.562465661904664], controls=(WidgetControl(options=['position', 'transparent_b…

In [17]:
def extractArea(item):
    '''
    Turn one entry of a grouped-sum reduction into a ``[class, area]`` pair.

    ``item`` is read as a dictionary with a 'b1' entry (the class value,
    formatted as a string) and a 'sum' entry (divided by 1e6).

    Method borrowed from https://code.earthengine.google.co.in/9c45ff677c46eae08952831de02bfb40
    Article: https://spatialthoughts.com/2020/06/19/calculating-area-gee/
    '''
    group = ee.Dictionary(item)
    return ee.List([
        ee.Number(group.get('b1')).format(),
        ee.Number(group.get('sum')).divide(1e6),
    ])

def classArea(classified_image, scale, aoi):
    '''
    Sum the pixel area covered by each class of an image inside a geometry.

    A pixel-area image is stacked with ``classified_image`` and reduced over
    ``aoi`` at the given ``scale`` with a sum reducer grouped by the second
    band (the group key is named 'b1').

    Returns a server-side list of ``[class, area]`` pairs, one per group,
    as produced by ``extractArea``.
    '''
    grouped_sum = ee.Reducer.sum().group(groupField=1, groupName='b1')

    reduction = ee.Image.pixelArea().addBands(classified_image).reduceRegion(
        reducer=grouped_sum,
        geometry=aoi,
        scale=scale,
        maxPixels=1e10,
        bestEffort=True,
    )

    return ee.List(reduction.get('groups')).map(extractArea)

def dateToMs(year):
    '''
    Return a naive datetime for 31 August, 22:00:00, of ``year``.

    Despite the name, a ``datetime.datetime`` object is returned, not a
    millisecond timestamp.
    '''
    return datetime.datetime(year=year, month=8, day=31, hour=22, minute=0, second=0)

def dataframeAreas(i, yc, aoi, classified, trainingClassImage, ms, classImageYear, name, accuracy, pixArea):
    '''
    Build a one-row table of per-class areas for one image and one geometry.

    Parameters
    ----------
    i, yc, trainingClassImage, accuracy :
        Accepted for interface compatibility; not used by the current body.
    aoi :
        Geometry over which the class areas are summed.
    classified :
        Image whose classes are measured (passed to ``classArea``).
    ms :
        Value stored as 'image_date'; it becomes the index of the result.
    classImageYear :
        Value stored in the ('training', 'year_trained') column.
    name :
        Value stored in the ('catchment', 'name ') column.
    pixArea :
        Value stored in the ('catchment', 'area') column.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``ms`` with two-level columns: ('area_H', <class>)
        for each class, plus the 'training' and 'catchment' columns above.
        Missing values are replaced by 0.
    '''
    # The reduction runs at a hard-coded scale of 1113; `.getInfo()` downloads the result.
    class_areas = classArea(classified, 1113, aoi).getInfo()
    merged = pd.DataFrame(class_areas, columns=['class', 'area_H'])

    merged['image_date'] = ms
    pivoted = merged.pivot(index='image_date', columns='class', values=['area_H'])
    pivoted['training', 'year_trained'] = classImageYear

    pivoted['catchment', 'area'] = pixArea

    # The trailing space in 'name ' is kept on purpose: it is part of the
    # column key written to the output files.
    pivoted['catchment', 'name '] = name

    # fillna returns a new frame; the original discarded it, leaving NaNs in place.
    return pivoted.fillna(0)

## Running Module over the Shapefile

1. The geometries are selected by their system indices (`sys_index`): each index updates the `aoi`, and the process is run over every catchment whose index is included in `sys_index`.
2. The image collection is generated per shapefile and then returns the decadal mean of each index

# TODO:

- Redefine the methodology of reduction. Using chart --> dataframe --> join all dataframes is redundant and probably very slow

In [18]:
# Classifier settings.
# NOTE(review): not read by the visible processing loop — confirm whether later cells use them.
classLoopParams = {'dataset':'CORINE',    #training dataset, no other than corine currently supported
               'trainingClassLevel':1, #classLevel determines the level of corine class simplification
               'customClassLevels':None,   #can provide some custom levels, not fully tested
               'numClasses':5,            #if trainingClassLevel is 1 then there are 5 classes, level is 2 then there are 15, 3 is 44. (CORINE land cover class grouping)
               'tileScale':2,            #tileScale higher number reduces likelihood of classifier running into a memory limit
               'year_classified': np.arange(2019, 2020),   # classification year is , for classifiers saved, the same as the years available in the training dataset
              }

# Folder that receives one Excel file per catchment and year.
output_folder = f'../Outputs/{dataset}/class_areas_from_asset_HILDA/'

if not os.path.exists(output_folder):
    # exist_ok guards against the folder appearing between the check and the
    # call; the message is printed only once the folder really exists.
    os.makedirs(output_folder, exist_ok=True)
    print(f'{output_folder} created')



In [39]:
# Resume point for the class-area loop: processing restarts at this catchment
# and, for this catchment only, at this year.
catchment_ID_broken = 27071 #the catchment at which to start the loop again

# Look the catchment up through `col_string`, like the rest of the notebook,
# instead of a hard-coded column attribute.
_resume_matches = df1[df1[col_string] == catchment_ID_broken].index.values
if len(_resume_matches) == 0:
    raise IndexError(f'catchment {catchment_ID_broken} not found in column {col_string!r} of df1')
index_broken = _resume_matches[0]

year_broken = 1984 #the year at which to start the looping again

GB_decades = ('1970 - 1979', '1980 - 1989', '1990 - 1999', '2000 - 2009')

In [40]:
t0 = dt.today()

# NOTE(review): `scale` is not read anywhere in this cell; dataframeAreas is
# called without it. Kept so that the notebook-level name still exists.
scale =30

print(f'begin loop: {t0}')

# Position of the resume catchment in the ordered list of system indices.
index_broken = df1[df1[col_string] == catchment_ID_broken].index.tolist()[0]

index = sys_index.index(index_broken)

# Slice the list so that the loop starts at the resume catchment
slice_start = index
slice_broken = index + sys_index.count(index_broken) -1

# Years for which a class-area table is produced; constant, so built once.
all_years = [1899, 1930, 1950, 1960, 1970, 1979, 1980] + list(np.arange(1984, 2020))

if year_broken not in all_years:
    raise ValueError(f'year_broken={year_broken} is not one of the years to process')

# One-row frames are collected here and concatenated once at the end:
# DataFrame.append is removed in pandas 2.0 and copies the whole table on every call.
class_area_frames = []
classArea_df = pd.DataFrame()

try:
    for i, ind in enumerate(sys_index[slice_broken:]): # Loop through all indices in the system index

        if ind == index_broken:
            # Only the resume catchment skips the years that were already done.
            # The notebook-level `startYear` is deliberately left untouched so that
            # re-running earlier cells still sees the configured value.
            year_range = all_years[all_years.index(year_broken):]
            print('resuming from broken')
        else:
            year_range = all_years

        print(f'{slice_broken+i} / {len(sys_index)}')

        pix_area = df1.loc[ind, 'pixel_area']

        name = df1.loc[ind, col_string]

        aoi = Filtered_Sorted.filter(ee.Filter.eq('system_index', ind)).geometry()

        t1 = dt.today()

        if dataset == 'CAMELS_GB':
            for j, yc in enumerate(year_range):

                print('begin', dataset, name, yc)

                # The image from the collection whose class areas are measured.
                # NOTE(review): the filter window is shifted by +100 years — presumably the
                # collection's timestamps are offset by a century; confirm against the JS module.
                classified = imCol.filterDate(str(yc+100)+'-'+startDay, str(yc+101)+'-'+endDay).first()

                ms = dateToMs(yc)

                print(ms)

                df = dataframeAreas(i, yc, aoi, classified, ee.Image.constant(ee.Number(1)), ms, yc, f'{name}', 1, pix_area)

                # Each catchment/year is written immediately so finished work survives a failure.
                df.to_excel(f'{output_folder}{name}_{yc}_HILDA.xlsx')

                class_area_frames.append(df)

        else:
            print('classification routine for this dataset is not yet provided for')

        t4 = dt.today()

        print(f'step2: Done: {t4}, time taken: {t4-t1}')

        print(f'\nCatchment: {name}, total time: {t4-t1}\n---------------')
finally:
    # Runs even when the loop is interrupted, so the frames gathered so far
    # are still available in `classArea_df`.
    if class_area_frames:
        classArea_df = pd.concat(class_area_frames)

tfinal = dt.today()

print(f'END LOOP: Full routine finished: {tfinal} \nTime taken: {tfinal-t0}')

begin loop: 2023-05-18 14:05:50.831632
resuming from broken
0 / 95
begin CAMELS_GB 27071 1984
1984-08-31 22:00:00
begin CAMELS_GB 27071 1985
1985-08-31 22:00:00
begin CAMELS_GB 27071 1986
1986-08-31 22:00:00
begin CAMELS_GB 27071 1987
1987-08-31 22:00:00
begin CAMELS_GB 27071 1988
1988-08-31 22:00:00
begin CAMELS_GB 27071 1989
1989-08-31 22:00:00
begin CAMELS_GB 27071 1990
1990-08-31 22:00:00
begin CAMELS_GB 27071 1991
1991-08-31 22:00:00
begin CAMELS_GB 27071 1992
1992-08-31 22:00:00
begin CAMELS_GB 27071 1993
1993-08-31 22:00:00
begin CAMELS_GB 27071 1994
1994-08-31 22:00:00
begin CAMELS_GB 27071 1995
1995-08-31 22:00:00
begin CAMELS_GB 27071 1996
1996-08-31 22:00:00
begin CAMELS_GB 27071 1997
1997-08-31 22:00:00
begin CAMELS_GB 27071 1998
1998-08-31 22:00:00
begin CAMELS_GB 27071 1999
1999-08-31 22:00:00
begin CAMELS_GB 27071 2000
2000-08-31 22:00:00
begin CAMELS_GB 27071 2001
2001-08-31 22:00:00
begin CAMELS_GB 27071 2002
2002-08-31 22:00:00
begin CAMELS_GB 27071 2003
2003-08-31 22