In [1]:
#Import necessary libraries
import os
import ee
import geemap
import ipyleaflet
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import statsmodels.api as sm
import pandas as pd
from IPython.display import HTML, display
import random
import json
import time
import glob
num_seed=30
random.seed(num_seed)

In [2]:
#Initialize earth engine
ee.Initialize()


In [3]:
#Colour palettes and visualization parameters used when mapping MapBiomas layers

#Palette for the original MapBiomas legend; statesViz stretches it over values 0 through 34
coverage_palette = [
    'ffffff', '129912', '1f4423', '006400', '00ff00',
    '687537', '76a5af', '29eee4', '77a605', '935132',
    'bbfcac', '45c2a5', 'b8af4f', 'f1c232', 'ffffb2',
    'ffd966', 'f6b26b', 'f99f40', 'e974ed', 'd5a6bd',
    'c27ba0', 'fff3bf', 'ea9999', 'dd7e6b', 'aa0000',
    'ff99ff', '0000ff', 'd5d5e5', 'dd497f', 'b2ae7c',
    'af2a2a', '8a2be2', '968c46', '0000ff', '4fd3ff',
]

#Palette for the simplified legend; simpleStatesViz stretches it over values 1 through 6
simple_palette = [
    '129912',
    'BBFCAC',
    'FFFFB2',
    'EA9999',
    '0000FF',
    'D5D5E5',
]

statesViz = dict(min=0, max=34, palette=coverage_palette)
simpleStatesViz = dict(min=1, max=6, palette=simple_palette)

#Two-colour palette for binary (0/1) change detection layers
change_detection_palette = ['df07b5', '0741df']
changeDetectionViz = dict(min=0, max=1, palette=change_detection_palette)

#Load the MapBiomas integration image and keep its mask so it can be re-applied
#after the legend has been simplified
mapbiomas_states = ee.Image(
    'projects/mapbiomas-workspace/public/collection4_1/mapbiomas_collection41_integration_v1'
)
states_mask = mapbiomas_states.mask()

#Define function to convert hierarchical legend to simplest form
def simplify_legend(bandName):
    '''
    Collapse the hierarchical MapBiomas legend of one band into six classes.

    The expression maps the original class id (b0) to:
        1: 1 <= b0 < 10
        2: 10 <= b0 < 14, or b0 equal to 29 or 32
        3: 14 <= b0 < 16, or 18 <= b0 < 22
        4: 22 <= b0 < 26, or b0 equal to 30
        5: b0 equal to 26, 31 or 33
        6: any other value

    Inputs:
        bandName: name of the band of the global mapbiomas_states image to simplify
    Returns:
        single-band image holding the simplified classes, with the band named bandName
    '''
    source_band = mapbiomas_states.select([bandName])
    #One ternary clause per simplified class; the pieces are concatenated unchanged
    remap_rules = ''.join([
        '(b0 >=1)  && (b0<10) ? 1 :',
        '((b0>=10) && (b0<14)) || (b0==32) || (b0==29) ? 2 :',
        '((b0>=18) && (b0<22)) || ((b0>=14)&&(b0<16)) ? 3 :',
        '((b0>=22) && (b0<26)) || (b0==30) ? 4 :',
        '(b0==26) || (b0==33) || (b0==31) ? 5 : 6',
    ])
    remapped = mapbiomas_states.expression(remap_rules, {'b0': source_band})
    #The expression result is selected under the name 'constant'; rename it to the input band
    return remapped.select(['constant'], [bandName])

#Select bands we are interested in: one classification band per year, 1985 through 2018
bandList = ['classification_{}'.format(band_year) for band_year in range(1985, 2019)]
bandsEEList = ee.List(bandList)

#Simplify every yearly band server-side, then stack the results into one multi-band image
simplified_by_year = ee.ImageCollection(bandsEEList.map(simplify_legend))
states_simple = simplified_by_year.toBands()
#Rename the stacked bands back to the per-year names
states_simple = states_simple.select(states_simple.bandNames(), bandsEEList)
#Re-apply the mask of the original MapBiomas image
states_simple = states_simple.updateMask(states_mask)
#NOTE(review): max_value is set to 5 although simplify_legend can emit class 6 - confirm intent
states_simple = states_simple.set(ee.Dictionary({'min_value':1,'max_value':5}))
#states_simple is now an image where each band corresponds to the land cover class for the band name year

#Map one year to check it out!
Map1 = geemap.Map(center=[-9,-51], zoom=4)
Map1.addLayer(
    mapbiomas_states.select('classification_2018'),
    statesViz,
    name='Original MapBiomas')
Map1.addLayer(
    states_simple.select('classification_2018'),
    simpleStatesViz,
    name='Simplified MapBiomas')
display(Map1)


Map(center=[-9, -51], controls=(WidgetControl(options=['position'], widget=HBox(children=(ToggleButton(value=F…

In [31]:
#Save 30 meter projection
#Pull the projection of the MapBiomas image to the client, along with its nominal scale
projection_30m = mapbiomas_states.projection().getInfo()
scale = mapbiomas_states.projection().nominalScale().getInfo()
#Keep the crs code and affine transform; later cells pass them to sampling calls
crs, crsTransform = (projection_30m.get(key) for key in ('crs', 'transform'))
print(crs, crsTransform, scale, sep='\n')


EPSG:4326
[0.0002694945852358564, 0, -74.54381924374933, 0, -0.0002694945852358564, 6.792611020869763]
29.999999999999996


In [5]:
#Convert long band names to short band names
#Short names are the four-digit years as strings, in the same order as bandList
intBandNames = [str(band_year) for band_year in range(1985, 2019)]

#Rename 'classification_YYYY' bands to 'YYYY'. This reassigns states_simple, so the cell
#cannot be re-run without first re-running the cell that builds states_simple
states_simple = states_simple.select(bandList, intBandNames)


In [6]:

#Image bands must be ordered by increasing years
def get_year_stack_image_collection(image, band_names, band_indices=[-1,0,1]):
    '''
    Function returns image collection of images where each band is taken from the band_indices. If the bands
                requested for a year do not all exist, no image is returned for that year.
                For example if one band index is less than 0, an image for the first band will not be returned
                because there is not a band corresponding to that index.
    Inputs:
        image: image where each band represents the land cover classification for a year, bands ordered by 
                increasing years
        band_names: client-side list of band names in the image (len() and indexing are used on it)
        band_indices: list of offsets, relative to the current band, to collect from the image. The default 
                [-1,0,1] will return an image collection where each image will have the bands 
                [previous year, current year, following year]. The list is only read, never modified.
    Returns:
        out_image_list: an image collection with one image per band in band_names for which every offset in
                band_indices falls inside band_names; each image has the property 'OriginalBand' set to the
                name of the band it was built around
                
    Example:
        Inputs:
            image = image of land cover classification for years [1986,1987,1988,1989]
            band_names = [1986,1987,1988,1989]
            band_indices = [-1,0,1]
        Returns:
            out_image_list = image collection with the following images:
                image 1: bands: [1986,1987,1988], property {'OriginalBand': 1987}
                image 2: bands: [1987,1988,1989], property {'OriginalBand': 1988}
            (an image for 1986 is not included because there is not a year before 1986,
             and an image for 1989 is not included because there is not a year after 1989)
    '''
    n_bands = len(band_names)
    out_image_list = []
    for i,band_name in enumerate(band_names):
        positions = [i + int(offset) for offset in band_indices]
        #Skip years whose window runs off either end of the band list. This explicit test replaces
        #a bare except that caught the IndexError but also hid every other error
        if not all(0 <= position < n_bands for position in positions):
            continue
        band_list = [band_names[position] for position in positions]
        out_image = ee.Image.cat(image.select(band_list))
        out_image = out_image.set(ee.Dictionary({'OriginalBand':band_name}))
        out_image_list.append(out_image)
    
    return ee.ImageCollection(out_image_list)


In [7]:
#Functions for binary land cover change properties
def lc_one_change(image):
    '''
    Flags whether the land cover state changed between year i (band 0) and year i+1 (band 1).
    Returns an image, named after band 0 and tagged with property 'OriginalBand', with values:
    1 if state(i) != state(i+1)
    0 if state(i) == state(i+1)
    '''
    names = image.bandNames()
    first_name = names.get(0)
    year_i = image.select([first_name])
    year_i_plus_1 = image.select([names.get(1)])

    differs = year_i.neq(year_i_plus_1)
    #Give the result the name of the first input band and record that name as a property
    renamed = differs.select(differs.bandNames(), [first_name])
    return renamed.set(ee.Dictionary({'OriginalBand': first_name}))

def lc_no_change(image):
    '''
    Determines if there was no change occurrence from year i (band 0) to year i+1 (band 1).
    Returns an image, named after band 0 and tagged with property 'OriginalBand', with values:
    1 if state(i) == state(i+1)
    0 if state(i) != state(i+1)
    '''
    band_names = image.bandNames()
    first_name = band_names.get(0)
    #eq() yields 1 where the two years agree, i.e. where nothing changed
    out_image = image.select([first_name]).eq(image.select([band_names.get(1)]))
    out_image = out_image.select(out_image.bandNames(),[first_name])
    out_image = out_image.set(ee.Dictionary({'OriginalBand':first_name}))
    return out_image

def lc_reverse(image):
    '''
    Flags a change from year i to i+1 that is back at the year i state in year i+2.
    Bands 0, 1 and 2 of the input are read as years i, i+1 and i+2. Output values:
    1 if state(i) != state(i+1) and state(i) == state(i+2)
    0 otherwise
    The result is named after band 0 and tagged with property 'OriginalBand'.
    '''
    names = image.bandNames()
    first_name = names.get(0)
    year_i = image.select([first_name])
    year_i_plus_1 = image.select([names.get(1)])
    year_i_plus_2 = image.select([names.get(2)])

    came_back = year_i.eq(year_i_plus_2)
    did_change = year_i.neq(year_i_plus_1)
    flagged = came_back.bitwise_and(did_change)

    renamed = flagged.select(flagged.bandNames(), [first_name])
    return renamed.set(ee.Dictionary({'OriginalBand': first_name}))

def lc_change_to_another(image):
    '''
    Flags two successive changes (i to i+1, then i+1 to i+2) that end in a state different from year i.
    Bands 0, 1 and 2 of the input are read as years i, i+1 and i+2. Output values:
    1 if state(i) != state(i+1) and state(i) != state(i+2) and state(i+1) != state(i+2)
    0 otherwise
    The result is named after band 0 and tagged with property 'OriginalBand'.
    '''
    names = image.bandNames()
    first_name = names.get(0)
    year_i = image.select([first_name])
    year_i_plus_1 = image.select([names.get(1)])
    year_i_plus_2 = image.select([names.get(2)])

    first_change = year_i.neq(year_i_plus_1)
    second_change = year_i_plus_1.neq(year_i_plus_2)
    ended_elsewhere = year_i.neq(year_i_plus_2)

    flagged = first_change.bitwise_and(second_change.bitwise_and(ended_elsewhere))
    renamed = flagged.select(flagged.bandNames(), [first_name])
    return renamed.set(ee.Dictionary({'OriginalBand': first_name}))

def lc_consistent_change_one_year(image):
    '''
    Flags a change from year i to i+1 whose new state is still present in year i+2.
    Bands 0, 1 and 2 of the input are read as years i, i+1 and i+2. Output values:
    1 if state(i) != state(i+1) and state(i+1) == state(i+2)
    0 otherwise
    The result is named after band 0 and tagged with property 'OriginalBand'.
    '''
    names = image.bandNames()
    first_name = names.get(0)
    year_i = image.select([first_name])
    year_i_plus_1 = image.select([names.get(1)])
    year_i_plus_2 = image.select([names.get(2)])

    did_change = year_i.neq(year_i_plus_1)
    persisted = year_i_plus_1.eq(year_i_plus_2)

    flagged = did_change.bitwise_and(persisted)
    renamed = flagged.select(flagged.bandNames(), [first_name])
    return renamed.set(ee.Dictionary({'OriginalBand': first_name}))

def lc_consistent_change_two_years(image):
    '''
    Determines if change that occurred from i to i+1 stayed in i+2 and i+3.
    Bands 0, 1, 2 and 3 of the input are read as years i, i+1, i+2 and i+3. Output values:
    1 if state(i) != state(i+1) and state(i+1) == state(i+2) and state(i+1) == state(i+3)
    0 otherwise
    The result is named after band 0 and tagged with property 'OriginalBand'.
    '''
    band_names = image.bandNames()
    current_year = image.select([band_names.get(0)])
    next_year = image.select([band_names.get(1)])
    next_next_year = image.select([band_names.get(2)])
    #Band index 3 is year i+3. Index 2 used to be selected here a second time, which made the
    #i+3 comparison a duplicate of the i+2 comparison
    next_next_next_year = image.select([band_names.get(3)])
    
    changed = current_year.neq(next_year)
    stayed = next_year.eq(next_next_year)
    stayed_again = next_year.eq(next_next_next_year)
    
    out_image = changed.bitwise_and(stayed.bitwise_and(stayed_again))
    out_image = out_image.select(out_image.bandNames(),[band_names.get(0)])
    out_image = out_image.set(ee.Dictionary({'OriginalBand':band_names.get(0)}))
    return out_image


In [8]:
#Apply land cover change functions to images, first returns an image collection then converted to image
#where each band represents one year
year_band_names = states_simple.bandNames().getInfo()

#Year-over-year change: each year is stacked with the following year
lc_one_change_col = get_year_stack_image_collection(
    states_simple, year_band_names, band_indices=[0,1]
).map(lc_one_change)
lc_one_change_image = lc_one_change_col.toBands()
#Rename the stacked bands to the year recorded in each image's 'OriginalBand' property
lc_one_change_image = lc_one_change_image.select(
    lc_one_change_image.bandNames(),
    lc_one_change_col.aggregate_array('OriginalBand'))

#Consistent change: each year is stacked with the three following years
lc_consistent_change_two_years_col = get_year_stack_image_collection(
    states_simple, year_band_names, band_indices=[0,1,2,3]
).map(lc_consistent_change_two_years)
lc_consistent_change_two_years_image = lc_consistent_change_two_years_col.toBands()
lc_consistent_change_two_years_image = lc_consistent_change_two_years_image.select(
    lc_consistent_change_two_years_image.bandNames(),
    lc_consistent_change_two_years_col.aggregate_array('OriginalBand'))


In [9]:
#Map consistent change layer for one year to test if it works
Map2 = geemap.Map(center=[-9,-51], zoom=4)
Map2.addLayer(
    lc_consistent_change_two_years_image.select(['1990']),
    changeDetectionViz,
    name='1990 Consistent Change')
#Add the simplified land cover of 1990-1992 so the flagged pixels can be checked by eye
for layer_year in ('1990', '1991', '1992'):
    Map2.addLayer(states_simple.select([layer_year]), simpleStatesViz, name=layer_year)
display(Map2)


Map(center=[-9, -51], controls=(WidgetControl(options=['position'], widget=HBox(children=(ToggleButton(value=F…

In [10]:
#Get mask of when tiles had one change, we only want to sample pixels that had at least one change
# i.e. mask pixels that had no change
any_year_change = lc_one_change_image.reduce(ee.Reducer.max())
#Rename the reducer output and mask out its zeros, leaving only pixels that changed at least once
change_occured = any_year_change.select(any_year_change.bandNames(), ['one_change_occurred']).selfMask()

#Find pixels that had at least one year of consistent change, add 1 so that 0 can be the mask/no data value
consistent_change_occurred = lc_consistent_change_two_years_image.reduce(ee.Reducer.max()).add(1)

#Keep only pixels with at least one change, then rename the band to "consistent_change"
masked_consistent_change = consistent_change_occurred.updateMask(change_occured)
consistent_change_occurred_masked = masked_consistent_change.select(
    masked_consistent_change.bandNames(), ['consistent_change'])
#Consistent change raster is now coded:
# 0 = no data, change did not occur in this pixel in any year
# 1 = no consistent change in this pixel in any year
# 2 = at least one year of consistent change occurred in this pixel

print(consistent_change_occurred_masked.getInfo())

#Define color palettes and map
one_change_detection_palette = ['379c4d','04e735']
oneChangeDetectionViz = dict(min=0, max=1, palette=one_change_detection_palette)

consistent_change_detection_palette = ['df07b5','0741df']
consistentChangeDetectionViz = dict(min=1, max=2, palette=consistent_change_detection_palette)

Map3 = geemap.Map(center=[-9,-51], zoom=4)
#change_occured is already self-masked, so only value 1 is drawn, in the second palette
#colour (04e735); the first colour (379c4d) would stand for value 0, which is masked out
Map3.addLayer(change_occured.updateMask(change_occured), oneChangeDetectionViz, name='One Change')
#Pink (df07b5, value 1) shows there was not consistent change, blue (0741df, value 2) shows consistent change
Map3.addLayer(consistent_change_occurred_masked, consistentChangeDetectionViz, name='Consistent Change')
display(Map3)

#Consistent change raster is now coded:
# 0 = no data, change did not occur in this pixel in any year
# 1 = no consistent change in this pixel in any year (PINK)
# 2 = at least one year of consistent change occurred in this pixel (BLUE)


{'type': 'Image', 'bands': [{'id': 'consistent_change', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 1, 'max': 2}, 'crs': 'EPSG:4326', 'crs_transform': [1, 0, 0, 0, 1, 0]}]}


Map(center=[-9, -51], controls=(WidgetControl(options=['position'], widget=HBox(children=(ToggleButton(value=F…

In [14]:
#Functions for sampling images

def getStratifiedSampleBandPoints(image, region, bandName, **kwargs):
    '''
    Run image.stratifiedSample over the geometry of a region, stratified on one band.

    Inputs:
        image: image to sample; must provide stratifiedSample(**options)
        region: object providing geometry(); the geometry is used as the sampling region
        bandName: name of the band holding the classes to stratify on
        **kwargs: extra stratifiedSample options (for example numPoints, scale, projection, seed,
                classValues, classPoints, dropNulls, tileScale, geometries); they override the defaults
    Returns:
        whatever image.stratifiedSample returns for the merged options
    '''
    #Defaults first, caller overrides second
    dargs = {
        **dict(numPoints=1000, classBand=bandName, region=region.geometry()),
        **kwargs,
    }
    #Echo the final options so the notebook output records how the sample was drawn
    print(dargs)
    return image.stratifiedSample(**dargs)

def get_dataframe_from_feature_collection(feature_collection, property_names):
    '''
    Build a pandas dataframe with one column per requested property of a feature collection.

    Inputs:
        feature_collection: collection providing aggregate_array(name).getInfo()
        property_names: names of the properties to fetch, used as column names in the same order
    Returns:
        dataframe whose columns hold the fetched property values
    '''
    #One client-side fetch per property
    fetched = {
        name: feature_collection.aggregate_array(name).getInfo()
        for name in property_names
    }
    return pd.DataFrame(fetched)


def convert_points_df_to_feature_collection(df,projection='EPSG:4326',lat_name='latitude',lon_name='longitude'):
    '''
    Turn every row of a dataframe of points into an EE point feature.

    Inputs:
        df: dataframe with one point per row
        projection: projection passed to ee.Geometry.Point for every point
        lat_name: name of the column holding the latitude
        lon_name: name of the column holding the longitude
    Returns:
        ee.FeatureCollection with one feature per row; every column of the row is stored
        as a property of its feature
    '''
    def _row_to_feature(row):
        point = ee.Geometry.Point([row[lon_name], row[lat_name]], projection)
        return ee.Feature(point, row.to_dict())

    return ee.FeatureCollection([_row_to_feature(row) for _, row in df.iterrows()])



In [18]:
#Load in feature collection to sample over
Brazil_adm1 = ee.FeatureCollection('users/listerkristineanne/DynamicWorld/ChangeDetection/MapBiomas/Brazil_adm1')
Brazil_adm0 = ee.FeatureCollection('users/listerkristineanne/DynamicWorld/ChangeDetection/MapBiomas/Brazil_adm0')

numPoints = 5000

#Add ee.Image.pixelLonLat() bands so every sampled point carries its own coordinates.
#The explicit reproject to the original projection is left disabled below; the saved
#crs and scale are passed to the sampler instead
consistent_change_occurred_masked_reprj = consistent_change_occurred_masked.addBands(ee.Image.pixelLonLat())
# consistent_change_occurred_masked_reprj = consistent_change_occurred_masked_reprj.reproject(crs=crs,
#                                                                                       crsTransform=crsTransform)

#Sample points over the Brazil_adm0 collection, stratified on the consistent_change band
sampling_options = dict(numPoints=numPoints, seed=num_seed, scale=scale, projection=crs)
sample_points = getStratifiedSampleBandPoints(
    consistent_change_occurred_masked_reprj,
    Brazil_adm0,
    'consistent_change',
    **sampling_options)


{'numPoints': 5000, 'classBand': 'consistent_change', 'region': <ee.geometry.Geometry object at 0x12d207dd8>, 'seed': 30}


In [19]:
#At higher resolution, the calculation times out to print to the client side,
#so we will export it to the google drive of the current user

#The same name is used for the task description and for the exported file prefix
export_name = "SamplePoints_10K_new"
export_sample_points_task = ee.batch.Export.table.toDrive(
    collection=sample_points,
    description=export_name,
    fileNamePrefix=export_name)

export_sample_points_task.start()
print(export_sample_points_task)


<Task EXPORT_FEATURES: SamplePoints_10K_new (UNSUBMITTED)>


Once the export task is complete, the file will be written to Google Drive. Copy the file to GitHub or load it from a file on your local computer. I will upload all exports from the code to the GitHub folder https://github.com/kristinelister/WRI-NGS-DynamicWorld/tree/master/MapBiomas_ChangeDetection/TrainingPoints

To load a CSV from this folder directly into the jupyter notebook, click the file you'd like to download ([for example this file](https://github.com/kristinelister/WRI-NGS-DynamicWorld/blob/master/MapBiomas_ChangeDetection/TrainingPoints/Sample_Points_1000.csv)) and click on the "Raw" button on the upper left corner of the preview window. It will lead you to a page [like this with the raw data](https://raw.githubusercontent.com/kristinelister/WRI-NGS-DynamicWorld/master/MapBiomas_ChangeDetection/TrainingPoints/Sample_Points_1000.csv) which you can directly read into a pandas dataframe using the "pd.read_csv(url)" command demonstrated below.

In [20]:
#Read in training points from github
training_points_url = 'https://raw.githubusercontent.com/wri/rw-dynamicworld-cd/master/TrainingPoints/SamplePoints_10K_new.csv'
#Excess columns supplied by EE that are dropped after loading
columns_to_remove = ['system:index','.geo']

training_points = pd.read_csv(training_points_url)
columns_to_keep = [name for name in training_points.columns if name not in columns_to_remove]
training_points = training_points.loc[:, columns_to_keep]


# #Map results
# consistent_change_detection_palette = ['df07b5','0741df']
# consistentChangeDetectionViz = {'min': 1, 'max': 2, 'palette': consistent_change_detection_palette};
# Map4 = geemap.Map(center=[-9,-51], zoom=4)
# Map4.addLayer(consistent_change_occurred_masked,consistentChangeDetectionViz,name='Consistent Change')
# Map4.addLayer(training_points_fc,name='Sampled Points')
# #Pink shows there was not consistent change, blue shows consistent change
# display(Map4)


In [21]:
#Functions I've written to try to do this sampling

#Function to convert feature collection to pandas dataframe
#(re-defines the function of the same name from the earlier sampling-functions cell)
def get_dataframe_from_feature_collection(feature_collection, property_names):
    '''
    Build a pandas dataframe with one column per requested property of a feature collection.

    Inputs:
        feature_collection: collection providing aggregate_array(name).getInfo()
        property_names: names of the properties to fetch, used as column names in the same order
    Returns:
        dataframe whose columns hold the fetched property values
    '''
    fetched = {
        name: feature_collection.aggregate_array(name).getInfo()
        for name in property_names
    }
    return pd.DataFrame(fetched)

#Function to convert pandas dataframe to feature collection
#(re-defines the function of the same name from the earlier sampling-functions cell)
def convert_points_df_to_feature_collection(df,projection='EPSG:4326',lat_name='latitude',lon_name='longitude'):
    '''
    Turn every row of a dataframe of points into an EE point feature.

    Inputs:
        df: dataframe with one point per row
        projection: accepted for compatibility but not applied; the points are built
                without an explicit projection argument
        lat_name: name of the column holding the latitude
        lon_name: name of the column holding the longitude
    Returns:
        ee.FeatureCollection with one feature per row; every column of the row is stored
        as a property of its feature
    '''
    def _row_to_feature(row):
        point = ee.Geometry.Point([row[lon_name], row[lat_name]])#,projection)
        return ee.Feature(point, row.to_dict())

    return ee.FeatureCollection([_row_to_feature(row) for _, row in df.iterrows()])

#Function to convert one pandas series (a single point) to a feature
def convert_point_df_to_feature(series,projection='EPSG:4326',lat_name='latitude',lon_name='longitude'):
    '''
    Build one EE point feature from a series holding a point and its attributes.

    Inputs:
        series: series with the coordinates of one point plus any other values
        projection: accepted but not applied; the point is built without an explicit
                projection argument
        lat_name: label of the latitude entry in the series
        lon_name: label of the longitude entry in the series
    Returns:
        ee.Feature at the point, with every entry of the series stored as a property
    '''
    point = ee.Geometry.Point([series[lon_name], series[lat_name]])#,projection)
    return ee.Feature(point, series.to_dict())

#Function to sample image data at point locations (sampleBandPoints) and rename new property to image_name
def getSampleImageData(image, sampleBandPoints, image_name):
    '''
    Sample an image at every feature of a collection and store the value under image_name.

    Inputs:
        image: image to sample
        sampleBandPoints: feature collection of the locations to sample at
        image_name: name of the property that receives the sampled value
    Returns:
        the feature collection returned by reduceRegions, where every feature additionally
        carries the property image_name copied from the reducer output 'first'

    Uses the global crs and crsTransform saved from the MapBiomas projection.
    '''
    def _copy_first_to_name(feature):
        #Copy the reducer output "first" to a property called image_name
        return feature.set({image_name: feature.get('first')})

    sampled = image.reduceRegions(
        collection=sampleBandPoints,
        reducer=ee.Reducer.first(),
        tileScale=16,
        crs=crs,
        crsTransform=crsTransform)
    return sampled.map(_copy_first_to_name)


In [22]:

#Define the names (which will be used as column name prefixes) and the corresponding images
#to sample for covariates; the two lists are paired by position
image_name_list = ['one_change','consistent_change_two_years']
image_list = [lc_one_change_image,lc_consistent_change_two_years_image]

#Reproject images to original projection
#image_list = [x.reproject(crs=crs,crsTransform=crsTransform) for x in image_list]

#Define years to sample values in: the band names of the consistent change image
years = lc_consistent_change_two_years_image.bandNames().getInfo()
print(years)




['1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']


We now have a database of point locations within Brazil, all of which had at least one year of change, and half of which had at least one year of consistent change. However, we don't know in which years these changes occurred.

Therefore we need to sample the "one change occurred" and "consistent change occurred" images, which contain one band for each year, and get the status of "one change" and "consistent change" for each year at each point location. Both images are sampled for the 31 years listed above (1985–2015), which are the band names of the consistent change image.

The results of this sampling will create a wide database, with 31 columns of "one change" status and 31 columns of "consistent change" status, one per year (creating 62 new columns). We will then collapse this database to get a narrower version which shows the year of change.

In [24]:
#Sample the yearly change rasters at the training point locations and export the results
#to google drive as wide tables, one export task per chunk of points

print(len(training_points))
#Number of points handled by one export task
n=5000
list_df = [training_points[i:i+n] for i in range(0,training_points.shape[0],n)]

for i,df in enumerate(list_df):
    #Convert the chunk to a feature collection
    training_points_fc = convert_points_df_to_feature_collection(df)
    print(training_points_fc.first().getInfo())

    #Loop over year values
    for year in years:
        #Loop over images that will be sampled, paired with their column name prefix
        for image, base_name in zip(image_list,image_name_list):
            image_name = base_name+'_{}'.format(year)
            #Select the year band and sample it at the point locations; every pass adds
            #one property, named image_name, to the same feature collection
            training_points_fc = getSampleImageData(image.select(year), training_points_fc, image_name)

    #training_points_fc is already a single flat feature collection, so it is exported as is.
    #The unused flattened copy that used to be built here has been removed

    #The same name is used for the task description and for the exported file prefix
    export_name = 'Sample_Points_10K_wide_{}'.format(i)
    export_sample_points_task = ee.batch.Export.table.toDrive(
        collection=training_points_fc,
        description = export_name,
        fileNamePrefix = export_name,
        folder = 'Dynamic World')

    export_sample_points_task.start()
    print(export_sample_points_task)

10000
{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-61.45891389950018, 2.192742692771548]}, 'id': '0', 'properties': {'consistent_change': 1, 'latitude': 2.192742692771548, 'longitude': -61.45891389950018}}
<Task EXPORT_FEATURES: Sample_Points_10K_wide_0 (UNSUBMITTED)>
{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-65.01462545710206, -3.0009569538938767]}, 'id': '0', 'properties': {'consistent_change': 2, 'latitude': -3.0009569538938767, 'longitude': -65.01462545710206}}
<Task EXPORT_FEATURES: Sample_Points_10K_wide_1 (UNSUBMITTED)>


In [25]:
# download_folder = '/Users/kristine/WRI/NationalGeographic/DynamicWorld/ChangeDetection/training_points/DriveDownloads'
# in_csv_list = glob.glob(os.path.join(download_folder,'*.csv'))
# training_points_wide = pd.DataFrame()
# for in_csv in in_csv_list:
#     in_df = pd.read_csv(in_csv)
#     training_points_wide = training_points_wide.append(in_df,ignore_index=True)
# display(training_points_wide)
#Read the wide table of sampled values from github
training_points_wide_url = 'https://raw.githubusercontent.com/wri/rw-dynamicworld-cd/master/TrainingPoints/SamplePoints_10k_wide.csv'
#Excess columns supplied by EE that are dropped after loading
columns_to_remove = ['system:index','.geo']

training_points_wide = pd.read_csv(training_points_wide_url)
columns_to_keep = [name for name in training_points_wide.columns if name not in columns_to_remove]
training_points_wide = training_points_wide.loc[:, columns_to_keep]

display(training_points_wide)
#The kept names are exactly the columns of the filtered table
print(columns_to_keep)


Unnamed: 0.1,Unnamed: 0,consistent_change,consistent_change_two_years_1985,consistent_change_two_years_1986,consistent_change_two_years_1987,consistent_change_two_years_1988,consistent_change_two_years_1989,consistent_change_two_years_1990,consistent_change_two_years_1991,consistent_change_two_years_1992,...,one_change_2006,one_change_2007,one_change_2008,one_change_2009,one_change_2010,one_change_2011,one_change_2012,one_change_2013,one_change_2014,one_change_2015
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,2,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9996,9996,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9997,9997,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9998,9998,2,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,1


['Unnamed: 0', 'consistent_change', 'consistent_change_two_years_1985', 'consistent_change_two_years_1986', 'consistent_change_two_years_1987', 'consistent_change_two_years_1988', 'consistent_change_two_years_1989', 'consistent_change_two_years_1990', 'consistent_change_two_years_1991', 'consistent_change_two_years_1992', 'consistent_change_two_years_1993', 'consistent_change_two_years_1994', 'consistent_change_two_years_1995', 'consistent_change_two_years_1996', 'consistent_change_two_years_1997', 'consistent_change_two_years_1998', 'consistent_change_two_years_1999', 'consistent_change_two_years_2000', 'consistent_change_two_years_2001', 'consistent_change_two_years_2002', 'consistent_change_two_years_2003', 'consistent_change_two_years_2004', 'consistent_change_two_years_2005', 'consistent_change_two_years_2006', 'consistent_change_two_years_2007', 'consistent_change_two_years_2008', 'consistent_change_two_years_2009', 'consistent_change_two_years_2010', 'consistent_change_two_years

In [None]:

# first_change_column = ee.List(change_columns[0])
# second_change_column = ee.List(change_columns[1])
# years_ee_list = ee.List(years)

# def convert_wide_to_short(feature):
#     first_change_dict = feature.toDictionary(first_change_column)
    

In [26]:

def get_years_of_consistent_change(df,column_names,years):
    '''
    Function to convert wide columns of indicator values to narrow columns of years of change occurrence

    For every prefix in column_names, the indicator columns named '<prefix>_<year>' are collapsed
    into one new column named '<prefix>' holding the years whose indicator equals 1.

    Inputs:
        df: wide dataframe with one indicator column per prefix and year; it is modified in place
        column_names: list of column prefixes to collapse, each becomes the name of a new column
        years: list of years to look for, in the order they should appear in the output
    Returns:
        df: the same dataframe object with one added (or overwritten) column per prefix, holding
                the matching years joined by single spaces, or None when no indicator equals 1

    Columns are matched on the exact name '<prefix>_<year>'. Columns that merely contain the
    prefix or the year somewhere in their name are ignored, and so is the output column itself.
    '''
    for column_name in column_names:
        #Pair every year with its indicator column, skipping years that have no column
        year_labels = []
        indicator_columns = []
        for year in years:
            candidate = '{}_{}'.format(column_name, year)
            if candidate in df.columns:
                year_labels.append(str(year))
                indicator_columns.append(candidate)

        #Boolean table of "indicator equals 1", one row per point and one column per year
        flags = df[indicator_columns].eq(1).to_numpy()
        collapsed = [
            ' '.join(label for label, is_set in zip(year_labels, row_flags) if is_set) or None
            for row_flags in flags
        ]
        #Object dtype keeps None as None rather than converting it to NaN
        df[column_name] = pd.Series(collapsed, index=df.index, dtype=object)

    return df

#Define list of column prefixes
ordered_columns = ['one_change','consistent_change_two_years']
#Get list of year names to loop over
year_names = years

#Apply function to convert wide frame to narrow frame
#(get_years_of_consistent_change adds the collapsed columns to training_points_wide itself)
collapsed_df = get_years_of_consistent_change(training_points_wide,ordered_columns,year_names)
collapsed_df = collapsed_df[ordered_columns]
#Merge with sample point locations and consistent change status
#NOTE(review): concat with axis=1 aligns on the row index, so this assumes training_points and
#training_points_wide list the same points in the same order - confirm
merged_collapsed_df = pd.concat([training_points,collapsed_df],axis=1)
#Export to csv in the Downloads folder of the current user, rather than a hard-coded home directory
narrow_csv_path = os.path.join(os.path.expanduser('~'), 'Downloads', 'Sample_Points_10K_narrow.csv')
merged_collapsed_df.to_csv(narrow_csv_path,index=False)
#Display
merged_collapsed_df

Unnamed: 0,consistent_change,latitude,longitude,one_change,consistent_change_two_years
0,1,2.192743,-61.458914,,
1,1,-14.909923,-49.960388,,
2,1,-8.754936,-47.672649,,
3,1,-2.937356,-49.114984,,
4,1,-12.243004,-64.460814,,
...,...,...,...,...,...
9995,2,-18.958809,-52.877128,1985 1986,1986
9996,2,-9.139505,-48.105457,1994 1997 1998,1994 1998
9997,2,-16.007305,-59.978850,2002,2002
9998,2,-3.620525,-61.218525,1989 1993 1995 1996 1997 2000 2002 2011 2015,1989 1993 1997 2000 2002 2011 2015


In [27]:
#Expand the narrow table so that every row describes one year of change at one point

columns = ['consistent_change','latitude','longitude','year']

#Rows are collected in a list and turned into a dataframe once at the end. Appending to a
#dataframe inside the loop copies it on every pass, and DataFrame.append no longer exists
#in pandas 2.0
records = []
for _, row in merged_collapsed_df.iterrows():
    #Skip points with a missing value (None or NaN) in either collapsed column
    if pd.isna(row['one_change']) or pd.isna(row['consistent_change_two_years']):
        continue
    two_change_years = row['consistent_change_two_years'].split(' ')
    #Years with a change that was not a consistent change
    one_change_years = [x for x in row['one_change'].split(' ') if x not in two_change_years]

    #Consistent change years first (label 1), then the remaining change years (label 0)
    labelled_years = [(1, year) for year in two_change_years] + [(0, year) for year in one_change_years]
    for label, year in labelled_years:
        records.append({
            'consistent_change': label,
            'latitude': row['latitude'],
            'longitude': row['longitude'],
            'year': year,
        })

one_year_df = pd.DataFrame(records, columns=columns)
#Keep the label as a float (1.0 / 0.0), as the append-based version of this cell produced
one_year_df['consistent_change'] = one_year_df['consistent_change'].astype(float)
display(one_year_df)

Unnamed: 0,consistent_change,latitude,longitude,year
0,1.0,-3.000957,-65.014625,1985
1,1.0,-3.000957,-65.014625,2003
2,1.0,-3.000957,-65.014625,2005
3,1.0,-19.649524,-47.603119,1989
4,1.0,-19.649524,-47.603119,2005
...,...,...,...,...
11489,1.0,-3.620525,-61.218525,2011
11490,1.0,-3.620525,-61.218525,2015
11491,0.0,-3.620525,-61.218525,1995
11492,0.0,-3.620525,-61.218525,1996


In [28]:
#Count the rows labelled as consistent change (1) and the rows labelled as not consistent (0)
is_consistent = one_year_df['consistent_change'] == 1
is_not_consistent = one_year_df['consistent_change'] == 0
print(int(is_consistent.sum()))
print(int(is_not_consistent.sum()))


9845
1649


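As you can see above, the point locations often experience multiple years of change or multiple years of consistent change. For our purposes we need to either split these years into multiple rows/observations or pick one year to look at. The cell above takes the first approach and creates one row for each year of change at each point. The commented-out code in the cell below is the alternative, which randomly selects one year if there are multiple years so that each row corresponds to one year of change; as written, the cell below only exports the table built above.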

In [32]:
# #Copy dataframe
# one_year_df = merged_collapsed_df.copy()
# #Loop through rows of dataframe
# for i,row in one_year_df.iterrows():
#     #If the row has consistent change
#     if row['consistent_change_two_years'] is not None:
#         #Replace "2" value in consistent change with "1"
#         one_year_df.at[i,'consistent_change'] = 1
#         #Split element into list and choose a random sample
#         years = row['consistent_change_two_years'].split(' ')
#         one_year_df.at[i,'year'] = random.choice(years)
            
#     else:
#         #Replace "1" value with "0"
#         one_year_df.at[i,'consistent_change'] = 0
#         #Split element into list and choose a random sample
#         years = row['one_change'].split(' ')
#         one_year_df.at[i,'year'] = random.choice(years)

#Select appropriate columns and display
#one_year_df = one_year_df[['consistent_change','latitude','longitude','year']]
#Sample_Points_1000
#Export to csv in the Downloads folder of the current user, rather than a hard-coded home directory
wchange_csv_path = os.path.join(os.path.expanduser('~'), 'Downloads', 'Sample_Points_10K_wchange.csv')
one_year_df.to_csv(wchange_csv_path,index=False)
one_year_df

Unnamed: 0,consistent_change,latitude,longitude,year
0,1.0,-3.000957,-65.014625,1985
1,1.0,-3.000957,-65.014625,2003
2,1.0,-3.000957,-65.014625,2005
3,1.0,-19.649524,-47.603119,1989
4,1.0,-19.649524,-47.603119,2005
...,...,...,...,...
11489,1.0,-3.620525,-61.218525,2011
11490,1.0,-3.620525,-61.218525,2015
11491,0.0,-3.620525,-61.218525,1995
11492,0.0,-3.620525,-61.218525,1996
