# Split Data for Cross Validation
This script divides up the 10 years of available data into 10 folds plus the hold-out data for final testing. Data can be exported to Google Drive or to Google Earth Engine assets.

In [None]:
# Access the Earth Engine API.
# Authenticate() starts the interactive authorization flow (it prompts for a
# verification code and saves an authorization token, as shown in the cell
# output); Initialize() then initializes the client library for this session.
import ee
ee.Authenticate()
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=61z0eeq2hKAc8CDKN0HjgrnL8x8XKwRbOG4cbAxBYB0&tc=SYET9WgzAV2UkcAgVjGcEST90FP_wIo-sb2lS7DCgWU&cc=Bt5yhYZn9pKsCtUcrJs9DYZ8WqivhP6Ur1SDg_Pw31A

The authorization workflow will generate a code, which you should paste in the box below.
Enter verification code: 4/1AfJohXkfbRDvJlNyS_Y0FzqeTeTK2NcNFW1rKcvudu8UyuV4ThqPeF7Dqrc

Successfully saved authorization token.


In [None]:
# Load the PM-2.5, AOD and climate tables (full cloud mask), one asset per
# period, and merge them into a single flat FeatureCollection.
_FULL_MASK_ASSETS = (
    'users/aspenjkmorgan/Multivariable_FC/Full_Mask_2012-2015',
    'users/aspenjkmorgan/Multivariable_FC/Full_Mask_2015-2018',
    'users/aspenjkmorgan/Multivariable_FC/Full_Mask_2018-2020',
    'users/aspenjkmorgan/Multivariable_FC/Full_Mask_2020-2023',
)
data1, data2, data3, data4 = (
    ee.FeatureCollection(asset_id) for asset_id in _FULL_MASK_ASSETS
)
# A collection of collections must be flattened to get one table of features.
all_data = ee.FeatureCollection([data1, data2, data3, data4]).flatten()

In [None]:
# export as feature collections, use these for modeling
def exportToAsset(FC, name):
  """Start a batch export of a feature collection to an Earth Engine asset.

  Args:
    FC: the collection passed as the export's 'collection' argument.
    name: used both as the task description and as the final component of
      the asset id under 'users/aspenjkmorgan/k_folds/'.

  The task is started immediately; nothing is returned.
  """
  destination = 'users/aspenjkmorgan/k_folds/' + name
  task = ee.batch.Export.table.toAsset(
      collection=FC,
      description=name,
      assetId=destination,
  )
  task.start()

def exportToDrive(FC, name):
  """Start a batch export of a feature collection to Google Drive as CSV.

  Args:
    FC: the collection passed as the export's 'collection' argument.
    name: used both as the task description and as the file name prefix
      inside the 'k_folds' Drive folder.

  The task is started immediately; nothing is returned.
  """
  task = ee.batch.Export.table.toDrive(
      collection=FC,
      description=name,
      folder='k_folds',
      fileNamePrefix=name,
      fileFormat='CSV',
  )
  task.start()

In [None]:
# Attach a 'random' column to every feature; the folds below are slices of it.
data = all_data.randomColumn()

# Upper bounds of the ten folds: 0.09, 0.18, ..., 0.90. Integer percent / 100
# yields exactly the same floats as writing the decimal literals out by hand.
bounds = [pct / 100 for pct in range(9, 91, 9)]

# Divide all data into k folds.
# The first fold only needs an upper bound; every later fold is the
# half-open band [lower, upper) between two consecutive bounds.
folds = [data.filter(ee.Filter.lt('random', bounds[0]))]
for lower, upper in zip(bounds, bounds[1:]):
  band = ee.Filter.And(
      ee.Filter.gte('random', lower),
      ee.Filter.lt('random', upper))
  folds.append(data.filter(band))

# Keep each fold reachable under its individual name.
f0, f1, f2, f3, f4, f5, f6, f7, f8, f9 = folds

# Everything at or above the last bound is the hold-out set for final testing.
test = data.filter(ee.Filter.gte('random', bounds[-1]))

# Export each subset to Drive and then to an asset, fold by fold, test last.
labelled = [('f' + str(index), fold) for index, fold in enumerate(folds)]
labelled.append(('test', test))
for label, subset in labelled:
  exportToDrive(subset, label)
  exportToAsset(subset, label)