# Split Data for Cross Validation
This script divides the 10 years of available data into 10 folds plus the hold-out data for final testing. Data can be exported to Google Drive or to Google Earth Engine assets.

In [None]:
# accesses earth engine API
import ee
# Authenticate this session with Earth Engine before any other ee call.
ee.Authenticate()
# Initialize the client against the 'ee-aspenjkmorgan' project, which also
# hosts the assets read and written by the cells below.
ee.Initialize(project='ee-aspenjkmorgan')

In [None]:
# Load the PM-2.5, AOD and climate tables (built with the full cloud mask).
# The data is stored as one asset per multi-year period.
data1, data2, data3, data4 = [
    ee.FeatureCollection(asset_id)
    for asset_id in (
        'projects/ee-aspenjkmorgan/assets/Data/Mul_Var_2012-2015',
        'projects/ee-aspenjkmorgan/assets/Data/Mul_Var_2015-2018',
        'projects/ee-aspenjkmorgan/assets/Data/Mul_Var_2018-2020',
        'projects/ee-aspenjkmorgan/assets/Data/Mul_Var_2020-2023',
    )
]

# Wrap the four period tables in one collection and flatten it so that
# all_data is a single FeatureCollection of features.
period_tables = [data1, data2, data3, data4]
all_data = ee.FeatureCollection(period_tables).flatten()

In [None]:
# export as feature collections, use these for modeling
def exportToAsset(FC, name):
    """Start a batch export of ``FC`` to an Earth Engine table asset.

    Args:
        FC: Collection passed to the export as its ``collection``.
        name: Used as the task description and as the last segment of the
            asset ID under ``projects/ee-aspenjkmorgan/assets/Folds/``.

    The task is started and then dropped; this function neither waits for
    it nor returns it.
    """
    asset_path = 'projects/ee-aspenjkmorgan/assets/Folds/' + name
    task = ee.batch.Export.table.toAsset(
        collection=FC,
        description=name,
        assetId=asset_path,
    )
    task.start()

def exportToDrive(FC, name):
    """Start a batch export of ``FC`` to Google Drive as a CSV file.

    Args:
        FC: Collection passed to the export as its ``collection``.
        name: Used as both the task description and the output file name
            prefix; the file is written to the ``k_folds`` Drive folder.

    The task is started and then dropped; this function neither waits for
    it nor returns it.
    """
    task = ee.batch.Export.table.toDrive(
        collection=FC,
        description=name,
        folder='k_folds',
        fileNamePrefix=name,
        fileFormat='CSV',
    )
    task.start()

In [None]:
# Tag every feature with a 'random' property; the folds are cut from it.
data = all_data.randomColumn()

# Upper bound of each fold on the 'random' column: ten slices of width 0.07.
# Everything at or above the last bound (0.7) is the hold-out test set.
fold_upper_bounds = (0.07, 0.14, 0.21, 0.28, 0.35, 0.42, 0.49, 0.56, 0.63, 0.7)

# Divide all data into k folds
folds = []
lower_bound = None
for fold_index, upper_bound in enumerate(fold_upper_bounds):
    below_upper = ee.Filter.lt('random', upper_bound)
    if lower_bound is None:
        # The first fold has no lower bound: everything below 0.07.
        in_fold = below_upper
    else:
        in_fold = ee.Filter.And(ee.Filter.gte('random', lower_bound), below_upper)
    fold = data.filter(in_fold)
    # To write CSVs to Drive instead, use: exportToDrive(fold, 'f' + str(fold_index) + '_v2')
    exportToAsset(fold, 'f' + str(fold_index))
    folds.append(fold)
    lower_bound = upper_bound

# Keep the per-fold names available to any later cells.
f0, f1, f2, f3, f4, f5, f6, f7, f8, f9 = folds

# Hold-out data for final testing: the remaining rows with random >= 0.7.
test = data.filter(ee.Filter.gte('random', fold_upper_bounds[-1]))
# To write a CSV to Drive instead, use: exportToDrive(test, 'test_v2')
exportToAsset(test, 'test')