In [10]:
import os
import pickle
import time

import ee
import numpy as np
import pandas as pd
# Initialize the Earth Engine client for this kernel session before any of the
# ee.* calls made in the cells below.
# NOTE(review): presumably credentials must already be configured for this to succeed — confirm.
ee.Initialize()

# Set up bounding box

In [11]:
def square(lat=45.475649, lon=-69.471018, size=100):
    """Return the bounding box of a buffered point as an Earth Engine geometry.

    Args:
        lat: Latitude of the centre point (passed as the second coordinate).
        lon: Longitude of the centre point (passed as the first coordinate).
        size: Distance handed to ``buffer``.
            NOTE(review): presumably metres, giving a box of roughly
            ``2 * size`` per side — confirm.

    Returns:
        The ``bounds()`` of the EPSG:4326 point after buffering it by ``size``.
    """
    centre = ee.Geometry.Point([lon, lat], proj="EPSG:4326")
    buffered = centre.buffer(size)
    return buffered.bounds()

In [12]:
# Module-level Earth Engine handles: the SRTM image, the slope derived from it,
# and the "seasonality" / "recurrence" bands of the JRC Global Surface Water image.
# NOTE(review): none of these names is referenced by the functions visible in this
# notebook, which construct their own images — confirm they are still needed.
srtm = ee.Image('USGS/SRTMGL1_003')
slope = ee.Terrain.slope(srtm)
jrc = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("seasonality", "recurrence")

def get_stats(image=ee.Image('USGS/SRTMGL1_003'), lat=45.475649, lon=-69.471018, size=100):
    """Reduce ``image`` over the square patch around a point with three reducers.

    The mean, standard-deviation and min/max reductions are each sent as a
    separate ``reduceRegion(...).getInfo()`` request at ``scale=30`` with
    ``maxPixels=1e9``, all over the same ``square(lat, lon, size)`` geometry.

    Args:
        image: Earth Engine image to reduce. The default image object is
            created once, when this function is defined.
        lat: Latitude forwarded to ``square``.
        lon: Longitude forwarded to ``square``.
        size: Buffer distance forwarded to ``square``.

    Returns:
        A ``(mean, stdDev, maxMin)`` tuple holding the three ``getInfo()``
        results in that order. If anything raises, the exception is printed
        and ``(np.nan, np.nan, np.nan)`` is returned instead, so a single
        failing row does not abort a whole ``DataFrame.apply`` pass.
    """
    try:
        # The patch geometry is the same for all three reductions: build it once.
        region = square(lat, lon, size)

        def _reduce(reducer):
            # One request per reducer, always over the same region and scale.
            return image.reduceRegion(
                reducer=reducer,
                geometry=region,
                scale=30,
                maxPixels=1e9,
            ).getInfo()

        mean = _reduce(ee.Reducer.mean())
        stdDev = _reduce(ee.Reducer.stdDev())
        maxMin = _reduce(ee.Reducer.minMax())
        return mean, stdDev, maxMin

    except Exception as e:
        # Deliberate best-effort: report the failure and hand back NaN placeholders.
        print(e)
        return np.nan, np.nan, np.nan
    



def get_transition(lat=45.475649, lon=-69.471018, size=100):
    """Fetch the frequency histogram of the JRC "transition" band over a patch.

    Args:
        lat: Latitude forwarded to ``square``.
        lon: Longitude forwarded to ``square``.
        size: Buffer distance forwarded to ``square``.

    Returns:
        The ``'transition'`` entry of the ``getInfo()`` result of a
        ``frequencyHistogram`` reduction at ``scale=30`` (``None`` if that key
        is absent). If anything raises, the exception is printed and
        ``np.nan`` is returned.
    """
    try:
        transition_band = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("transition")
        histogram_request = transition_band.reduceRegion(
            reducer=ee.Reducer.frequencyHistogram(),
            geometry=square(lat, lon, size),
            scale=30,
            maxPixels=1e9,
        )
        payload = histogram_request.getInfo()
        return payload.get('transition')
    except Exception as err:
        print(err)
        return np.nan

In [13]:
# Load the combined input table into df_m (path is relative to the working
# directory). Later cells slice df_m into batches and read its `latitude` and
# `longitude` columns.
df_m = pd.read_csv("combined_regular_clean_with_ssurgo_variables.csv")

In [14]:
def extract_image_stats(df_m_, i, size):
    """Add Earth Engine patch statistics to ``df_m_`` and pickle the result.

    For every row, the patch around (``latitude``, ``longitude``) is summarised
    with ``get_stats`` for four images and with ``get_transition``. The results
    are stored in five new columns: ``srtm_stats``, ``slope_stats``,
    ``seasonality_stats``, ``recurrence_stats`` and ``transition_hist``.
    ``df_m_`` is modified in place.

    Args:
        df_m_: DataFrame with ``latitude`` and ``longitude`` columns.
        i: Part label appended to the output file name.
        size: Patch size forwarded to ``get_stats`` / ``get_transition``; the
            file name records it as ``{2*size}X{2*size}``.

    Returns:
        None. The frame is written with pickle protocol 3 to
        ``ImageStatsPickledFiles/image_stats_{2*size}X{2*size}_part{i}``.
    """
    # The image handles do not depend on the row, so build them once per call
    # rather than once per row inside each lambda.
    elevation = ee.Image('USGS/SRTMGL1_003')
    surface_water = ee.Image("JRC/GSW1_2/GlobalSurfaceWater")
    stat_columns = [
        ("srtm_stats", elevation),
        ("slope_stats", ee.Terrain.slope(elevation)),
        ("seasonality_stats", surface_water.select("seasonality")),
        ("recurrence_stats", surface_water.select("recurrence")),
    ]

    # Columns are filled in the listed order, one full apply pass per column.
    for column, image in stat_columns:
        df_m_[column] = df_m_.apply(
            lambda row: get_stats(image=image,
                                  lat=row.latitude,
                                  lon=row.longitude,
                                  size=size),
            axis=1,
        )

    df_m_["transition_hist"] = df_m_.apply(
        lambda row: get_transition(lat=row.latitude,
                                   lon=row.longitude,
                                   size=size),
        axis=1,
    )

    # Create the output folder if needed so the batch's query results are not
    # lost to a FileNotFoundError at the very last step.
    out_dir = "ImageStatsPickledFiles"
    os.makedirs(out_dir, exist_ok=True)
    out_path = "{}/image_stats_{}X{}_part{}".format(out_dir, 2 * size, 2 * size, i)

    # Context manager guarantees the handle is flushed and closed.
    with open(out_path, "wb") as handle:
        pickle.dump(df_m_, handle, protocol=3)

In [15]:
# Print the current local time before the batch cells below are run.
# NOTE(review): presumably a manual start marker for timing the extraction — confirm.
from datetime import datetime
print(datetime.now())

2021-03-14 10:48:54.238728


In [8]:
# Each person processes a 5000-row share of df_m in ten batches of 500 rows:
#   MADHUKAR: 0 - 5000
#   SHOBHA: 5000 - 10000
#   RADHIKA: 10000 - 15000

# `size` passed to extract_image_stats; output files are named {2*size}X{2*size}.
PATCH_SIZE = 1250

batch_size = 500
MY_NAME = "MADHUKAR"
# Explicit lookup: a misspelled name raises KeyError instead of silently
# resolving to offset 0 and overwriting the first share's pickle files.
START = {"MADHUKAR": 0, "SHOBHA": 5000, "RADHIKA": 10000}[MY_NAME]

for batch in range(10):
    print("batch {} of 10 started".format(batch + 1))
    first_row = START + batch_size * batch
    # .copy() gives extract_image_stats its own frame to add columns to.
    batch_df = df_m[first_row : first_row + batch_size].copy()
    # The part label is the 1-based position of the batch's first row.
    extract_image_stats(batch_df, first_row + 1, size = PATCH_SIZE)
    print("batch {} of 10 done".format(batch + 1))
    


batch 1 of 10 started
batch 1 of 10 done
batch 2 of 10 started
batch 2 of 10 done
batch 3 of 10 started
batch 3 of 10 done
batch 4 of 10 started
batch 4 of 10 done
batch 5 of 10 started
batch 5 of 10 done
batch 6 of 10 started
batch 6 of 10 done
batch 7 of 10 started
batch 7 of 10 done
batch 8 of 10 started
batch 8 of 10 done
batch 9 of 10 started
batch 9 of 10 done
batch 10 of 10 started
batch 10 of 10 done


In [16]:
# Resume run for SHOBHA's share: only the tenth batch (index 9) is processed here.
# NOTE(review): the original note read "batch 9 of 10 done" — presumably batches
# 1-9 had already been pickled by an earlier run; confirm.
# PATCH_SIZE is not set in this cell and must already be defined by an earlier cell.

batch_size = 500
MY_NAME = "SHOBHA"
# Explicit lookup: a misspelled name raises KeyError instead of silently
# resolving to offset 0 and overwriting the first share's pickle files.
START = {"MADHUKAR": 0, "SHOBHA": 5000, "RADHIKA": 10000}[MY_NAME]

for batch in range(9,10):
    print("batch {} of 10 started".format(batch + 1))
    first_row = START + batch_size * batch
    # .copy() gives extract_image_stats its own frame to add columns to.
    batch_df = df_m[first_row : first_row + batch_size].copy()
    # The part label is the 1-based position of the batch's first row.
    extract_image_stats(batch_df, first_row + 1, size = PATCH_SIZE)
    print("batch {} of 10 done".format(batch + 1))


batch 10 of 10 started
batch 10 of 10 done


In [17]:
# Each person processes a 5000-row share of df_m in ten batches of 500 rows:
#   MADHUKAR: 0 - 5000
#   SHOBHA: 5000 - 10000
#   RADHIKA: 10000 - 15000
# PATCH_SIZE is not set in this cell and must already be defined by an earlier cell.

batch_size = 500
MY_NAME = "RADHIKA"
# Explicit lookup: a misspelled name raises KeyError instead of silently
# resolving to offset 0 and overwriting the first share's pickle files.
START = {"MADHUKAR": 0, "SHOBHA": 5000, "RADHIKA": 10000}[MY_NAME]

for batch in range(10):
    print("batch {} of 10 started".format(batch + 1))
    first_row = START + batch_size * batch
    # .copy() gives extract_image_stats its own frame to add columns to.
    batch_df = df_m[first_row : first_row + batch_size].copy()
    # The part label is the 1-based position of the batch's first row.
    extract_image_stats(batch_df, first_row + 1, size = PATCH_SIZE)
    print("batch {} of 10 done".format(batch + 1))


batch 1 of 10 started
batch 1 of 10 done
batch 2 of 10 started
batch 2 of 10 done
batch 3 of 10 started
batch 3 of 10 done
batch 4 of 10 started
batch 4 of 10 done
batch 5 of 10 started
batch 5 of 10 done
batch 6 of 10 started
batch 6 of 10 done
batch 7 of 10 started
batch 7 of 10 done
batch 8 of 10 started
batch 8 of 10 done
batch 9 of 10 started
batch 9 of 10 done
batch 10 of 10 started
batch 10 of 10 done


In [18]:
# Print the current local time after the batch cells above.
# NOTE(review): repeats the datetime import from an earlier cell; presumably an
# end-of-run marker for timing the extraction — confirm.
from datetime import datetime
print(datetime.now())

2021-03-14 14:09:25.886341


In [20]:
# Load one pickled batch back in and display its columns.
# The file name matches what extract_image_stats writes for size=1250 and part label 1.
# NOTE(review): presumably a sanity check that the five stats columns were written — confirm.
df_m_test = pd.read_pickle('ImageStatsPickledFiles/image_stats_2500X2500_part1')
df_m_test.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'jurisdiction_type', 'da_number',
       'district', 'project_name', 'longitude', 'latitude',
       'date_issued_or_denied', 'rha_determination', 'cwa_determination',
       'rha1', 'rha2', 'cwa1', 'cwa2', 'cwa3', 'cwa4', 'cwa5', 'cwa6', 'cwa7',
       'cwa8', 'cwa9', 'potential_wetland', 'index', 'Index', 'mukey',
       'hydclprs', 'aws025wta', 'drclassdcd', 'srtm_stats', 'slope_stats',
       'seasonality_stats', 'recurrence_stats', 'transition_hist'],
      dtype='object')

In [21]:
# Print the transition histogram stored for each row of the loaded batch,
# one entry per line.
for item in df_m_test["transition_hist"]:
    print(item)


{'1': 5153.20784313726, '10': 30, '2': 4, '4': 51, '5': 47.54509803921569, '6': 10, '7': 16, '8': 8}
{'1': 4481.329411764707, '10': 11, '2': 5, '4': 21, '5': 49, '6': 23.972549019607843, '7': 3, '8': 4}
{'1': 3174.3764705882363, '10': 8, '2': 1, '3': 1, '4': 113.08235294117647, '5': 29.776470588235295, '6': 9, '7': 15, '8': 47.16470588235295}
{}
{'1': 38, '10': 80, '2': 22.71372549019608, '3': 18, '4': 18, '5': 93.8549019607843, '6': 23, '8': 15, '9': 4}
{}
{'1': 5248.078431372527, '10': 30.376470588235293, '2': 2, '3': 20.752941176470586, '4': 155.65882352941173, '5': 59, '6': 75.38823529411764, '7': 4.376470588235295, '8': 408.8470588235294}
{'10': 1}
{'0': 0.7137254901960784, '10': 8, '2': 1, '5': 10.545098039215686, '6': 4}
{'1': 5257.447058823541, '10': 14, '3': 5, '4': 23.662745098039217, '5': 7, '6': 30, '8': 33.8}
{'1': 91.10588235294118, '10': 17.811764705882354, '2': 9, '4': 62, '5': 127.65882352941176, '6': 4.905882352941177, '7': 10, '8': 9}
{'0': 8, '1': 7016.376470588237,