In [1]:
import pickle
import time
from datetime import datetime

import ee
import numpy as np
import pandas as pd
# Initialise the Earth Engine client library; this must run before the ee.* calls in the cells below.
ee.Initialize()

In [2]:
# Patch half-size: passed as `size` to the extraction functions (where it becomes
# the buffer distance in square()) and doubled to build the output file names.
PATCH_SIZE = 1250

# Set up bounding box

In [3]:
def square(lat=45.475649, lon=-69.471018, size=100):
  """Return the bounding box of a buffer drawn around a point.

  A point is created at (lon, lat) in EPSG:4326, buffered by ``size`` and the
  bounds of that buffer are returned as an Earth Engine geometry.

  Args:
    lat: latitude of the centre point.
    lon: longitude of the centre point.
    size: buffer distance handed to ``Geometry.buffer`` (presumably metres,
      Earth Engine's default unit for buffer distances -- confirm).
  """
  centre = ee.Geometry.Point([lon, lat], proj="EPSG:4326")
  buffered = centre.buffer(size)
  return buffered.bounds()

In [4]:
# Module-level Earth Engine image handles: the SRTM image, the slope derived
# from it, and the "seasonality"/"recurrence" bands of JRC Global Surface Water.
# NOTE(review): the functions visible in this notebook build their own images and
# do not read these names -- confirm whether they are still needed.
srtm = ee.Image('USGS/SRTMGL1_003')
slope = ee.Terrain.slope(srtm)
jrc = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("seasonality", "recurrence")

def get_stats(image=None, lat=45.475649, lon=-69.471018, size=100):
    """Summarise an Earth Engine image over a square patch around a point.

    Runs three ``reduceRegion`` reductions (mean, standard deviation, min/max)
    with ``scale=30`` and ``maxPixels=1e9`` over the region returned by
    ``square(lat, lon, size)`` and fetches each result with ``getInfo()``.
    The region is built once and shared by the three reductions.

    Args:
        image: ``ee.Image`` to reduce. ``None`` (the default) selects
            ``ee.Image('USGS/SRTMGL1_003')``; the image is created at call time
            rather than when the function is defined.
        lat: latitude of the patch centre.
        lon: longitude of the patch centre.
        size: patch size forwarded to ``square``.

    Returns:
        A tuple ``(mean, stdDev, maxMin)`` holding the three ``getInfo()``
        results. If any step raises, the error is printed together with the
        point it belongs to and ``(np.nan, np.nan, np.nan)`` is returned, so a
        single bad point does not abort a whole batch.
    """
    try:
        if image is None:
            image = ee.Image('USGS/SRTMGL1_003')

        # Build the patch geometry once; all three reductions use the same region.
        region = square(lat, lon, size)

        def _reduce(reducer):
            # One server round trip per reducer; returns the getInfo() result.
            return image.reduceRegion(
                reducer=reducer,
                geometry=region,
                scale=30,
                maxPixels=1e9,
            ).getInfo()

        mean = _reduce(ee.Reducer.mean())
        stdDev = _reduce(ee.Reducer.stdDev())
        maxMin = _reduce(ee.Reducer.minMax())
        return mean, stdDev, maxMin

    except Exception as e:
        # Best effort: report which point failed and keep the batch going.
        print("get_stats failed for lat={}, lon={}, size={}: {}".format(lat, lon, size, e))
        return np.nan, np.nan, np.nan
    



def get_transition(lat=45.475649, lon=-69.471018, size=100):
    """Return the frequency histogram of the "transition" band around a point.

    The "transition" band of ``JRC/GSW1_2/GlobalSurfaceWater`` is reduced with
    a frequency-histogram reducer (``scale=30``, ``maxPixels=1e9``) over the
    region returned by ``square(lat, lon, size)``.

    Returns:
        The value stored under ``'transition'`` in the ``getInfo()`` result.
        If any step raises, the exception is printed and ``np.nan`` is returned.
    """
    try:
        transition_band = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("transition")
        histogram_reducer = ee.Reducer.frequencyHistogram()
        patch = square(lat, lon, size)
        reduced = transition_band.reduceRegion(
            reducer=histogram_reducer,
            geometry=patch,
            scale=30,
            maxPixels=1e9,
        )
        info = reduced.getInfo()
        return info.get('transition')
    except Exception as e:
        print(e)
        return np.nan

In [5]:
# Load the point dataset from a pickled DataFrame; later cells read its
# `latitude` and `longitude` columns.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
# Earlier CSV input, kept for reference:
# df_m = pd.read_csv("combined_regular_clean_with_ssurgo_variables.csv")
df_m = pd.read_pickle("cwr_nwpr_dataset")

In [6]:
def extract_image_stats(df_m_, i, size):
  """Add Earth Engine statistics columns to a batch and pickle the result.

  For every row of ``df_m_`` (using its ``latitude`` and ``longitude``
  columns) this adds, in order:

  * ``srtm_stats``        -- ``get_stats`` on ``USGS/SRTMGL1_003``
  * ``slope_stats``       -- ``get_stats`` on the slope of that image
  * ``seasonality_stats`` -- ``get_stats`` on the JRC "seasonality" band
  * ``recurrence_stats``  -- ``get_stats`` on the JRC "recurrence" band
  * ``transition_hist``   -- ``get_transition``

  The frame is modified in place and then written with pickle protocol 3 to
  ``cwr_nwpr/<W>m_SRTM/image_stats_<W>X<W>_part<i>`` where ``W = 2 * PATCH_SIZE``.

  Args:
    df_m_: DataFrame batch to annotate; mutated in place.
    i: part identifier used as the suffix of the output file name.
    size: patch size forwarded to ``get_stats`` / ``get_transition``.

  Returns:
    None.
  """
  # Build each image handle once instead of once per row.
  srtm_image = ee.Image('USGS/SRTMGL1_003')
  surface_water = ee.Image("JRC/GSW1_2/GlobalSurfaceWater")
  stat_images = [
      ("srtm_stats", srtm_image),
      ("slope_stats", ee.Terrain.slope(srtm_image)),
      ("seasonality_stats", surface_water.select("seasonality")),
      ("recurrence_stats", surface_water.select("recurrence")),
  ]

  for column, image in stat_images:
    df_m_[column] = df_m_.apply(
        lambda row, image=image: get_stats(image=image,
                                           lat=row.latitude,
                                           lon=row.longitude,
                                           size=size),
        axis=1)

  df_m_["transition_hist"] = df_m_.apply(
      lambda row: get_transition(lat=row.latitude,
                                 lon=row.longitude,
                                 size=size),
      axis=1)

  # NOTE(review): the path is derived from the global PATCH_SIZE, not from the
  # `size` argument; the two agree only when the caller passes size=PATCH_SIZE.
  patch_width = str(2 * PATCH_SIZE)
  out_path = ("cwr_nwpr/" + patch_width + "m_SRTM/image_stats_"
              + patch_width + "X" + patch_width + "_part" + str(i))

  # Use a context manager so the file is flushed and closed even if dump() fails.
  with open(out_path, "wb") as out_file:
    pickle.dump(df_m_, out_file, protocol=3)

In [7]:
# Wall-clock timestamp printed just before the batch extraction cell runs.
print(datetime.now())

2021-04-01 19:45:57.565250


In [8]:
# Each person processes a 5000-row share of df_m, in batches of `batch_size` rows:
# MADHUKAR: 0 - 5000
# SHOBHA: 5000 - 10000
# RADHIKA: 10000 - 15000

patch_size = PATCH_SIZE
batch_size = 50
MY_NAME = "MADHUKAR"
# Row offset of this person's share (0 for MADHUKAR or any unlisted name).
START = {"SHOBHA": 5000, "RADHIKA": 10000}.get(MY_NAME, 0)

# Zero-based window of batches handled by this run; adjust when resuming.
first_batch, last_batch = 10, 65
# Number of batches in one 5000-row share, used only for progress messages.
total_batches = 5000 // batch_size

for batch in range(first_batch, last_batch):
  row_start = START + batch_size * batch
  batch_df = df_m[row_start : row_start + batch_size].copy()
  if batch_df.empty:
    # Slicing past the end of df_m yields no rows; every later batch would too.
    print("batch {} of {} skipped: no rows from offset {}".format(batch + 1, total_batches, row_start))
    break
  print("batch {} of {} started".format(batch + 1, total_batches))
  # The part number in the output file name is the 1-based first row of the batch.
  extract_image_stats(batch_df, row_start + 1, size = patch_size)
  print("batch {} of {} done".format(batch + 1, total_batches))
    


batch 1 of 10 started
batch 1 of 10 done
batch 2 of 10 started
batch 2 of 10 done
batch 3 of 10 started
batch 3 of 10 done
batch 4 of 10 started
batch 4 of 10 done
batch 5 of 10 started
batch 5 of 10 done
batch 6 of 10 started
batch 6 of 10 done
batch 7 of 10 started
batch 7 of 10 done
batch 8 of 10 started
batch 8 of 10 done
batch 9 of 10 started
batch 9 of 10 done
batch 10 of 10 started
batch 10 of 10 done


In [9]:
# # pass in batches of 500
# # MADHUKAR: 0 - 5000 
# # SHOBHA: 5000 - 10000
# # RADHIKA: 10000 - 15000

# patch_size = PATCH_SIZE
# batch_size = 500
# MY_NAME = "SHOBHA"
# START = 0 + 5000 * (MY_NAME == "SHOBHA") + 10000 * (MY_NAME == "RADHIKA")

# for batch in range(10):
#   print("batch {} of 10 started".format(batch + 1))
#   batch_df = df_m[START + batch_size * batch : START + batch_size * (batch + 1)].copy()
#   extract_image_stats(batch_df, (START + batch_size * batch) + 1, size = patch_size)
#   print("batch {} of 10 done".format(batch + 1))


In [10]:
# # 6 of 10 batches done

# patch_size = PATCH_SIZE
# batch_size = 500
# MY_NAME = "RADHIKA"
# START = 0 + 5000 * (MY_NAME == "SHOBHA") + 10000 * (MY_NAME == "RADHIKA")

# for batch in range(6, 10):
#   print("batch {} of 10 started".format(batch + 1))
#   batch_df = df_m[START + batch_size * batch : START + batch_size * (batch + 1)].copy()
#   extract_image_stats(batch_df, (START + batch_size * batch) + 1, size = patch_size)
#   print("batch {} of 10 done".format(batch + 1))


In [11]:
# Wall-clock timestamp printed after the batch extraction cell has finished.
print(datetime.now())

2021-04-01 20:05:44.184861


In [12]:
# Sanity check: reload the part-1 pickle written by extract_image_stats and
# display its columns.
patch_width = str(2 * PATCH_SIZE)
df_m_test = pd.read_pickle(
    "cwr_nwpr/" + patch_width + "m_SRTM/image_stats_"
    + patch_width + "X" + patch_width + "_part1"
)
df_m_test.columns

Index(['jurisdiction_type', 'da_number', 'latitude', 'longitude',
       'cwa_determination', 'Index', 'srtm_stats', 'slope_stats',
       'seasonality_stats', 'recurrence_stats', 'transition_hist'],
      dtype='object')

In [13]:
# Print the stored transition histogram of every reloaded row, one per line.
for item in df_m_test["transition_hist"]:
    print(item)


{}
{}
{'10': 2.768627450980392, '6': 7.8}
{'1': 2, '10': 15, '5': 12, '6': 1.9803921568627452, '8': 1}
{'1': 428.129411764706, '10': 6, '2': 1, '4': 186.17254901960786, '5': 30.28235294117647, '6': 2, '7': 12, '8': 34}
{'0': 2, '1': 49.27843137254902, '10': 7.639215686274509, '2': 64.31372549019608, '4': 16.427450980392155, '5': 55.66274509803922, '6': 2, '7': 1.3254901960784313}
{'10': 19, '2': 22, '3': 1, '5': 27, '6': 12}
{'10': 5, '4': 14, '5': 17, '6': 3, '7': 4}
{'2': 1.7882352941176471, '4': 0.9058823529411765, '5': 2}
{'1': 10, '10': 3, '2': 2, '4': 16, '5': 106.46666666666667, '8': 17}
{'0': 4, '1': 5, '10': 49.27058823529411, '2': 1, '4': 26.533333333333335, '5': 172.24705882352933, '6': 2, '8': 63}
{'10': 10, '4': 3, '5': 10, '6': 31}
{'1': 320.87058823529406, '10': 9, '3': 3, '4': 75.56470588235294, '5': 68.4, '6': 2, '7': 3, '8': 29.65882352941177}
{'1': 11, '10': 6, '2': 10, '4': 30, '5': 49, '6': 5, '7': 5}
{'1': 1135.8039215686274, '10': 2, '2': 49, '4': 134.94117647058