In [1]:
import pandas as pd
import numpy as np
import time
import ee
import pickle
# Initialise the Earth Engine client library for this kernel session.
# Every ee.* object built in the cells below assumes this call has already succeeded.
ee.Initialize()

In [2]:
# Patch half-size: the batch driver passes it as `size` to extract_image_stats, which
# forwards it to square() as the buffer distance. Output paths are labelled with
# 2 * PATCH_SIZE (presumably the full patch width in metres - TODO confirm the unit).
PATCH_SIZE = 500

# Set up bounding box

In [3]:
def square(lat=45.475649, lon=-69.471018, size=100):
  """Return the bounding box of a buffer drawn around the point (lon, lat).

  Args:
    lat: latitude of the centre point, interpreted in EPSG:4326.
    lon: longitude of the centre point, interpreted in EPSG:4326.
    size: distance handed to ``buffer()`` (presumably metres, the Earth
      Engine default when no projection is given to ``buffer`` - confirm).

  Returns:
    The geometry produced by ``Point(...).buffer(size).bounds()``.
  """
  # Earth Engine points are given as [x, y], i.e. longitude first.
  centre = ee.Geometry.Point([lon, lat], proj="EPSG:4326")
  buffered = centre.buffer(size)
  return buffered.bounds()

In [4]:
# Module-level handles on the Earth Engine images used in this notebook: the SRTM
# image, the slope derived from it, and two bands of the JRC Global Surface Water image.
# NOTE(review): none of these three names is referenced by the functions visible in
# this notebook, which build their own ee.Image objects - confirm before relying on them.
srtm = ee.Image('USGS/SRTMGL1_003')
slope = ee.Terrain.slope(srtm)
jrc = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("seasonality", "recurrence")

def get_stats(image=None, lat=45.475649, lon=-69.471018, size=100):
    """Summarise ``image`` over the square patch centred on (lat, lon).

    Runs three ``reduceRegion`` reductions (mean, standard deviation, min/max)
    with ``scale=30`` and ``maxPixels=1e9`` and fetches each result with
    ``getInfo()``, in that order.

    Args:
        image: ``ee.Image`` to reduce. ``None`` (the default) selects the
            ``'USGS/SRTMGL1_003'`` image. The default is resolved on each call
            rather than in the signature, so merely defining this function
            does not touch the Earth Engine client.
        lat: latitude of the patch centre, forwarded to ``square``.
        lon: longitude of the patch centre, forwarded to ``square``.
        size: buffer distance forwarded to ``square``.

    Returns:
        A 3-tuple ``(mean, stdDev, maxMin)`` holding whatever ``getInfo()``
        returned for each reduction. If any step raises, the exception is
        printed and ``(nan, nan, nan)`` is returned instead, so a single
        failing row does not abort a batch.
    """
    try:
        if image is None:
            image = ee.Image('USGS/SRTMGL1_003')

        # The region is identical for all three reductions, so build it once.
        region = square(lat, lon, size)

        # Reducers are created lazily, one per request, in the original order.
        reducer_factories = (ee.Reducer.mean, ee.Reducer.stdDev, ee.Reducer.minMax)
        mean, stdDev, maxMin = (
            image.reduceRegion(
                reducer=make_reducer(),
                geometry=region,
                scale=30,
                maxPixels=1e9,
            ).getInfo()
            for make_reducer in reducer_factories
        )
        return mean, stdDev, maxMin

    except Exception as e:
        # Best effort by design: report the problem and mark the row as missing.
        print(e)
        return np.nan, np.nan, np.nan
    



def get_transition(lat=45.475649, lon=-69.471018, size=100):
    """Return the frequency histogram of the JRC "transition" band over a patch.

    Args:
        lat: latitude of the patch centre, forwarded to ``square``.
        lon: longitude of the patch centre, forwarded to ``square``.
        size: buffer distance forwarded to ``square``.

    Returns:
        The value stored under ``'transition'`` in the ``getInfo()`` result of
        a ``frequencyHistogram`` reduction (``scale=30``, ``maxPixels=1e9``).
        If any step raises, the exception is printed and ``nan`` is returned.
    """
    try:
        transition_band = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("transition")
        histogram_request = transition_band.reduceRegion(
            reducer=ee.Reducer.frequencyHistogram(),
            geometry=square(lat, lon, size),
            scale=30,
            maxPixels=1e9,
        )
        histogram = histogram_request.getInfo()
        return histogram.get('transition')
    except Exception as err:
        # Best effort: report the failure and mark the row as missing.
        print(err)
        return np.nan

In [5]:
# Load the input dataset from a pickle file; later cells read its `latitude` and
# `longitude` columns row by row. The CSV loader is kept commented out for reference.
# df_m = pd.read_csv("combined_regular_clean_with_ssurgo_variables.csv")
df_m = pd.read_pickle("cwr_nwpr_dataset")

In [6]:
def extract_image_stats(df_m_, i, size):
  """Add Earth Engine patch statistics to ``df_m_`` and pickle the result.

  For every row, the patch around (``latitude``, ``longitude``) is summarised
  with ``get_stats`` for four images (SRTM, slope of SRTM, and the JRC
  "seasonality" and "recurrence" bands) and with ``get_transition``. The
  results are stored in the columns ``srtm_stats``, ``slope_stats``,
  ``seasonality_stats``, ``recurrence_stats`` and ``transition_hist``.

  Args:
    df_m_: DataFrame with ``latitude`` and ``longitude`` columns. It is
      modified in place, so pass a copy if the original must stay untouched.
    i: part identifier appended to the output file name.
    size: buffer distance forwarded to ``get_stats`` / ``get_transition``.

  Returns:
    None. The enriched frame is written with pickle protocol 3 to
    ``cwr_nwpr/<2*PATCH_SIZE>m_SRTM/image_stats_<2*PATCH_SIZE>X<2*PATCH_SIZE>_part<i>``.
  """
  import os  # local import: only needed to prepare the output directory

  def _patch_stats(image):
    # One get_stats() round trip per row for the given image.
    return df_m_.apply(
        lambda row: get_stats(image=image,
                              lat=row.latitude,
                              lon=row.longitude,
                              size=size),
        axis=1)

  # Build each image description once instead of once per row.
  elevation = ee.Image('USGS/SRTMGL1_003')
  surface_water = ee.Image("JRC/GSW1_2/GlobalSurfaceWater")
  stat_images = (
      ("srtm_stats", elevation),
      ("slope_stats", ee.Terrain.slope(elevation)),
      ("seasonality_stats", surface_water.select("seasonality")),
      ("recurrence_stats", surface_water.select("recurrence")),
  )
  for column, image in stat_images:
    df_m_[column] = _patch_stats(image)

  df_m_["transition_hist"] = df_m_.apply(
      lambda row: get_transition(lat=row.latitude,
                                 lon=row.longitude,
                                 size=size),
      axis=1)

  # NOTE(review): the path is derived from the global PATCH_SIZE, not from `size`;
  # the two only agree because the visible caller passes size=PATCH_SIZE.
  extent = str(2 * PATCH_SIZE)
  out_dir = "cwr_nwpr/" + extent + "m_SRTM"
  # Create the target directory up front so a missing folder cannot discard the
  # results of all the remote calls made above.
  os.makedirs(out_dir, exist_ok=True)
  out_path = out_dir + "/image_stats_" + extent + "X" + extent + "_part" + str(i)

  # `with` guarantees the handle is flushed and closed even if pickling fails.
  with open(out_path, "wb") as handle:
    pickle.dump(df_m_, handle, protocol=3)

In [7]:
# Print the current wall-clock time just before the batch-processing cell runs.
from datetime import datetime
print(datetime.now())

2021-04-01 19:45:48.065919


In [8]:
# Each collaborator owns a 5000-row share of df_m, processed in batches of `batch_size` rows:
# MADHUKAR: 0 - 5000
# SHOBHA: 5000 - 10000
# RADHIKA: 10000 - 15000

patch_size = PATCH_SIZE
batch_size = 50
MY_NAME = "MADHUKAR"
# Boolean arithmetic selects the first row of the current collaborator's share.
START = 0 + 5000 * (MY_NAME == "SHOBHA") + 10000 * (MY_NAME == "RADHIKA")

# Batch indices handled by this run. Progress messages show the 1-based batch
# number against `batches.stop`, so they stay correct when the range is edited.
batches = range(10, 65)

for batch in batches:
  first_row = START + batch_size * batch
  batch_df = df_m[first_row : first_row + batch_size].copy()
  if batch_df.empty:
    # Ran past the end of df_m: there is nothing left to fetch or to pickle.
    print("batch {} of {} is empty, stopping".format(batch + 1, batches.stop))
    break
  print("batch {} of {} started".format(batch + 1, batches.stop))
  # Parts are named after the 1-based index of their first row.
  extract_image_stats(batch_df, first_row + 1, size = patch_size)
  print("batch {} of {} done".format(batch + 1, batches.stop))
    


batch 1 of 10 started
batch 1 of 10 done
batch 2 of 10 started
batch 2 of 10 done
batch 3 of 10 started
batch 3 of 10 done
batch 4 of 10 started
batch 4 of 10 done
batch 5 of 10 started
batch 5 of 10 done
batch 6 of 10 started
batch 6 of 10 done
batch 7 of 10 started
batch 7 of 10 done
batch 8 of 10 started
batch 8 of 10 done
batch 9 of 10 started
batch 9 of 10 done
batch 10 of 10 started
batch 10 of 10 done


In [9]:
# # pass in batches of 500
# # MADHUKAR: 0 - 5000 
# # SHOBHA: 5000 - 10000
# # RADHIKA: 10000 - 15000

# patch_size = PATCH_SIZE
# batch_size = 500
# MY_NAME = "SHOBHA"
# START = 0 + 5000 * (MY_NAME == "SHOBHA") + 10000 * (MY_NAME == "RADHIKA")

# for batch in range(10):
#   print("batch {} of 10 started".format(batch + 1))
#   batch_df = df_m[START + batch_size * batch : START + batch_size * (batch + 1)].copy()
#   extract_image_stats(batch_df, (START + batch_size * batch) + 1, size = patch_size)
#   print("batch {} of 10 done".format(batch + 1))


In [10]:
# # 6 of 10 batches done

# patch_size = PATCH_SIZE
# batch_size = 500
# MY_NAME = "RADHIKA"
# START = 0 + 5000 * (MY_NAME == "SHOBHA") + 10000 * (MY_NAME == "RADHIKA")

# for batch in range(6, 10):
#   print("batch {} of 10 started".format(batch + 1))
#   batch_df = df_m[START + batch_size * batch : START + batch_size * (batch + 1)].copy()
#   extract_image_stats(batch_df, (START + batch_size * batch) + 1, size = patch_size)
#   print("batch {} of 10 done".format(batch + 1))


In [11]:
# Print the current wall-clock time again once the batch-processing cells have run;
# the difference from the timestamp printed before the run is the elapsed time.
from datetime import datetime
print(datetime.now())

2021-04-01 20:05:44.078607


In [12]:
# Sanity check: reload the `_part1` pickle using the same path pattern that
# extract_image_stats writes, then display its column index.
patch_extent = str(2 * PATCH_SIZE)
part_path = "cwr_nwpr/" + patch_extent + "m_SRTM/image_stats_" + patch_extent + "X" + patch_extent + "_part1"
df_m_test = pd.read_pickle(part_path)
df_m_test.columns

Index(['jurisdiction_type', 'da_number', 'latitude', 'longitude',
       'cwa_determination', 'Index', 'srtm_stats', 'slope_stats',
       'seasonality_stats', 'recurrence_stats', 'transition_hist'],
      dtype='object')

In [13]:
# Print the transition histogram stored for each row, one per line.
for transition_counts in df_m_test["transition_hist"]:
    print(transition_counts)
# df_m_test.image_stats


{}
{}
{}
{'10': 1.188235294117647, '5': 0.18823529411764706}
{'1': 99.30588235294121, '4': 51.152941176470584, '5': 4.768627450980393, '8': 6.3843137254901965}
{}
{'10': 5, '2': 14.913725490196079, '5': 19.91372549019608}
{}
{}
{}
{'10': 1, '5': 1}
{'10': 5, '5': 2.0274509803921568, '6': 16}
{'10': 6, '4': 10, '5': 10}
{'4': 0.3411764705882353, '5': 1, '6': 1}
{'1': 75.09411764705882, '10': 1, '2': 17.16078431372549, '4': 16, '5': 41.160784313725486, '8': 2.5803921568627453}
{}
{}
{'5': 0.5529411764705883}
{'5': 1}
{}
{'10': 5, '5': 4}
{}
{'0': 1, '1': 112.01568627450983, '10': 67.09803921568627, '2': 9, '4': 24.376470588235293, '5': 69.93333333333334, '6': 8, '8': 25.788235294117648}
{'10': 7.662745098039216, '4': 1.831372549019608, '5': 1.831372549019608, '6': 3.662745098039216}
{'10': 0.5411764705882353, '3': 0.5411764705882353}
{}
{'5': 4.5058823529411764}
{'1': 117.65098039215687, '2': 17.04705882352941, '4': 167.43137254901973, '5': 230.9960784313726, '6': 18, '7': 2, '8': 10}
{'