In [22]:
from glob import glob
import pandas as pd
import geopandas as gpd
import ee
import numpy as np
from shapely.geometry import Point, Polygon
import folium
from folium.plugins import MarkerCluster
import binascii
import os
import sys
sys.path.append('/content')
from src.tools import Mapdisplay, random_point_in_shp
from src.tools import get_bound_points, generate_box_buffer, generate_ee_points
import src.tools as tools

ee.Initialize()
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# open FIA data points

In [11]:
#import dask.dataframe as dd
#fiafile='/datadrive/fia/supp_data/PLOT.zip'
fiafile='/datadrive/fia/PNWRS_Annualized_Inventory_ActCoordinates_ChangMTA_200731.csv'
#plot = dd.read_csv(plotfile, compression='zip')
plots = gpd.GeoDataFrame(pd.read_csv(fiafile))
plots.rename(columns={'ACTUAL_LAT':'LAT','ACTUAL_LON':'LON'}, inplace=True)
#fiafile = './data/ALL_PLOTS_ACTUAL*'
#points = gpd.read_file(glob(fiafile)[-1])
plots.Latitude = plots.LAT.astype('float')
plots.Longitude = plots.LON.astype('float')
geometry = [Point(xy) for xy in zip(plots.Longitude, plots.Latitude)]
crs = {'init': 'epsg:4326'} #http://www.spatialreference.org/ref/epsg/2263/
geo_df = gpd.GeoDataFrame(plots, crs=crs, geometry=geometry)

In [70]:
statecd = pd.read_csv(glob('/datadrive/fia/supp_data/*.csv')[0])
query_invyr = 2006
query_statecd = 6
subset = geo_df[(geo_df['INVYR']==query_invyr) & (geo_df['STATECD']==query_statecd)]
#add a uniq identifier to use as a relational database

#subset['CN'] = subset['CN'].astype('str')
subset['PLT_CN'] = subset['PLT_CN'].astype('str')
batch_size = 10
subset = subset.iloc[:batch_size]
KEYSIZE = 16
import random 
random.seed(1337)
uid = [str(binascii.hexlify(os.urandom(KEYSIZE)), 'utf-8')
        for i in range(batch_size)]
subset['UNIQID'] = uid
#subset['UNIQID'] = [str(uuid.uuid1()) for i in range(len(subset))]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


# let's run a sample for all the NAIP first and see how many batches we can do it in

In [72]:
#subsetting here would also be by ecoregion...
BUFFER=60
roi, center = tools.get_points_centroid(subset)
uniqid = 'UNIQID'
p = tools.generate_ee_points(subset[[uniqid, 'geometry']], idname=uniqid)
boxes = generate_box_buffer(subset, BUFFER, idname=uniqid)
fc_boxes = boxes['ee_fc']
box = boxes['box_df']

training_list = fc_boxes.toList(fc_boxes.size())
training_pnts = p.toList(p.size())

map_ex = folium.Map(center,zoom_start=4)
tile = folium.TileLayer(
        tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        attr = 'Esri',
        name = 'Esri Satellite',
        overlay = False,
        control = True
       ).add_to(map_ex)
#add points
mc = MarkerCluster()
for _,p in subset.iterrows():
    mc.add_child(folium.Marker(location=[p.geometry.y,p.geometry.x],popup=p[uniqid]))
map_ex.add_child(mc)

#add boxes
[folium.GeoJson(geom).add_to(map_ex) for geom in box.geometry]
map_ex

  return _prepare_from_string(" ".join(pjargs))


# we want to filter each of the FIA points by year and group them into batches

In [73]:
# set a year range based on the rules stipulated in the Chimera paper are:
# 2 years back and 1 year forward

req = '0001'
years = np.unique(geo_df.INVYR).astype(int)
RGB_bands = ['R','G','B'] #RGB
BUCKET = 'usfs-fia-data'
FOLDER = f'request-{req}'
BATCHES = 2
BUFFER = 60
#for y in years:
y = query_invyr
#    subset, _, _ = tools.create_subset(geo_df, y)
n = len(subset)
#subset the points first to get the appropriate images for plot
TRAINING_BASE = f'naip_{y}'
SAMPLE = f'sample_patches_{y}'
batch_indicies = np.array_split(np.arange(n),BATCHES)
for b in range(BATCHES):
    batch_subset, roi, training_pnts = tools.create_subset(subset.iloc[batch_indicies[b]], y)
    start_date = f'{y}-01-01'
    end_date = f'{y}-12-31'
    ic = ee.ImageCollection("USDA/NAIP/DOQQ")\
                   .filterBounds(roi)\
                   .filterDate(start_date, end_date)\
                   .select(RGB_bands).median()
    #if not ic.getInfo()['bands']:
    #    continue
    year_bands = [f'{b}_{y}' for b in RGB_bands]
    outimage = [ic.rename(year_bands)]
    featureStack = ee.Image.cat(outimage)

    arrays, EXPORT_BANDS = tools.create_arraystack(featureStack, BUFFER)

    n = training_pnts.size().getInfo()
    #we can batch here
    geomSample = ee.FeatureCollection([])
    for g in range(n):
        region = ee.Feature(training_pnts.get(g)).geometry()
        sample = arrays.sample(\
                               region = region,\
                               scale = 1,\
                               numPixels = 1,\
                               tileScale=8)
    geomSample = geomSample.merge(sample)
    desc = f'{TRAINING_BASE}_batch_{b+1}'
    task = ee.batch.Export.table.toCloudStorage(\
        collection = geomSample,\
        description = desc, \
        bucket = BUCKET,\
        fileNamePrefix = f'{FOLDER}/{desc}',\
        fileFormat = 'TFRecord',\
        selectors = EXPORT_BANDS)
    task.start()


In [75]:
import os
creds = '/content/authentication/microsoft-aiforearth-f3c5b0f10cfa.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS']=creds
from google.cloud import storage
client = storage.Client()
bucket = storage.Bucket(BUCKET)
for blob in client.list_blobs(BUCKET):
    print(blob.name)

request-0001/naip_2006_batch_1.tfrecord.gz
request-0001/naip_2006_batch_2.tfrecord.gz
request-002/naip_patches_g1.tfrecord.gz
request-002/naip_patches_g2.tfrecord.gz
request-002/naip_patches_g4.tfrecord.gz
request-002/naip_patches_g5.tfrecord.gz
request-002/naip_patches_g9.tfrecord.gz


In [76]:
import tensorflow as tf

In [77]:
import subprocess
#command to get data
local_dirc = '/content/data'
cmd = f'gsutil -m cp -r gs://{BUCKET}/{FOLDER} {local_dirc}/'
print(cmd.split(' '))
subprocess.run(cmd.split(' '))

['gsutil', '-m', 'cp', '-r', 'gs://usfs-fia-data/request-0001', '/content/data/']


CompletedProcess(args=['gsutil', '-m', 'cp', '-r', 'gs://usfs-fia-data/request-0001', '/content/data/'], returncode=0)

# `naip` in now a list of naip images for that year surrounding points that are of interest
# note that many times, these images may be blank as there are no images for the selected states within the roi 

In [78]:
arrays, EXPORT_BANDS = tools.create_arraystack(featureStack, BUFFER)

In [79]:
years = np.unique(geo_df.INVYR).astype(int)
BUCKET = 'usfs-fia-data'
FOLDER = 'aux-demo'
BATCHES = 3
KERNEL_SIZE = 12 
max_cloud_thr = 5
im_resolution = 30
BUFFER = 60

#we want to iterate through the years of the naip
for y in years:
    subset, _, _ = tools.create_subset(geo_df, y)
    n = len(subset)
    #subset the points first to get the appropriate images for plot
    TRAINING_BASE = f'aux_{y}'
    SAMPLE = f'aux_sample_patches_{y}'
    batch_indicies = np.array_split(np.arange(n),BATCHES)
    for b in range(BATCHES):
        batch_subset, roi, training_pnts = tools.create_subset(subset.iloc[batch_indicies[b]], y)
        start_date = [int(y),6,1]
        imgs = tools.get_model_data(roi, start_date, monthwindow=1, \
                                    max_cloud_thr=max_cloud_thr)
        topo = imgs['topo']
        climate = imgs['climate']
        landsat = imgs['landsat']
        
        featureStack = ee.Image.cat([
           topo, \
           climate, \
           landsat]).float()
        
        #topo_bands = [f"{b['id']}_{y}" for b in topo.getInfo()['bands']]
        #landsat_bands = [f"{b['id']}_{y}" for b in landsat.getInfo()['bands']]
        #climate_bands = [f"{b['id']}_{y}" for b in climate.getInfo()['bands']]       
        arrays, EXPORT_BANDS = tools.create_arraystack(featureStack, BUFFER)

        n = training_pnts.size().getInfo()
        #we can batch here
        geomSample = ee.FeatureCollection([])
        for g in range(n):
            region = ee.Feature(training_pnts.get(g)).geometry()
            sample = arrays.sample(\
                                   region = region,\
                                   scale = im_resolution,\
                                   numPixels = 1,\
                                   tileScale=8)
        geomSample = geomSample.merge(sample)
        desc = f'{TRAINING_BASE}_batch_{b+1}'
        
        task = ee.batch.Export.table.toCloudStorage(\
            collection = geomSample,\
            description = desc, \
            bucket = BUCKET,\
            fileNamePrefix = f'{FOLDER}/{desc}',\
            fileFormat = 'TFRecord',\
            selectors = EXPORT_BANDS)
        task.start()



SSLError: (SSLError("bad handshake: SysCallError(-1, 'Unexpected EOF')"),)