# Goals

1. Prototype some pangeoLib functions.
2. Organize notebooks for input and output classification for ET.

## prototype
1. Use boto3 to list all tiles.
2. Use boto3 to list all outputs in the tiles, by subset.
3. Build coverage geometries for each tile (day 001).

In [1]:
import boto3

In [2]:
from pangeoLib.aws_authenticate import aws_authenticate

# Run the project's AWS authentication helper before the S3 reads in the cells below.
# NOTE(review): what the helper actually configures (env vars, profile, session)
# is not visible from this notebook — confirm in pangeoLib.
aws_authenticate()

In [3]:
#list_all_tiles()

In [4]:
# ! aws s3 ls --request-payer requester dev-et-data/tiles/

# rio takes too long - research ways to just read the header

## TODO: re-investigate whether USGS network security is the cause of the slow read speed

In [5]:
! rio info /vsis3/dev-et-data/test/compressed/NDVI_filled/2001/2001001.250_m_NDVI.tif

{"blockxsize": 512, "blockysize": 512, "bounds": [-155.57238265799998, 20.000239124000082, -52.215132156499976, 49.999999996000085], "colorinterp": ["gray"], "compress": "deflate", "count": 1, "crs": "EPSG:4326", "descriptions": [null], "driver": "GTiff", "dtype": "float32", "height": 14416, "indexes": [1], "interleave": "band", "lnglat": [-103.89375740724998, 35.00011956000009], "mask_flags": [["nodata"]], "nodata": -3.4028234663852886e+38, "res": [0.0020810045, 0.0020810045], "shape": [14416, 49667], "tiled": true, "transform": [0.0020810045, 0.0, -155.57238265799998, 0.0, -0.0020810045, 49.999999996000085, 0.0, 0.0, 1.0], "units": [null], "width": 49667}


In [6]:
import fsspec
# S3 filesystem handle: authenticated (anon=False) with requester-pays enabled.
fs = fsspec.filesystem('s3', anon=False, requester_pays=True)
# List the entries under the tiles/ prefix; later cells treat each entry as one
# chip directory and append a raster file name to it.
chip_list = fs.ls('dev-et-data/tiles/')

In [7]:
! rio info /vsis3/dev-et-data/tiles/tile40N-100E_chip32N-100E/dd_2014.tif

{"bounds": [-100.00123780499997, 29.999468047915045, -97.99931147599997, 32.001394376915044], "colorinterp": ["gray"], "count": 1, "crs": "EPSG:4326", "descriptions": [null], "driver": "GTiff", "dtype": "float64", "height": 962, "indexes": [1], "interleave": "band", "lnglat": [-99.00027464049998, 31.000431212415045], "mask_flags": [["all_valid"]], "nodata": null, "res": [0.0020810045, 0.0020810045], "shape": [962, 962], "tiled": false, "transform": [0.0020810045, 0.0, -100.00123780499997, 0.0, -0.0020810045, 32.001394376915044, 0.0, 0.0, 1.0], "units": [null], "width": 962}


In [8]:
import rasterio
# /vsis3/ path to a single chip raster in the dev-et-data bucket.
file='/vsis3/dev-et-data/tiles/tile40N-100E_chip32N-100E/dd_2014.tif'
# NOTE(review): the dataset is left open on purpose because the next cell reads
# src.bounds; it is never explicitly closed in this notebook.
src = rasterio.open(file)
# Display the raster shape as (height, width).
src.shape

(962, 962)

In [9]:
#dir(src)

src.bounds.left

-100.00123780499997

In [10]:
#dir(src.bounds)



You are almost there with what you tried, and using the box method is indeed the best way. With the list comprehension, you can do

b = [box(l, b, r, t) for l, b, r, t in zip(df.left, df.bottom, df.right, df.top)]

Another option is to apply the box function to each row of your dataframe:

b = df.apply(lambda row: box(row.left, row.bottom, row.right, row.top), axis=1)

Once you have converted the bounding boxes to polygons, make sure to actually create a GeoDataFrame:

gdf = geopandas.GeoDataFrame(df, geometry=b)



In [11]:
# chip_list

In [12]:
def get_bounding(file):
    """Return the bounding box of a raster, or None if it cannot be read.

    Parameters
    ----------
    file : str
        Path passed straight to ``rasterio.open`` (the notebook uses
        ``/vsis3/...`` paths).

    Returns
    -------
    The dataset's ``bounds`` attribute (with ``left``, ``bottom``, ``right``
    and ``top`` fields as used by the callers in this notebook), or ``None``
    when opening or reading the dataset fails.

    Notes
    -----
    The lookup is deliberately best-effort: not every chip directory has the
    requested raster, so ordinary errors are turned into ``None``. Only
    ``Exception`` subclasses are caught, so Ctrl-C (``KeyboardInterrupt``) and
    ``SystemExit`` still stop a long scan instead of being swallowed.
    """
    try:
        # The context manager closes the dataset handle as soon as the bounds
        # are read, instead of leaking one open handle per chip.
        with rasterio.open(file) as src:
            return src.bounds
    except Exception:
        return None
        

In [13]:

import pandas as pd

def build_df_of_chip_bounds(chips=None, filename='dd_2013.tif'):
    """Build a catalog of chip bounding boxes, one row per readable chip.

    Parameters
    ----------
    chips : iterable of str, optional
        Chip directory paths (bucket-relative, without the ``/vsis3/`` prefix).
        Defaults to the notebook-level ``chip_list``.
    filename : str, optional
        Raster file looked up inside every chip directory. Defaults to
        ``'dd_2013.tif'``, the file the original catalog was built from.

    Returns
    -------
    pandas.DataFrame
        Columns ``Chip, left, bottom, right, top`` with a 0..n-1 index. Chips
        for which ``get_bounding`` returns ``None`` are skipped. An empty
        input yields an empty frame with the same columns.
    """
    columns = ['Chip', 'left', 'bottom', 'right', 'top']
    if chips is None:
        chips = chip_list

    # Collect plain records and build the frame once: this avoids growing the
    # DataFrame row by row and lets pandas infer float dtypes for the bounds.
    records = []
    for chip in chips:
        file = '/vsis3/' + chip + '/' + filename
        bounds = get_bounding(file)
        if bounds is not None:
            records.append({
                'Chip': chip,
                'left': bounds.left,
                'bottom': bounds.bottom,
                'right': bounds.right,
                'top': bounds.top,
            })
    return pd.DataFrame(records, columns=columns)

In [14]:
import time
# Load the cached tile catalog if it exists; otherwise build it once from S3
# and report how long the build took.
# Only a missing cache file triggers the rebuild: any other failure (corrupt or
# incompatible pickle, interrupted kernel) is raised instead of being hidden
# behind a slow S3 scan.
try:
    df = pd.read_pickle('tile_catalog_df.pkl')
except FileNotFoundError:
    # perf_counter is monotonic, so the elapsed time is unaffected by clock changes.
    start = time.perf_counter()
    print("build df one time")
    df = build_df_of_chip_bounds()
    end = time.perf_counter()
    print("Time it Took =",end - start)




In [15]:
df.head(30)

Unnamed: 0,Chip,left,bottom,right,top,geometry
0,dev-et-data/tiles/tile40N-100E_chip32N-100E,-100.001238,29.999468,-97.999311,32.001394,"POLYGON ((-97.99931 29.99947, -97.99931 32.001..."
1,dev-et-data/tiles/tile40N-100E_chip32N-92E,-92.001857,29.999468,-89.99993,32.001394,"POLYGON ((-89.99993 29.99947, -89.99993 32.001..."
2,dev-et-data/tiles/tile40N-100E_chip32N-94E,-94.001702,29.999468,-91.999776,32.001394,"POLYGON ((-91.99978 29.99947, -91.99978 32.001..."
3,dev-et-data/tiles/tile40N-100E_chip32N-96E,-96.001547,29.999468,-93.999621,32.001394,"POLYGON ((-93.99962 29.99947, -93.99962 32.001..."
4,dev-et-data/tiles/tile40N-100E_chip32N-98E,-98.001392,29.999468,-95.999466,32.001394,"POLYGON ((-95.99947 29.99947, -95.99947 32.001..."
5,dev-et-data/tiles/tile40N-100E_chip34N-100E,-100.001238,31.999313,-97.999311,34.00124,"POLYGON ((-97.99931 31.99931, -97.99931 34.001..."
6,dev-et-data/tiles/tile40N-100E_chip34N-92E,-92.001857,31.999313,-89.99993,34.00124,"POLYGON ((-89.99993 31.99931, -89.99993 34.001..."
7,dev-et-data/tiles/tile40N-100E_chip34N-94E,-94.001702,31.999313,-91.999776,34.00124,"POLYGON ((-91.99978 31.99931, -91.99978 34.001..."
8,dev-et-data/tiles/tile40N-100E_chip34N-96E,-96.001547,31.999313,-93.999621,34.00124,"POLYGON ((-93.99962 31.99931, -93.99962 34.001..."
9,dev-et-data/tiles/tile40N-100E_chip34N-98E,-98.001392,31.999313,-95.999466,34.00124,"POLYGON ((-95.99947 31.99931, -95.99947 34.001..."


In [16]:
#dir(df)

In [17]:
df.describe()

Unnamed: 0,left,bottom,right,top
count,125.0,125.0,125.0,125.0
mean,-94.000953,33.999159,-91.998943,36.001085
std,12.048236,2.83959,12.048264,2.83959
min,-110.000464,29.999468,-107.998538,32.001394
25%,-104.000928,31.999313,-101.999002,34.00124
50%,-96.001547,33.999159,-93.999621,36.001085
75%,-84.000394,35.999004,-81.998468,38.00093
max,-72.001322,37.998849,-69.999396,40.000776


In [18]:
import geopandas
from shapely.geometry import box
#import shapely.bounding.box as box

# Turn each row's bounds into a shapely polygon, then build the GeoDataFrame.
# The chip rasters report EPSG:4326 in the `rio info` output above, so the CRS
# is declared explicitly; without it the GeoDataFrame carries no CRS at all.
b = [box(left, bottom, right, top)
     for left, bottom, right, top in zip(df.left, df.bottom, df.right, df.top)]
gdf = geopandas.GeoDataFrame(df, geometry=b, crs="EPSG:4326")

In [19]:
gdf.head()

Unnamed: 0,Chip,left,bottom,right,top,geometry
0,dev-et-data/tiles/tile40N-100E_chip32N-100E,-100.001238,29.999468,-97.999311,32.001394,"POLYGON ((-97.99931 29.99947, -97.99931 32.001..."
1,dev-et-data/tiles/tile40N-100E_chip32N-92E,-92.001857,29.999468,-89.99993,32.001394,"POLYGON ((-89.99993 29.99947, -89.99993 32.001..."
2,dev-et-data/tiles/tile40N-100E_chip32N-94E,-94.001702,29.999468,-91.999776,32.001394,"POLYGON ((-91.99978 29.99947, -91.99978 32.001..."
3,dev-et-data/tiles/tile40N-100E_chip32N-96E,-96.001547,29.999468,-93.999621,32.001394,"POLYGON ((-93.99962 29.99947, -93.99962 32.001..."
4,dev-et-data/tiles/tile40N-100E_chip32N-98E,-98.001392,29.999468,-95.999466,32.001394,"POLYGON ((-95.99947 29.99947, -95.99947 32.001..."


In [20]:
import folium


In [21]:
tony_plot_tiles_json = gdf.to_json()

In [22]:

# Start from a fresh folium map and overlay all chip outlines from the
# GeoJSON string exported from `gdf`.
map_osm = folium.Map(zoom_start=5, location=[35, -90])

tile_boxes = folium.GeoJson(tony_plot_tiles_json)
tile_boxes.add_to(map_osm)

# The last expression renders the map in the cell output.
map_osm

In [23]:
my_s= gdf.loc[0]

#print(dir(my_s))


print(my_s.geometry)



POLYGON ((-97.99931147599997 29.99946804791504, -97.99931147599997 32.00139437691504, -100.001237805 32.00139437691504, -100.001237805 29.99946804791504, -97.99931147599997 29.99946804791504))


In [24]:
geopandas.GeoSeries([my_s.geometry]).to_json()

'{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-97.99931147599997, 29.999468047915045], [-97.99931147599997, 32.001394376915044], [-100.00123780499997, 32.001394376915044], [-100.00123780499997, 29.999468047915045], [-97.99931147599997, 29.999468047915045]]]}, "bbox": [-100.00123780499997, 29.999468047915045, -97.99931147599997, 32.001394376915044]}], "bbox": [-100.00123780499997, 29.999468047915045, -97.99931147599997, 32.001394376915044]}'

In [25]:
map_osm = folium.Map(zoom_start=5, location=[35, -90])


def style_function(x):
    """Return the same red-fill / green-outline style for every feature."""
    return {'fillColor': 'red', 'color': 'green'}


# Add one GeoJson layer per chip so each outline carries its own tooltip
# (the chip path from the `Chip` column).
for chip_name, chip_geometry in zip(gdf.Chip, gdf.geometry):
    chip_geojson = geopandas.GeoSeries([chip_geometry]).to_json()
    folium.GeoJson(chip_geojson, style_function=style_function, tooltip=chip_name).add_to(map_osm)

# The last expression renders the map in the cell output.
map_osm

In [26]:
# Persist the catalog so the earlier load-or-build cell can read it from disk
# instead of rescanning S3.
df.to_pickle('tile_catalog_df.pkl')  # where to save it, usually as a .pkl


# Read the file straight back (presumably to check that it round-trips).
# NOTE(review): unpickling executes code from the file — only load pickles
# written by this notebook.
df = pd.read_pickle('tile_catalog_df.pkl')

In [27]:
df.head()

Unnamed: 0,Chip,left,bottom,right,top,geometry
0,dev-et-data/tiles/tile40N-100E_chip32N-100E,-100.001238,29.999468,-97.999311,32.001394,"POLYGON ((-97.99931 29.99947, -97.99931 32.001..."
1,dev-et-data/tiles/tile40N-100E_chip32N-92E,-92.001857,29.999468,-89.99993,32.001394,"POLYGON ((-89.99993 29.99947, -89.99993 32.001..."
2,dev-et-data/tiles/tile40N-100E_chip32N-94E,-94.001702,29.999468,-91.999776,32.001394,"POLYGON ((-91.99978 29.99947, -91.99978 32.001..."
3,dev-et-data/tiles/tile40N-100E_chip32N-96E,-96.001547,29.999468,-93.999621,32.001394,"POLYGON ((-93.99962 29.99947, -93.99962 32.001..."
4,dev-et-data/tiles/tile40N-100E_chip32N-98E,-98.001392,29.999468,-95.999466,32.001394,"POLYGON ((-95.99947 29.99947, -95.99947 32.001..."
