# Parse EE Images into tiles

Development notebook only.
This notebook uses the Earth Engine API to extract images as tiled data, so that each request stays within the API's limits.



### Requirements:
- [PyDrive](https://pypi.org/project/PyDrive/) ([doc](https://pythonhosted.org/PyDrive/quickstart.html)): lets Python interact with Google Drive
- [ee](https://pypi.org/project/earthengine-api/) ([doc](https://developers.google.com/earth-engine/apidocs)): Earth Engine Python API
- `pprint`: pretty-printing of nested data
- `lib.eetools`: local tools for Earth Engine

### Prepare environment

In [4]:
# Per-city configuration, keyed by a human-readable "City, ST" label.
# Fields of each entry:
#   size      - size string such as "14.601 MB" (presumably the size of the city's
#               raster export -- TODO confirm)
#   filename  - short base name for the city
#   west/south/east/north - bounding-box edges (values look like decimal-degree lon/lat)
#   scale     - integer scale (presumably export pixel resolution -- TODO confirm)
CITIES = {
    "Austin, TX": dict(
        size="14.601 MB",
        filename="austin",
        west=-97.7699,
        south=30.2237,
        east=-97.7212,
        north=30.3040,
        scale=4,
    ),
    "New York, NY": dict(
        size="192.438 MB",
        filename="new_york",
        west=-74.4034,
        south=40.3712,
        east=-73.5918,
        north=40.9359,
        scale=10,
    ),
    "Tuscaloosa, AL": dict(
        size="298.448 MB",
        filename="tuscaloosa",
        west=-87.57477,
        south=33.16558,
        east=-87.48422,
        north=33.23129,
        scale=1,
    ),
}
import overpass
import lib.osmtools as ost

# Overpass API client, created once here and passed to get_gjson() by a later cell.
op = overpass.API()

def get_gjson(ovp, bounds):
    """Fetch the ways inside ``bounds`` and return only the LineString features.

    Args:
        ovp: Object exposing ``get(query, verbosity=...)``; this notebook passes
            an ``overpass.API`` instance.
        bounds: Value handed to ``ost.overpass_bounds`` to build the bounds part
            of the query; this notebook passes an entry of ``CITIES``.

    Returns:
        list: The features of the response whose ``geometry['type']`` is
        ``"LineString"``, in response order.
    """
    # "way<bounds>" selects the ways; ";(._;>;);" is appended verbatim to the query.
    query = "way" + ost.overpass_bounds(bounds) + ";(._;>;);"
    response = ovp.get(query, verbosity="geom")

    line_features = []
    for feature in response.features:
        if feature.geometry['type'] == "LineString":
            line_features.append(feature)
    return line_features

In [5]:

city_key = 'Austin, TX'

# For each tracked tag key, collect the indices (into city_features) of the
# features whose properties contain that key.
counts = {
    tag: []
    for tag in ('highway', 'building', 'amenity', 'landuse', 'waterway', 'railway', 'natural')
}
city_features = get_gjson(op, CITIES[city_key])
for feature_idx, feature in enumerate(city_features):
    feature_props = feature['properties']
    for tag, matches in counts.items():
        if tag in feature_props:
            matches.append(feature_idx)

# Summary: total feature count, then one line per tracked tag key.
print(f'{city_key}: {len(city_features)}')
for tag, matches in counts.items():
    print(f"- {tag}: {len(matches)}")

Austin, TX: 44729
- highway: 12729
- building: 28518
- amenity: 923
- landuse: 767
- waterway: 121
- railway: 49
- natural: 69


In [6]:
from IPython.display import display
# Show the tags of the first 20 features that have an 'amenity' key, skipping any
# tag whose key contains 'addr'; a blank line separates features.
for amenity_idx in counts['amenity'][:20]:
    amenity_props = city_features[amenity_idx]['properties']
    for tag, value in amenity_props.items():
        if 'addr' in tag:
            continue
        print(f'- {tag}: {value}')
    print()

- amenity: cinema
- building: yes
- name: South Lamar Alamo
- wikidata: Q42298382

- amenity: library
- building: university
- building:levels: 5
- name: Perry-Castañeda Library
- ref: PCL
- wikidata: Q1647700

- amenity: parking
- building: yes
- building:levels: 6
- ele: 161
- name: Brazos Garage
- parking: multi-storey
- ref: BRG

- amenity: parking
- building: garage
- ele: 161
- name: San Jacinto Garage
- ref: SJG

- amenity: community_centre
- building: yes
- name: Hancock Recreation Center
- opening_hours: Mo-Fr 09:00-22:00; Sa 09:00-24:00
- website: http://www.austintexas.gov/department/hancock-golf-course

- amenity: parking
- parking: surface
- wheelchair: yes

- amenity: parking
- building: garage
- ele: 154
- name: Trinity Garage
- ref: TRG

- amenity: library
- building: university
- ele: 163
- name: Collections Deposit Library
- ref: CDL

- amenity: school
- created_by: Potlatch 0.10f
- name: San Juan Diego HS

- amenity: parking

- amenity: parking

- amenity: police
- b

In [17]:
# Single pass over the features: keep every feature that has a 'building' tag,
# and separately those whose tag value is exactly 'yes'.
buildings = []
building_yes = []
for feature in city_features:
    props = feature['properties']
    if 'building' not in props:
        continue
    buildings.append(feature)
    if props['building'] == 'yes':
        building_yes.append(feature)

print(len(building_yes))

# Preview the first 20 'building: yes' features, skipping tags whose key contains 'addr'.
for building in building_yes[:20]:
    for tag, value in building['properties'].items():
        if 'addr' in tag:
            continue
        print(f'- {tag}: {value}')
    print()

27203
- amenity: cinema
- building: yes
- name: South Lamar Alamo
- wikidata: Q42298382

- alt_name: Bullock Texas State History Museum;Bullock Museum
- building: yes
- building:levels: 3
- museum: history
- name: Texas State History Museum
- name:etymology: Bob Bullock
- name:etymology:wikidata: Q4931979
- official_name: Bob Bullock Texas State History Museum
- opening_hours: 09:00-17:00
- operator: Texas State Preservation Board
- operator:type: public
- operator:wikidata: Q7708087
- tourism: museum
- wheelchair: yes
- wikidata: Q10858643
- wikipedia: en:Bullock Texas State History Museum

- amenity: parking
- building: yes
- building:levels: 6
- ele: 161
- name: Brazos Garage
- parking: multi-storey
- ref: BRG

- amenity: community_centre
- building: yes
- name: Hancock Recreation Center
- opening_hours: Mo-Fr 09:00-22:00; Sa 09:00-24:00
- website: http://www.austintexas.gov/department/hancock-golf-course

- building: yes
- name: LINE
- tourism: hotel

- building: yes
- operator: Bi

In [1]:
from IPython.display import display
import geopandas as gpd

# Load the Tuscaloosa roads shapefile.
# The path is relative to the repository root, like the other data paths in this
# notebook ('./data/ee', 'data/osm-sets/austin/austin.tif'), so the cell no longer
# depends on one user's home directory. Run the notebook from the repository root.
roads = gpd.read_file('data/osm-sets/tuscaloosa_01/roads.shp')
display(roads)

Unnamed: 0,index,category,label,name,geometry
0,0,,residential,Monte Vista Circle,"LINESTRING (-87.48667 33.16766, -87.48682 33.1..."
1,1,,residential,Monte Vista Circle,"LINESTRING (-87.48664 33.16682, -87.48666 33.1..."
2,2,,residential,Caplewood Drive,"LINESTRING (-87.55477 33.21429, -87.55483 33.2..."
3,3,,residential,Hamilton Lane,"LINESTRING (-87.51621 33.18704, -87.51615 33.1..."
4,4,,residential,Hamilton Lane,"LINESTRING (-87.51495 33.18708, -87.51538 33.1..."
...,...,...,...,...,...
8178,26632,,service,,"LINESTRING (-87.55294 33.21255, -87.55276 33.2..."
8179,26633,,service,,"LINESTRING (-87.55265 33.21268, -87.55263 33.2..."
8180,26634,,service,,"LINESTRING (-87.55250 33.21332, -87.55240 33.2..."
8181,26635,,service,,"LINESTRING (-87.55222 33.21324, -87.55226 33.2..."


# End Label Analysis

## City Contents by key Overpass Labels

In [5]:
import os, time

import pydrive

from pprint import pprint
from lib.authkit import ee_client
import lib.misc as m

# Bounding box and size string; the values are the same as the "Tuscaloosa, AL"
# entry of CITIES.
data = dict(
    size="298.448 MB",
    west=-87.57477,
    south=33.16558,
    east=-87.48422,
    north=33.23129,
)

# NOTE(review): `ee` is not imported in any cell visible in this notebook; it must
# already be in the kernel namespace for this cell to run -- confirm where it comes from.
# NAIP imagery restricted to the 2017-01-01 .. 2019-01-01 date filter, then mosaicked
# into a single image.
naip_collection = ee.ImageCollection("USDA/NAIP/DOQQ")
naip_data = naip_collection.filter(ee.Filter.date('2017-01-01', '2019-01-01'))
naip_image = naip_data.mosaic()

# naip_image.getInfo() does not include a shape, so fetch the image's
# dictionary instead.
naip_info = naip_image.toDictionary().getInfo()


In [6]:
# Display the dictionary fetched from the NAIP mosaic, using the show_dict helper
# from lib.misc (imported as `m`).
m.show_dict(data=naip_info, title="naip query info")

naip query info
---------------



### Get Image from EE

In [4]:
import time

# Load a Landsat image and select three bands.
landsat_img = ee.Image('LANDSAT/LC08/C01/T1_TOA/LC08_123032_20140515').select(['B4', 'B3', 'B2'])

# Create 'task' to export the image, specifying scale and region.
# NOTE(review): `geom_coords` is not defined in any cell visible in this notebook;
# it must be defined before this cell can run on a fresh kernel.
task = ee.batch.Export.image.toDrive(image=landsat_img,        # EE Image to export
                                     description='exportTest', # (str) Description of task, will become exported file name
                                     fileFormat='GeoTIFF',
                                     folder='data/geo_scraping',            # (str) Output folder in drive
                                     scale=30,                 # (int) Pixel res in meters
                                     region=geom_coords)       # (list(5,2)) Bounds to crop image to

# Seconds to wait between status requests, so the loop does not hammer the API.
POLL_INTERVAL_S = 5
# States after which the task will not change any more. Waiting only for
# "COMPLETED" would loop forever when an export fails or is cancelled.
TERMINAL_STATES = ("COMPLETED", "FAILED", "CANCELLED")

# Start task
task.start()
print("Started Image export")
last_state = None
while True:
    # One status request per poll; reuse the result for every check below.
    status = task.status()
    state = status['state']
    if state != last_state:
        print('>', state)
        last_state = state
    if state in TERMINAL_STATES:
        break
    time.sleep(POLL_INTERVAL_S)

pprint(status)

# Fail loudly so later cells do not go on to download a stale or missing file.
if state != "COMPLETED":
    raise RuntimeError(
        f"Earth Engine export ended in state {state!r}: "
        f"{status.get('error_message', 'no error message reported')}"
    )

Started Image export
> READY
> RUNNING
> COMPLETED
{'attempt': 1,
 'creation_timestamp_ms': 1623785072863,
 'description': 'exportTest',
 'destination_uris': ['https://drive.google.com/'],
 'id': 'TGDXLYRHL2R26PUMDSU7IBMZ',
 'name': 'projects/earthengine-legacy/operations/TGDXLYRHL2R26PUMDSU7IBMZ',
 'start_timestamp_ms': 1623785096994,
 'state': 'COMPLETED',
 'task_type': 'EXPORT_IMAGE',
 'update_timestamp_ms': 1623785111132}


## Download Google Drive Data

### Authenticate Google Drive

If you don't have a `settings.yaml`, contact Stephen Kirby.

In [15]:
import os
import pydrive 
from lib.authkit import get_drive
from lib.misc import *

# Build the Google Drive handle from the PyDrive settings file, using the
# get_drive helper from lib.authkit. The captured output below shows a browser
# OAuth prompt being opened during this call.
gdrive = get_drive(settings_fp='./keys/pydrive/settings.yaml')
print("> Completed PyDrive authenticaiton.")

> Loaded settings from:
  './keys/pydrive/settings.yaml'
Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=416252874442-c1r64ms1oh34n1l1nj6cn7rcp28e6egg.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.
> Completed PyDrive authenticaiton.


In [22]:
from lib.misc import file_size, show_dict, show_keys

       

# Fetch 'exportTest.tif' from Google Drive into ./data/ee and keep the helper's
# return value in `filepath`.
# NOTE(review): `download_raster` is neither defined nor explicitly imported in the
# visible cells -- presumably it comes from `from lib.misc import *`; confirm, and
# check the helper for the exact meaning of `force_equals` and `verbose`.
filepath = download_raster(gdrive=gdrive, 
                           name='exportTest.tif', 
                           out_path="./data/ee", 
                           force_equals=False, 
                           verbose=True)

Found 1 matching files.
Preparing for download:
- src:  exportTest.tif
- size: 4.655 MB
- out:  './data/ee/exportTest.tif'

Cancelled.


In [19]:
# Show the available keys and their types for the Drive handle (see output below).
# NOTE(review): `show_raster_keys` is not defined or explicitly imported in the
# visible cells -- presumably provided by `from lib.misc import *`; confirm.
show_raster_keys(gdrive)

Keys: (total = 39)
------------------
| 'kind'                        (str)  | 'id'                          (str)  | 'etag'                        (str)  | 'selfLink'                    (str)  |
| 'webContentLink'              (str)  | 'alternateLink'               (str)  | 'embedLink'                   (str)  | 'iconLink'                    (str)  |
| 'title'                       (str)  | 'mimeType'                    (str)  | 'labels'                      (dict) | 'copyRequiresWriterPermission'(bool) |
| 'createdDate'                 (str)  | 'modifiedDate'                (str)  | 'lastViewedByMeDate'          (str)  | 'markedViewedByMeDate'        (str)  |
| 'version'                     (str)  | 'parents'                     (list) | 'downloadUrl'                 (str)  | 'userPermission'              (dict) |
| 'originalFilename'            (str)  | 'fileExtension'               (str)  | 'md5Checksum'                 (str)  | 'fileSize'                    (str)  |
| 'quotaBytesU

In [6]:
import os
import shutil

# Size of the Austin raster on disk, read from the file's stat record.
filepath = 'data/osm-sets/austin/austin.tif'
disk_usage = os.stat(filepath).st_size

# Check which type the size comes back as.
print(type(disk_usage))

<class 'int'>
