In [2]:
pip install pdal

Note: you may need to restart the kernel to use updated packages.


PDAL wants numpy >= 1.20.0; you may need to update it with conda (```conda update numpy=1.20.1```) and then re-run `pip install pdal`.

In [3]:
import numpy as np
# Show which numpy version the kernel actually imported.
np.__version__

'1.20.1'

In [4]:
from API_utils import show_dates, show_files_for_site_date
import os
import numpy as np
import json
import multiprocessing
import time
import glob
import rasterio
from osgeo import gdal
import rasterio as rio
import re
import time
import requests
import hashlib
import pdal
from string import Template
import subprocess
from dask import delayed, compute
from dask.diagnostics import ProgressBar
from gdal import GA_ReadOnly

# Ask multiprocessing how many CPUs it reports, then echo the value as the cell output.
ncores = multiprocessing.cpu_count()
ncores

16

In [5]:
# Run configuration: NEON site, data product, month, and working directories.
site = 'TEAK'
productcode = 'DP1.30003.001'
tmp_path = '/home/jovyan/tmp'
NEON_path = '/home/jovyan/NEON'
date = '2018-06'

os.makedirs(tmp_path, exist_ok=True)

# Reference raster whose footprint decides which laz tiles get processed.
img_path = os.path.join(NEON_path, 'D17_CHM_all_Mask5m_roughFlightline.tif')

# Unless GDAL exceptions are enabled, gdal.Open signals failure by returning
# None, so check explicitly instead of failing later with an AttributeError.
img = gdal.Open(img_path, gdal.GA_ReadOnly)
if img is None:
    raise FileNotFoundError(f'GDAL could not open {img_path}')
width = img.RasterXSize
height = img.RasterYSize
gt = img.GetGeoTransform()
img = None  # drop the reference so GDAL closes the dataset

# Push the four pixel-space corners through the affine geotransform and take
# the min/max. For a north-up raster this equals the upper-left/lower-right
# corner arithmetic; it also stays correct if the geotransform is rotated.
corner_x = [gt[0] + col*gt[1] + row*gt[2] for col in (0, width) for row in (0, height)]
corner_y = [gt[3] + col*gt[4] + row*gt[5] for col in (0, width) for row in (0, height)]
extent = {
    'minx': min(corner_x),
    'miny': min(corner_y),
    'maxx': max(corner_x),
    'maxy': max(corner_y),
}


In [8]:
def generate_laz_download_info(productcode, site, date):
    '''List the colorized, classified point-cloud laz files for a site and date.

    Returns a tuple (t0, laz):
    t0  -- time.time() taken just before the file listing was requested.
    laz -- the entries from show_files_for_site_date whose 'name' contains
           'classified_point_cloud_colorized.laz'.
    '''
    requested_at = time.time()
    wanted = 'classified_point_cloud_colorized.laz'
    listing = show_files_for_site_date(productcode, site, date)
    matches = [entry for entry in listing if wanted in entry['name']]
    return (requested_at, matches)
    
    
def refresh_url(f, t0, productcode, site, date):
    '''Swap in a newly issued url for f once the old one may have aged out.

    Nothing happens while fewer than 3550 seconds have passed since t0.
    After that, the file listing is requested again and f['url'] is
    overwritten (in place) with the url of every listed entry whose 'name'
    equals f['name']. Returns None.
    '''
    age = time.time() - t0
    if age < 3550:
        return

    for candidate in show_files_for_site_date(productcode, site, date):
        if candidate['name'] == f['name']:
            f['url'] = candidate['url']
                
def download_from_NEON_API(f, tmp_path):
    '''Download one file entry into tmp_path, checking its md5 when one is given.

    Arguments:
    f        -- Dict - file entry with 'name' and 'url'. An optional 'md5'
                entry (hex digest) is compared with the downloaded bytes; a
                missing or empty 'md5' skips the check.
    tmp_path -- String - directory the file is written to, as tmp_path/f['name'].

    Up to 4 attempts are made. The body is streamed to disk in chunks while the
    md5 is updated incrementally, so the whole file is never held in memory.
    A file left behind by a failed attempt is removed.

    Raises:
    Exception('failed to download') when every attempt fails.
    '''
    max_attempts = 4
    chunk_size = 1024 * 1024
    fname = os.path.join(tmp_path, f['name'])
    expected_md5 = f.get('md5')

    for attempt in range(max_attempts):
        try:
            digest = hashlib.md5()
            # The timeout keeps a stalled connection from hanging a worker forever;
            # the with-block releases the connection whatever happens.
            with requests.get(f['url'], stream=True, timeout=60) as response:
                # raise an error for bad status codes
                response.raise_for_status()
                with open(fname, 'wb') as sink:
                    for block in response.iter_content(chunk_size):
                        sink.write(block)
                        digest.update(block)

            # The file is closed at this point, so no sleep is needed before returning.
            if not expected_md5 or digest.hexdigest() == expected_md5:
                return
            print(f'md5 mismatch on attempt {attempt}')
        except Exception as e:
            print(f'Warning:\n{e}')

        # Never leave a truncated or corrupt file where later steps would pick it up.
        if os.path.exists(fname):
            os.remove(fname)

    raise Exception('failed to download')
        
def make_pipe(f, bbox, out_path, resolution=1):
    '''Creates, validates and then returns the pdal pipeline and the tile id.

    The pipeline reads one las/laz file (readers.las), applies
    filters.outlier (radius method), filters.optimalneighborhood and
    filters.covariancefeatures, then writes one GeoTIFF per covariance
    feature with writers.gdal (idw output, nodata -9999).

    Arguments:
    f          -- String - Path of the las/laz file to read. The tile id is built
                  from the 5th and 6th '_'-separated fields of its base name.
    bbox       -- Tuple - Bounding box in the form ([xmin, xmax], [ymin, ymax]);
                  its str() is used as the "bounds" of every writers.gdal stage.
    out_path   -- String - Directory for the <tile>_<feature>.tif rasters.
    resolution -- Int or Float - resolution handed to writers.gdal.

    Returns:
    (pipeline, tile) -- the validated pdal.Pipeline and the tile id string.

    Raises:
    Exception when pipeline.validate() reports the pipeline as bad.
    '''
    tile = '_'.join(f.rpartition('/')[2].split('_')[4:6])

    features = ['Anisotropy', 'DemantkeVerticality', 'Linearity', 'Omnivariance',
                'Planarity', 'Scattering', 'SurfaceVariation', 'Verticality']

    stages = [
        {
            'filename': f,
            'type': 'readers.las',
            'tag': 'readdata',
        },
        {
            'type': 'filters.outlier',
            'method': 'radius',
            'radius': 1.0,
            'min_k': 4,
        },
        {
            'type': 'filters.optimalneighborhood',
            'min_k': 8,
            'max_k': 50,
        },
        {
            'type': 'filters.covariancefeatures',
            'knn': 10,
            'threads': 2,
            'feature_set': ','.join(features),
        },
    ]

    # One writers.gdal stage per feature; they differ only in dimension and filename.
    for feature in features:
        stages.append({
            'filename': f'{out_path}/{tile}_{feature}.tif',
            'gdalopts': 'tiled=yes,     compress=deflate',
            'nodata': -9999,
            'output_type': 'idw',
            # resolution and bounds are passed as strings, as the original template did
            'resolution': str(resolution),
            'type': 'writers.gdal',
            'window_size': 6,
            'dimension': feature,
            'bounds': str(bbox),
        })

    # json.dumps escapes quotes/backslashes in paths, which plain string
    # substitution into a JSON template does not.
    pipe = json.dumps({'pipeline': stages})
    pipeline = pdal.Pipeline(pipe)
    if pipeline.validate():
        return (pipeline, tile)
    raise Exception('Bad pipeline (sorry to be so ambigous)!')
        
def download_laz(f, tmp_path, t0, productcode, site, date):
    '''Takes an entry from the output of generate_laz_download_info, saves it to tmp_path.

    Returns None when the download succeeds, or f['name'] when it fails, so the
    caller can collect the names of the files that could not be fetched.
    '''
    # make sure url is still valid
    refresh_url(f, t0, productcode, site, date)

    try:
        # Download the laz
        download_from_NEON_API(f, tmp_path)
    except Exception:
        # Only ordinary errors mark the file as failed; KeyboardInterrupt and
        # SystemExit still propagate so a long run can be stopped.
        return f['name']
    return None
    
def extract_bbox(name, tries=1):
    '''Runs `pdal info` on a file and returns its native bounding box.

    Arguments:
    name  -- String - path of the point cloud file.
    tries -- Int - number of the current attempt; a timed-out call is retried
             once (i.e. while tries < 2).

    Returns:
    The value found at ['stats']['bbox']['native']['bbox'] in the JSON printed
    by `pdal info`, or None when the command timed out on every attempt or its
    output could not be interpreted.
    '''
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = ['pdal', 'info', name]
    try:
        reply = subprocess.run(cmd, capture_output=True, timeout=20)
    except subprocess.TimeoutExpired:
        if tries < 2:
            # Return the retry's result; previously it was computed and dropped.
            return extract_bbox(name, tries=tries+1)
        return None

    if len(reply.stderr) > 0: print(reply.stderr)

    try:
        meta = json.loads(reply.stdout)
        return meta['stats']['bbox']['native']['bbox']
    except (ValueError, KeyError, TypeError):
        # Empty or unexpected output (e.g. pdal failed): report it as "no bbox"
        # instead of aborting every other file being processed.
        print(f'Could not read a bounding box from pdal info output for {name}')
        return None
        
def execute_pipe(fname, extent, tmp_path, resolution=1):
    '''Builds and runs the pdal pipeline for one file if it overlaps extent.

    Arguments:
    fname      -- String - path of the point cloud file.
    extent     -- Dict - 'minx', 'miny', 'maxx', 'maxy' of the area of interest.
    tmp_path   -- String - directory handed to make_pipe for the output rasters.
    resolution -- Int or Float - resolution handed to make_pipe.

    Files whose bounding box cannot be determined, or that lie entirely outside
    extent, are skipped. A failure while building or running the pipeline is
    printed and does not stop the other files. Returns None.
    '''
    bbox = extract_bbox(fname)
    if bbox is None:
        print(f'Failed to determin bounds for {fname}!')
        return

    try:
        bounds = ([bbox['minx'], bbox['maxx']], [bbox['miny'], bbox['maxy']])
        # if the bounds are at least partially within the extent...
        overlaps = (bbox['minx'] <= extent['maxx']
                    and bbox['maxx'] >= extent['minx']
                    and bbox['miny'] <= extent['maxy']
                    and bbox['maxy'] >= extent['miny'])
        if overlaps:
            # make and execute the pdal pipeline
            pipeline, tile = make_pipe(fname, bounds, tmp_path, resolution=resolution)
            pipeline.execute()
    except Exception as e:
        # Stay best-effort per file, but say what went wrong instead of hiding it.
        print(f'Pipeline failed for {fname}:\n{e}')
            
def origin_warp_if_needed(f):
    '''Warp the raster f only when origin_good_1mresolution(f) reports a bad origin.'''
    already_aligned = origin_good_1mresolution(f)
    if already_aligned:
        return
    warp(f)

def origin_good_1mresolution(f):
    '''Checks that the raster origin reported by gdalinfo is on whole numbers.

    Arguments:
    f -- String - path of the raster to inspect.

    Returns:
    True when both coordinates of the 'Origin = (x,y)' line printed by
    gdalinfo are whole numbers, False otherwise.

    Raises:
    ValueError when the gdalinfo output contains no 'Origin = (...)' line.
    '''
    # An argument list (no shell, no grep) keeps paths with spaces or shell
    # metacharacters intact; the regex below picks out the Origin line itself.
    result = subprocess.run(['gdalinfo', f], capture_output=True)
    if len(result.stderr) > 0: print(result.stderr)

    report = result.stdout.decode(errors='replace')
    match = re.search(r'Origin = \(([^)]+)\)', report)
    if match is None:
        raise ValueError(f'No origin found in gdalinfo output for {f}')

    x, y = (float(value) for value in match.group(1).split(','))

    # make sure x and y are whole numbers
    return x.is_integer() and y.is_integer()


def warp(f, resolution=1):
    '''Runs gdalwarp -tr -tap on the file, f, and replaces f with the result.

    This will ensure that the tif pixels are aligned to the origin.

    Arguments:
    f          -- String - path of the raster to warp in place.
    resolution -- Int or Float - target x and y resolution passed to -tr.

    The warped raster is first written next to f as <base>_w.tiff and only
    moved over f when gdalwarp succeeded; otherwise f is left untouched.
    '''
    # splitext strips only the final extension, so dots elsewhere in the
    # path or file name are handled.
    base = os.path.splitext(f)[0]
    warped = f'{base}_w.tiff'

    # warp the pixels to ensure they are on origin
    # (argument list, no shell: paths with spaces stay intact)
    cmd = ['gdalwarp', '-tr', str(resolution), str(resolution), '-tap', f, warped]
    result = subprocess.run(cmd, capture_output=True)
    if len(result.stderr) > 0: print(result.stderr)

    if result.returncode != 0 or not os.path.exists(warped):
        print(f'gdalwarp failed for {f}; leaving the original file in place')
        if os.path.exists(warped):
            os.remove(warped)
        return

    # move new file to old file name
    os.replace(warped, f)

def make_mosaic(layer, files, tmp_path):
    '''Mosaics the tile rasters of one layer.

    Builds <tmp_path>/<layer>.vrt from files with gdal.BuildVRT, then converts
    that vrt to <tmp_path>/<layer>.tif with gdal.Translate.
    '''
    vrt_path = os.path.join(tmp_path, f'{layer}.vrt')
    tif_path = os.path.join(tmp_path, f'{layer}.tif')

    # virtual raster over all the tiles of this layer
    virtual = gdal.BuildVRT(vrt_path, files)

    # single-file mosaic made from the virtual raster
    merged = gdal.Translate(tif_path, virtual)

In [None]:
def make_covariance_tifs(productcode, site, date, tmp_path, extent):
    '''Downloads the laz tiles for a site/date and mosaics their covariance features.

    Arguments:
    productcode -- String - data product code passed to the file listing.
    site        -- String - site code passed to the file listing.
    date        -- String - date passed to the file listing.
    tmp_path    -- String - working directory for the laz files, the per-tile
                   rasters and the final <layer>.vrt / <layer>.tif mosaics.
    extent      -- Dict - 'minx', 'miny', 'maxx', 'maxy'; passed to execute_pipe.

    Steps (each one is fanned out with dask): download the laz files, run the
    pdal pipeline on every downloaded file, warp rasters whose origin is off,
    then build one mosaic per feature layer. Returns None.
    '''
    # make sure dir exists
    os.makedirs(tmp_path, exist_ok=True)

    # get list of files to download
    t0, laz = generate_laz_download_info(productcode, site, date)

    # download the files
    print('Downlaoding laz files:')
    lazy0 = [delayed(download_laz)(f, tmp_path, t0, productcode, site, date) for f in laz]

    with ProgressBar():
        fails = compute(*lazy0)

    # download_laz returns the file name on failure and None on success
    fails = [thing for thing in fails if thing is not None]
    if fails:
        print(f'Failed to download {len(fails)} file(s): {fails}')

    # diy glob the files that were actually downloaded
    down_laz = [os.path.join(tmp_path, f) for f in sorted(os.listdir(tmp_path))
                if f.endswith('.laz')]

    # make the pipelines and run them
    print('Making and executing pipeline:')
    lazy1 = [delayed(execute_pipe)(f, extent, tmp_path, resolution=1) for f in down_laz]

    with ProgressBar():
        _ = compute(*lazy1)

    # make a dict of tiles for each layer
    layers = ['Anisotropy', 'DemantkeVerticality', 'Linearity', 'Omnivariance', 'Planarity',
              'Scattering', 'SurfaceVariation', 'Verticality']

    # Match on the exact '_<layer>.tif' suffix written by the pipeline: a plain
    # substring test files DemantkeVerticality tiles under Verticality and also
    # picks up <layer>.tif / <layer>.vrt mosaics left by an earlier run.
    produced = sorted(os.listdir(tmp_path))
    eigendict = {}
    for layer in layers:
        suffix = f'_{layer}.tif'
        eigendict[layer] = [os.path.join(tmp_path, f) for f in produced if f.endswith(suffix)]

    # make sure tifs have pixels aligned to origin, do it in || with dask.
    print('Waroing if necessary')
    lazy2 = [delayed(origin_warp_if_needed)(f) for files in eigendict.values() for f in files]

    with ProgressBar():
        _ = compute(*lazy2)

    # build vrt, make mosaic for each layer, in ||
    print('Making mosaic *\\(-_-)/*')
    also_lazy = []
    for layer, files in eigendict.items():
        if not files:
            # nothing to mosaic for this layer
            print(f'No tiles found for {layer}; skipping mosaic')
            continue
        also_lazy.append(delayed(make_mosaic)(layer, files, tmp_path))

    with ProgressBar():
        _ = compute(*also_lazy)

In [10]:
# Run the full download -> pipeline -> warp -> mosaic workflow for the configured site and date.
make_covariance_tifs(productcode, site, date, tmp_path, extent)

[########################################] | 100% Completed | 16.7s
