In [96]:
# coding=utf-8
"""
Ingest data from the command-line.
"""
from __future__ import absolute_import
import os
import uuid
import logging
import re
from pathlib import Path
import yaml
import netCDF4
import click

# image boundary imports
import rasterio
from rasterio.errors import RasterioIOError
import rasterio.features
import shapely.affinity
import shapely.geometry
import shapely.ops

###IMAGE BOUNDARY CODE

def safe_valid_region(images, mask_value=None):
    try:
        return valid_region(images, mask_value)
    except (OSError, RasterioIOError):
        return None


def valid_region(images, mask_value=None):
    mask = None

    for fname in images:
        ## ensure formats match
        with rasterio.open(str(fname), 'r') as ds:
            transform = ds.affine
            img = ds.read(1)

            if mask_value is not None:
                new_mask = img & mask_value == mask_value
            else:
                new_mask = img != ds.nodata
            if mask is None:
                mask = new_mask
            else:
                mask |= new_mask

    shapes = rasterio.features.shapes(mask.astype('uint8'), mask=mask)
    shape = shapely.ops.unary_union([shapely.geometry.shape(shape) for shape, val in shapes if val == 1])

    # convex hull
    geom = shape.convex_hull

    # buffer by 1 pixel
    geom = geom.buffer(1, join_style=3, cap_style=3)

    # simplify with 1 pixel radius
    geom = geom.simplify(1)

    # intersect with image bounding box
    geom = geom.intersection(shapely.geometry.box(0, 0, mask.shape[1], mask.shape[0]))

    # transform from pixel space into CRS space
    geom = shapely.affinity.affine_transform(geom, (transform.a, transform.b, transform.d,
                                                    transform.e, transform.xoff, transform.yoff))
    
    #output = shapely.geometry.mapping(geom)
    #output['coordinates'] = _to_lists(output['coordinates'])
    #return output
    return geom
    #output['coordinates'] = _to_lists(output['coordinates'])
    #return output

def _to_lists(x):
    """
    Returns lists of lists when given tuples of tuples
    """
    if isinstance(x, tuple):
        return [_to_lists(el) for el in x]

    return x


###END IMAGE BOUNDARY CODE

def get_projection(image):
    if 'geostationary' in image.variables:
        projection = str(image['geostationary'].spatial_ref)
        transform = image['geostationary'].GeoTransform
    else:
        projection = str(image['geostationary_satellite'].spatial_ref)
        transform = image['geostationary_satellite'].GeoTransform

    left = transform[0].item()
    bottom = transform[3].item()
    right = left + transform[1].item()*image['x'].size
    top = bottom + transform[5].item()*image['y'].size
    if left > right:
        left, right = right, left
    if bottom > top:
        bottom, top = top, bottom
    return {
        'spatial_reference': projection,
        'geo_ref_points': {
            'ul': {'x': left, 'y': top},
            'ur': {'x': right, 'y': top},
            'll': {'x': left, 'y': bottom},
            'lr': {'x': right, 'y': bottom},
            }
        }


def get_extent(image):
    left = float(image.getncattr('geospatial_lon_min'))
    bottom = float(image.getncattr('geospatial_lat_min'))
    right = float(image.getncattr('geospatial_lon_max'))
    top = float(image.getncattr('geospatial_lat_max'))
    return {
        'ul': {'lon': left, 'lat': top},
        'ur': {'lon': right, 'lat': top},
        'll': {'lon': left, 'lat': bottom},
        'lr': {'lon': right, 'lat': bottom},
    }





def get_ang_dataset(path):
    band_re = re.compile('.*-P1S-ABOM_GEOM_(.*)-PRJ.*_(500|1000|2000)-HIMAWARI8-AHI.nc')
    images = {}
    for image in path.glob('*-P1S-ABOM_GEOM_*-HIMAWARI8-AHI.nc'):
        match = band_re.match(str(image)).groups()
        images['%s_%s' % match] = {
            'path': image.name,
            'layer': 'solar_zenith_angle',
        }
    if not images:
        return None
    return get_skeleton(str(path/images['SOLAR_2000']['path']), 'GEOM_SOLAR', images)


def get_obs_dataset(path):
    band_re = re.compile('.*-P1S-ABOM_OBS_B(.*)-PRJ.*_(500|1000|2000)-HIMAWARI8-AHI.nc')
    images = {}
    for image in path.glob('*-P1S-ABOM_OBS_*-HIMAWARI8-AHI.nc'):
        match = band_re.match(str(image)).groups()
        images['%s_%s' % match] = {
            'path': image.name,
            'layer': 'channel_00' + match[0] + '_scaled_radiance',
        }
    if not images:
        return None
    return get_skeleton(str(path/images['01_2000']['path']), 'OBS', images)


def get_brf_dataset(path):
    band_re = re.compile('.*-P1S-ABOM_BRF_B(.*)-PRJ.*_(500|1000|2000)-HIMAWARI8-AHI.nc')
    images = {}
    for image in path.glob('*-P1S-ABOM_BRF_*-HIMAWARI8-AHI.nc'):
        match = band_re.match(str(image)).groups()
        images['%s_%s' % match] = {
            'path': image.name,
            'layer': 'channel_00' + match[0] + '_brf',
        }
    if not images:
        return None
    #if "_1000-" in str(path/images['01_2000']['path'])
    #print str(path/images['01_2000']['path'])
    return get_skeleton(str(path/images['01_2000']['path']), 'BRF', images)


def prepare_dataset(path):
    
    brf = get_brf_dataset(path)
    if not brf:
        return []
    ang = get_ang_dataset(path)
    obs = get_obs_dataset(path)
    brf['lineage']['source_datasets'] = {ds['id']: ds for ds in [ang, obs] if ds}
    
    return [brf]


def make_datasets(datasets):
    for dataset in datasets:
        
        path = Path(dataset)

        logging.info("Processing %s", path)
        documents = prepare_dataset(path)
        if not documents:
            logging.info("No datasets found in %s", path)
            continue
        
        yield path, documents

def get_geomdict(bands,path):

    images_500 = []
    images_1000 = []
    images_2000 = []
    #print documents[0]['image']['bands'].keys()
    for key in bands.keys():
        #print documents[0]['image']['bands'][key]['path']
        if '_500' in key:
            images_500.append(os.path.join(str(path),str(documents[0]['image']['bands'][key]['path'])))
        if '_1000' in key:
            images_1000.append(os.path.join(str(path),str(documents[0]['image']['bands'][key]['path'])))
        if '_2000' in key:
            images_2000.append(os.path.join(str(path),str(documents[0]['image']['bands'][key]['path'])))
        
    geometry = _to_lists(shapely.geometry.mapping(shapely.ops.unary_union([safe_valid_region(images_500),\
                                                                           safe_valid_region(images_1000),safe_valid_region(images_2000)]))['coordinates'])           
    return geometry
        
def absolutify_paths(doc, path):
    for band in doc['image']['bands'].values():
        band['path'] = str(path/band['path'])
    #print doc
    return doc

def get_skeleton(path, prod, bands):
    image = netCDF4.Dataset(path)
    times = image['time']
    sensing_time = str(netCDF4.num2date(times[0], units=times.units, calendar=times.calendar))
    
    return {
        'id': str(uuid.uuid4()),
        'processing_level': str(image.processing_level),
        'product_type': prod,
        'creation_dt': str(image.date_created),
        'platform': {'code': 'HIMAWARI_8'},
        'instrument': {'name': str(image.instrument)},
        # 'acquisition': {'groundstation': {'code': station}},
        'extent': {
            'coord': get_extent(image),
            'from_dt': sensing_time,
            'to_dt': sensing_time,
            'center_dt': sensing_time
        },
        'format': {'name': 'NETCDF'},
        'grid_spatial': {
            'projection': get_projection(image),
            'valid_data': {
                'coordinates': get_geomdict(bands,path),
                'type': "Polygon"},
        
        },
        'image': {
            'bands': bands
        },
        'lineage': {'source_datasets': {}},
    }

output = None
datasets = ['/media/simonaoliver/datacube/input/H8/2015/12/05/0710/']
for dataset in datasets:
    
    if output:
        docs = (absolutify_paths(doc, path) for path, docs in make_datasets(datasets) for doc in docs)
        with open(output, 'w') as stream:
            yaml.dump_all(docs, stream)
    else:
        for path, docs in make_datasets(datasets):
            yaml_path = str(path.joinpath('agdc-metadata.yaml'))
            logging.info("Writing %s dataset(s) into %s", len(docs), yaml_path)
            with open(yaml_path, 'w') as stream:
                yaml.dump_all(docs, stream)



IndexError: list index out of range

[]