In [24]:
# coding=utf-8
"""
Ingest data from the command-line.
"""
from __future__ import absolute_import

import uuid
import logging
from xml.etree import ElementTree
from pathlib import Path
import yaml
import click
from osgeo import osr
import os
# image boundary imports
import rasterio
from rasterio.errors import RasterioIOError
import rasterio.features
import shapely.affinity
import shapely.geometry
import shapely.ops

###IMAGE BOUNDARY CODE

def safe_valid_region(images, mask_value=None):
    try:
        return valid_region(images, mask_value)
    except (OSError, RasterioIOError):
        return None


def valid_region(images, mask_value=None):
    mask = None
    for fname in images:
        ## ensure formats match
        with rasterio.open(str(fname), 'r') as ds:
            transform = ds.affine
            img = ds.read(1)

            if mask_value is not None:
                new_mask = img & mask_value == mask_value
            else:
                new_mask = img != ds.nodata
                #new_mask = img != 0
            if mask is None:
                mask = new_mask
            else:
                mask |= new_mask

    shapes = rasterio.features.shapes(mask.astype('uint8'), mask=mask)
    shape = shapely.ops.unary_union([shapely.geometry.shape(shape) for shape, val in shapes if val == 1])
    type(shapes)
    # convex hull
    geom = shape.convex_hull

    # buffer by 1 pixel
    geom = geom.buffer(1, join_style=3, cap_style=3)

    # simplify with 1 pixel radius
    geom = geom.simplify(1)

    # intersect with image bounding box
    geom = geom.intersection(shapely.geometry.box(0, 0, mask.shape[1], mask.shape[0]))

    # transform from pixel space into CRS space
    geom = shapely.affinity.affine_transform(geom, (transform.a, transform.b, transform.d,
                                                    transform.e, transform.xoff, transform.yoff))

    output = shapely.geometry.mapping(geom)
    print output
    return geom
    #output['coordinates'] = _to_lists(output['coordinates'])
    #return output


def _to_lists(x):
    """
    Returns lists of lists when given tuples of tuples
    """
    if isinstance(x, tuple):
        return [_to_lists(el) for el in x]

    return x

def get_geo_ref_points(root):
    nrows = int(root.findall('./*/Tile_Geocoding/Size[@resolution="10"]/NROWS')[0].text)
    ncols = int(root.findall('./*/Tile_Geocoding/Size[@resolution="10"]/NCOLS')[0].text)

    ulx = int(root.findall('./*/Tile_Geocoding/Geoposition[@resolution="10"]/ULX')[0].text)
    uly = int(root.findall('./*/Tile_Geocoding/Geoposition[@resolution="10"]/ULY')[0].text)

    xdim = int(root.findall('./*/Tile_Geocoding/Geoposition[@resolution="10"]/XDIM')[0].text)
    ydim = int(root.findall('./*/Tile_Geocoding/Geoposition[@resolution="10"]/YDIM')[0].text)

    return {
        'ul': {'x': ulx, 'y': uly},
        'ur': {'x': ulx + ncols * abs(xdim), 'y': uly},
        'll': {'x': ulx, 'y': uly - nrows * abs(ydim)},
        'lr': {'x': ulx + ncols * abs(xdim), 'y': uly - nrows * abs(ydim)},
    }


def get_coords(geo_ref_points, spatial_ref):
    t = osr.CoordinateTransformation(spatial_ref, spatial_ref.CloneGeogCS())

    def transform(p):
        lon, lat, z = t.TransformPoint(p['x'], p['y'])
        return {'lon': lon, 'lat': lat}
    return {key: transform(p) for key, p in geo_ref_points.items()}


def prepare_dataset(path):
    
    root = ElementTree.parse(str(path)).getroot()

    level = root.findall('./*/Product_Info/PROCESSING_LEVEL')[0].text
    product_type = root.findall('./*/Product_Info/PRODUCT_TYPE')[0].text
    ct_time = root.findall('./*/Product_Info/GENERATION_TIME')[0].text

    granules = {granule.get('granuleIdentifier'): [imid.text for imid in granule.findall('IMAGE_ID')] for granule in
                root.findall('./*/Product_Info/Product_Organisation/Granule_List/Granules')}

    documents = []
    for granule_id, images in granules.items():
        images_ten_list = []
        images_twenty_list = []
        images_sixty_list = []   
        gran_path = str(path.parent.joinpath('GRANULE', granule_id, granule_id[:-7].replace('MSI', 'MTD')+'.xml'))
        root = ElementTree.parse(gran_path).getroot()
        sensing_time = root.findall('./*/SENSING_TIME')[0].text
        
        img_data_path = str(path.parent.joinpath('GRANULE', granule_id,'IMG_DATA'))
        for image in images:
            ten_list = ['B02','B03','B04','B08']
            twenty_list=['B05','B06','B07','B11','B12','B8A']
            sixty_list=['B01','B09','B10']

            for item in ten_list:
                if item in image:
                    images_ten_list.append(os.path.join(img_data_path,image+".jp2"))
            for item in twenty_list:
                if item in image:
                    images_twenty_list.append(os.path.join(img_data_path,image+".jp2"))
            for item in sixty_list:
                if item in image:
                    images_sixty_list.append(os.path.join(img_data_path,image+".jp2"))
        print "TEN",images_ten_list
        print "TWENTY",images_twenty_list
        print "SIXY", images_sixty_list
            #if 'B01' in image:
            #    images_list.append(os.path.join(img_data_path,image+".jp2"))
        
        station = root.findall('./*/Archiving_Info/ARCHIVING_CENTRE')[0].text

        cs_code = root.findall('./*/Tile_Geocoding/HORIZONTAL_CS_CODE')[0].text
        spatial_ref = osr.SpatialReference()
        spatial_ref.SetFromUserInput(cs_code)

        geo_ref_points = get_geo_ref_points(root)

        documents.append({
            'id': str(uuid.uuid4()),
            'processing_level': level.replace('Level-', 'L'),
            'product_type': product_type,
            'creation_dt': ct_time,
            'platform': {'code': 'SENTINEL_2A'},
            'instrument': {'name': 'MSI'},
            'acquisition': {'groundstation': {'code': station}},
            'extent': {
                'from_dt': sensing_time,
                'to_dt': sensing_time,
                'center_dt': sensing_time,
                'coord': get_coords(geo_ref_points, spatial_ref),
            },
            'format': {'name': 'JPEG2000'},
            'grid_spatial': {
                'projection': {
                    'geo_ref_points': geo_ref_points,
                    'spatial_reference': spatial_ref.ExportToWkt(),
                }
            },
            'image': {
                'bands': {
                    image[-2:]: {
                        'path': str(Path('GRANULE', granule_id, 'IMG_DATA', image + '.jp2')),
                        'layer': 1,
                    } for image in images
                }
            },
            'valid_data': _to_lists(shapely.geometry.mapping(shapely.ops.unary_union([safe_valid_region(images_sixty_list),\
                                                  safe_valid_region(images_ten_list),\
                                                  safe_valid_region(images_twenty_list)]))['coordinates']),              
            'lineage': {'source_datasets': {}},
        })
    return documents


datasets = ['/home/simonaoliver/data/S2A_OPER_PRD_MSIL1C_PDMC_20151205T075742_R073_V20151205T001643_20151205T001643.SAFE']
for dataset in datasets:
    path = Path(dataset)

    if path.is_dir():
        path = Path(path.joinpath(path.stem.replace('PRD_MSIL1C', 'MTD_SAFL1C')+'.xml'))
    if path.suffix != '.xml':
        raise RuntimeError('want xml')

    logging.info("Processing %s", path)
    documents = prepare_dataset(path)
    if documents:
        yaml_path = str(path.parent.joinpath('agdc-metadata.yaml'))
        logging.info("Writing %s dataset(s) into %s", len(documents), yaml_path)
        with open(yaml_path, 'w') as stream:
            yaml.dump_all(documents, stream)
    else:
        logging.info("No datasets discovered. Bye!")

'''
@click.command(help="Prepare Sentinel 2 dataset for ingestion into the Data Cube.")
@click.argument('datasets',
                type=click.Path(exists=True, readable=True, writable=True),
                nargs=-1)
def main(datasets):
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)

    for dataset in datasets:
        path = Path(dataset)

        if path.is_dir():
            path = Path(path.joinpath(path.stem.replace('PRD_MSIL1C', 'MTD_SAFL1C')+'.xml'))
        if path.suffix != '.xml':
            raise RuntimeError('want xml')

        logging.info("Processing %s", path)
        documents = prepare_dataset(path)
        if documents:
            yaml_path = str(path.parent.joinpath('agdc-metadata.yaml'))
            logging.info("Writing %s dataset(s) into %s", len(documents), yaml_path)
            with open(yaml_path, 'w') as stream:
                yaml.dump_all(documents, stream)
        else:
            logging.info("No datasets discovered. Bye!")


if __name__ == "__main__":
    main()
'''

TEN ['/home/simonaoliver/data/S2A_OPER_PRD_MSIL1C_PDMC_20151205T075742_R073_V20151205T001643_20151205T001643.SAFE/GRANULE/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_N02.00/IMG_DATA/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_B02.jp2', '/home/simonaoliver/data/S2A_OPER_PRD_MSIL1C_PDMC_20151205T075742_R073_V20151205T001643_20151205T001643.SAFE/GRANULE/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_N02.00/IMG_DATA/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_B04.jp2', '/home/simonaoliver/data/S2A_OPER_PRD_MSIL1C_PDMC_20151205T075742_R073_V20151205T001643_20151205T001643.SAFE/GRANULE/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_N02.00/IMG_DATA/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_B03.jp2', '/home/simonaoliver/data/S2A_OPER_PRD_MSIL1C_PDMC_20151205T075742_R073_V20151205T001643_20151205T001643.SAFE/GRANULE/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_A002358_T55HFS_N02.00/IMG_DATA/S2A_OPER_MSI_L1C_TL_SGS__20151205T065551_



AttributeError: 'bool' object has no attribute 'astype'

In [None]:
import folium