In [1]:
import os
import sys
import xml.etree.ElementTree as ET
from shapely.geometry import Polygon
import numpy as np
import re

In [2]:
def granule_identifier_to_xml_name(granule_identifier):
    '''
    Converts a granule identifier into the file name of the granule metadata
    XML.

    Every "_MSI_" is replaced with "_MTD_", the last underscore-separated
    field (e.g. "N01.03") is dropped and ".xml" is appended.
    e.g.
    From
    Granule Identifier:
    S2A_OPER_MSI_L1C_TL_SGS__20150817T131818_A000792_T28QBG_N01.03
    To
    Granule Metadata XML name:
    S2A_OPER_MTD_L1C_TL_SGS__20150817T131818_A000792_T28QBG.xml
    '''
    # Replace "MSI" with "MTD".
    changed_item_type = granule_identifier.replace("_MSI_", "_MTD_")
    # Drop the trailing field. Empty fields (from double underscores) survive
    # the split/join round trip unchanged.
    fields = changed_item_type.split("_")[:-1]
    # Stitch the remaining fields together and append the XML file extension.
    return "_".join(fields) + ".xml"


def get_granule_xml_path(granule_path, granule_identifier):
    '''
    Determines the metadata path by joining the granule path with the XML
    file name derived from the granule identifier.

    Returns the joined path. If nothing exists at that path, a message is
    printed and an AssertionError carrying the same message is raised.
    '''
    xml_name = granule_identifier_to_xml_name(granule_identifier)
    metadata_path = os.path.join(granule_path, xml_name)
    # Explicit check instead of an assert statement: asserts are stripped when
    # Python runs with -O, which would silently return a non-existent path.
    if not os.path.exists(metadata_path):
        message = "Granule metadata XML does not exist: %s" % (metadata_path,)
        print(message)
        raise AssertionError(message)

    return metadata_path


class Granule(object):
    '''
    Holds the metadata of a single granule: its identifiers, its folder, the
    path of its metadata XML and its footprint.
    '''
    def __init__(self, granule, dataset):
        # Granule folders are looked up in the "GRANULE" directory of the
        # dataset.
        granule_root = os.path.join(dataset.path, "GRANULE")
        attributes = granule.attrib
        self.granule_identifier = attributes["granuleIdentifier"]
        self.granule_path = os.path.join(granule_root, self.granule_identifier)
        self.datastrip_identifier = attributes["datastripIdentifier"]
        # Locate the granule's own metadata XML and derive the footprint
        # from its parsed content.
        self.metadata_path = get_granule_xml_path(
            self.granule_path, self.granule_identifier)
        self.footprint = get_footprint(ET.parse(self.metadata_path))


class SentinelDataSet(object):
    '''
    This object contains relevant metadata from the SAFE file and its containing
    granules as Granule() object.

    Attributes set by the constructor:
    path                  -- normalised path of the SAFE folder
    product_metadata_path -- path of the product metadata XML
    product_start_time, product_stop_time, generation_time
                          -- timestamps as returned by get_timestamps()
    processing_level      -- as returned by get_processing_level()
    footprint             -- as returned by get_footprint()
    granules              -- list returned by get_granules()

    The interpreter is exited through sys.exit() when "manifest.safe" is
    missing from the given folder.
    '''
    def __init__(self, path):
        self.path = os.path.normpath(path)

        # Find manifest.safe.
        manifest_safe = os.path.join(self.path, "manifest.safe")
        # Explicit check instead of an assert statement: asserts are stripped
        # when Python runs with -O, which would skip this test entirely.
        if not os.path.isfile(manifest_safe):
            sys.exit("manifest.safe not found: %s" % (manifest_safe,))
        # Read manifest.safe.
        self.product_metadata_path = get_product_metadata_path(
            manifest_safe,
            self.path
            )
        # Read product metadata XML.
        product_metadata = ET.parse(self.product_metadata_path)
        # Get timestamps.
        (
        self.product_start_time,
        self.product_stop_time,
        self.generation_time
        ) = get_timestamps(product_metadata)
        # Read processing level (e.g. Level-1C)
        self.processing_level = get_processing_level(product_metadata)
        # Get product Footprint
        self.footprint = get_footprint(product_metadata)
        # Read granule info. The dataset itself is handed over so that each
        # Granule can resolve its folder relative to self.path.
        self.granules = get_granules(product_metadata, self)


def get_processing_level(product_metadata):
    '''
    Finds and returns the "Processing Level".

    product_metadata must offer iter(tag), e.g. a parsed ElementTree. The
    text of the "PROCESSING_LEVEL" child of "Product_Info" is returned; if
    several "Product_Info" elements exist, the last one wins.

    Raises ValueError if no "Product_Info" element exists or if one of them
    has no "PROCESSING_LEVEL" child.
    '''
    level_element = None
    for element in product_metadata.iter("Product_Info"):
        level_element = element.find("PROCESSING_LEVEL")
        if level_element is None:
            raise ValueError(
                'No "PROCESSING_LEVEL" element found in "Product_Info".')
    # Without this guard a document lacking "Product_Info" would surface as
    # an unrelated error about an unassigned local variable.
    if level_element is None:
        raise ValueError('No "Product_Info" element found in metadata.')
    return level_element.text


def get_timestamps(product_metadata):
    '''
    Finds and returns the "Product Start Time", "Product Stop Time" and
    "Generation Time".

    product_metadata must offer iter(tag), e.g. a parsed ElementTree. The
    three values are returned as a tuple of the element texts, in the order
    given above; if several "Product_Info" elements exist, the last one wins.

    Raises ValueError if no "Product_Info" element exists or if one of them
    lacks any of the three timestamp children.
    '''
    tags = ("PRODUCT_START_TIME", "PRODUCT_STOP_TIME", "GENERATION_TIME")
    timestamps = None
    for element in product_metadata.iter("Product_Info"):
        # Read timestamps.
        nodes = [element.find(tag) for tag in tags]
        missing = [tag for tag, node in zip(tags, nodes) if node is None]
        if missing:
            raise ValueError(
                'Missing element(s) in "Product_Info": %s'
                % ", ".join(missing))
        timestamps = tuple(node.text for node in nodes)
    # Without this guard a document lacking "Product_Info" would surface as
    # an unrelated error about an unassigned local variable.
    if timestamps is None:
        raise ValueError('No "Product_Info" element found in metadata.')
    return timestamps


def get_granules(product_metadata, self):
    '''
    Finds granules information and returns a list of Granule objects.

    product_metadata must offer iter(tag), e.g. a parsed ElementTree. The
    second argument (named "self" although this is a plain function) is the
    dataset object that is handed on to every Granule. One Granule is built
    from the first "Granules" child of each "Granule_List" found in
    "Product_Organisation"; if several "Product_Info" elements exist, the
    last one is used.

    Raises ValueError if "Product_Info" or its "Product_Organisation" child
    is missing.
    '''
    found_product_info = False
    product_organisation = None
    for element in product_metadata.iter("Product_Info"):
        found_product_info = True
        product_organisation = element.find("Product_Organisation")
    # Without these guards a missing element would surface as an unrelated
    # error about an unassigned local variable or a None attribute access.
    if not found_product_info:
        raise ValueError('No "Product_Info" element found in metadata.')
    if product_organisation is None:
        raise ValueError(
            'No "Product_Organisation" element found in "Product_Info".')
    granules = [
        Granule(granule_list.find("Granules"), self)
        for granule_list in product_organisation.findall("Granule_List")
        ]
    return granules


def is_granule_metadata(metadata):
    '''
    If metadata XML has element "TILE_ID", it is assumed that it is the granule
    metadata file.

    Returns True when at least one "TILE_ID" element exists, False otherwise.
    '''
    # next() with a default stops at the first "TILE_ID" instead of walking
    # the whole tree just to count the matches.
    return next(metadata.iter("TILE_ID"), None) is not None


def get_footprint(metadata):
    '''
    Finds the footprint coordinates and returns them as a shapely
    polygon.

    For granule metadata (see is_granule_metadata) the footprint is computed
    by footprint_from_geocoding. Otherwise it is built from the
    "EXT_POS_LIST" text of the "Global_Footprint" elements found below
    "Product_Footprint"; the last one encountered wins.

    Raises ValueError if no footprint could be found at all and
    AssertionError if the footprint found is not a valid polygon.
    '''
    footprint = None
    # Check whether product or granule footprint needs to be calculated.
    if is_granule_metadata(metadata):
        footprint = footprint_from_geocoding(metadata)
    else:
        # More than one "Product_Footprint" item can be found here
        # (presumably because the elements are nested - TODO confirm), so
        # the same coordinates may be parsed more than once.
        for element in metadata.iter("Product_Footprint"):
            for global_footprint in element.iter("Global_Footprint"):
                coords = global_footprint.find("EXT_POS_LIST").text.split()
                footprint = footprint_from_coords(coords)
    # Without this guard a document lacking footprint elements would surface
    # as an unrelated error about an unassigned local variable.
    if footprint is None:
        raise ValueError("No footprint found in metadata.")
    # Explicit check instead of an assert statement: asserts are stripped when
    # Python runs with -O, which would let an invalid footprint through.
    if not footprint.is_valid:
        from shapely.validation import explain_validity
        message = "No valid footprint could be determined."
        print(message)
        print(explain_validity(footprint))
        raise AssertionError(message)
    return footprint


def footprint_from_geocoding(metadata, resolution=10):
    '''
    Computes the footprint from the first "Tile_Geocoding" element and
    returns it as a shapely Polygon in EPSG:4326.

    The two children of "Tile_Geocoding" carrying the given "resolution"
    attribute provide the raster size and the upper left corner. The
    resulting rectangle is built in the coordinate system named by
    "HORIZONTAL_CS_CODE" and then reprojected.

    Raises StopIteration if there is no "Tile_Geocoding" element and
    ValueError if the number of elements matching the resolution is not
    exactly two.
    '''
    from functools import partial
    import pyproj
    from shapely.ops import transform

    # Builtin next() instead of the Python-2-only iterator method .next().
    tile_geocoding = next(metadata.iter("Tile_Geocoding"))
    tile_epsg = tile_geocoding.findall("HORIZONTAL_CS_CODE")[0].text
    searchstring = ".//*[@resolution='%s']" %(resolution)
    # Relies on document order: the size element comes first, the
    # geoposition element second.
    size, geoposition = tile_geocoding.findall(searchstring)
    # Positional unpacking: assumes the children are ordered rows/columns
    # and upper-left x/y, pixel width/height.
    nrows, ncols = (int(i.text) for i in size)
    ulx, uly, xdim, ydim = (int(i.text) for i in geoposition)
    # Columns extend along x and rows along y (the two were swapped before,
    # which only went unnoticed for square tiles). abs() keeps the result
    # independent of the sign convention of the pixel height.
    lrx = ulx + ncols * abs(xdim)
    lry = uly - nrows * abs(ydim)
    left, right = ulx, lrx
    top, bottom = uly, lry
    # Closed ring, starting and ending at the upper left corner.
    points = [
        (left, top),
        (right, top),
        (right, bottom),
        (left, bottom),
        (left, top)
        ]
    native_footprint = Polygon(points)
    # NOTE(review): Proj(init=...) and pyproj.transform appear to be
    # deprecated in newer pyproj releases - confirm before upgrading pyproj.
    project = partial(
        pyproj.transform,
        pyproj.Proj(init=tile_epsg),
        pyproj.Proj(init='EPSG:4326')
        )
    footprint = transform(project, native_footprint)
    return footprint


def footprint_from_coords(coords):
    '''
    Converts a flat list of alternating latitude / longitude coordinates and
    returns it as a shapely Polygon made of (longitude, latitude) points.

    The values may be strings; each one is converted with float().

    Raises ValueError if the number of values is odd and AssertionError if
    the resulting polygon is not valid.
    '''
    # An odd number of values cannot be paired up; zip() would silently drop
    # the last one, so refuse it explicitly.
    if len(coords) % 2:
        raise ValueError(
            "Expected an even number of coordinate values, got %d."
            % len(coords))
    # Pair every latitude (even index) with the longitude following it and
    # swap them into x/y order.
    points = [
        (float(lon), float(lat))
        for lat, lon in zip(coords[0::2], coords[1::2])
        ]
    footprint = Polygon(points)
    # Explicit check instead of an assert statement: asserts are stripped when
    # Python runs with -O, which would let an invalid footprint through.
    if not footprint.is_valid:
        message = "Footprint is not valid."
        print(message)
        raise AssertionError(message)
    return footprint


def get_product_metadata_path(manifest_safe, basepath):
    '''
    Returns path to product metadata XML file.

    manifest_safe is the path of the manifest file to parse. The "href" of
    the first "fileLocation" below the data object whose ID is
    "S2_Level-1C_Product_Metadata" is joined onto basepath and returned.

    Raises ValueError if the manifest has no "dataObjectSection" or no data
    object with that ID, and AssertionError (after printing a message) if
    the resulting path is not an existing file.
    '''
    manifest = ET.parse(manifest_safe)
    data_object_section = manifest.find("dataObjectSection")
    if data_object_section is None:
        raise ValueError(
            'No "dataObjectSection" found in %s' % (manifest_safe,))
    for data_object in data_object_section:
        # Find product metadata XML.
        if data_object.attrib.get("ID") != "S2_Level-1C_Product_Metadata":
            continue
        # Builtin next() instead of the Python-2-only iterator method .next().
        relpath = next(data_object.iter("fileLocation")).attrib["href"]
        product_metadata_path = os.path.join(basepath, relpath)
        # Explicit check instead of an assert statement: asserts are stripped
        # when Python runs with -O, which would return a non-existent path.
        if not os.path.isfile(product_metadata_path):
            message = "S2_Level-1C_product_metadata_path not found: %s" % (
                product_metadata_path,)
            print(message)
            raise AssertionError(message)
        return product_metadata_path
    # Falling off the loop used to return None implicitly, which only failed
    # later when the caller tried to parse it.
    raise ValueError(
        'No "S2_Level-1C_Product_Metadata" data object found in %s'
        % (manifest_safe,))

In [32]:
def printS2(input_SAFE):
    '''
    Prints a summary of the product found at input_SAFE: paths, timestamps,
    processing level, footprint and, for every granule, its identifiers and
    footprint.

    A SentinelDataSet is built from the given path. An AssertionError is
    raised if a granule footprint does not intersect the product footprint.
    '''
    dataset = SentinelDataSet(input_SAFE)

    # Paths
    print("dataset.path: %s" % (dataset.path,))
    print("dataset.product_metadata_path: %s" % (
        dataset.product_metadata_path,))
    print('\n')

    # Timestamps
    print("dataset.product_start_time: %s" % (dataset.product_start_time,))
    print("dataset.product_stop_time: %s" % (dataset.product_stop_time,))
    print("dataset.generation_time: %s" % (dataset.generation_time,))
    print('\n')

    # Processing level
    print("dataset.processing_level: %s" % (dataset.processing_level,))
    print('\n')

    # Footprint
    print("dataset.footprint: %s" % (dataset.footprint,))
    print('\n')

    # Granules
    print("%d granule(s)" % len(dataset.granules))
    # Granules are numbered from 1 in the output.
    for number, granule in enumerate(dataset.granules, 1):
        print("granule %d" % number)
        print("granule.datastrip_identifier %s" % (
            granule.datastrip_identifier,))
        print("granule.granule_identifier %s" % (granule.granule_identifier,))
        print(granule.footprint)
        # Sanity check: every granule has to overlap the product footprint.
        assert dataset.footprint.intersects(granule.footprint)

In [33]:
# Directory holding the test data (pn) and the name of the .SAFE product
# folder inside it (fn). pn ends with a slash because the cells below build
# the product path by plain concatenation (pn+fn).
# NOTE(review): absolute, machine-specific path - adjust before running this
# notebook elsewhere.
pn = '/media/SOLabNFS2/tmp/sentinel-2_test/'
fn = 'S2A_OPER_PRD_MSIL1C_PDMC_20160108T151349_R135_V20160108T082023_20160108T082023.SAFE/'

In [35]:
# Build the SentinelDataSet for the sample product; this reads manifest.safe
# plus the product and granule metadata XML files.
dataset = SentinelDataSet(pn+fn)

In [34]:
# Print the summary of the sample product. printS2 builds its own
# SentinelDataSet from the path; it does not use the `dataset` variable.
printS2(pn+fn)

dataset.path: /media/SOLabNFS2/tmp/sentinel-2_test/S2A_OPER_PRD_MSIL1C_PDMC_20160108T151349_R135_V20160108T082023_20160108T082023.SAFE
dataset.product_metadata_path: /media/SOLabNFS2/tmp/sentinel-2_test/S2A_OPER_PRD_MSIL1C_PDMC_20160108T151349_R135_V20160108T082023_20160108T082023.SAFE/./S2A_OPER_MTD_SAFL1C_PDMC_20160108T151349_R135_V20160108T082023_20160108T082023.xml


dataset.product_start_time: 2016-01-08T08:20:23.173Z
dataset.product_stop_time: 2016-01-08T08:20:23.173Z
dataset.generation_time: 2016-01-08T15:13:49.000668Z


dataset.processing_level: Level-1C


dataset.footprint: POLYGON ((23.66572827693663 -36.14156270149769, 23.62480741769897 -37.08613548526965, 24.74904798397539 -37.11230540027556, 24.74903290034776 -37.11282730461503, 24.80401952948014 -37.11358502024014, 24.85903336989541 -37.11486562473975, 24.85904772807312 -37.11434330869233, 25.87377688433556 -37.12832627417114, 25.87376928330126 -37.12885182959283, 25.92914264529567 -37.12908921423198, 25.98452935378108 -3