In [1]:
import os
import rasterio
import shapely
import pyproj
import threading
import pandas as pd
import geopandas as gpd
from collections import namedtuple, defaultdict, OrderedDict

In [2]:
# Map the base name of every directory found below 'data' (the root itself
# included) to the path os.walk reported for it. Directories that share a base
# name overwrite each other; the one visited last wins.
cache_folders = {
    os.path.basename(folder): folder
    for folder, _subfolders, _filenames in os.walk('data')
}

# Expose the mapping through attribute access (dirs.gfc, dirs.masks, ...).
# The field order follows the iteration order of cache_folders.
Directories = namedtuple('Directories', list(cache_folders))
dirs = Directories._make(cache_folders.values())

print(dirs)

Directories(data='data', auxiliary='data/auxiliary', core='data/core', gl30='data/core/gl30', masks='data/auxiliary/masks', gfc='data/core/gfc', gc='data/core/gc')


# GFC mask

In [3]:
# Split the sorted directory listing into three consecutive runs of equal size
# and pair them up position by position.
# NOTE(review): assumes the folder holds only the gain / lossyear / treecover
# rasters, in equal numbers, and that sorting groups them in that order - confirm.
gfc_files = sorted(os.listdir(dirs.gfc))
sub = len(gfc_files) // 3
gain = gfc_files[:sub]
lossyear = gfc_files[sub:2 * sub]
treecover = gfc_files[2 * sub:3 * sub]
gfc_files = list(zip(gain, lossyear, treecover))

In [32]:
class OrderedDefaultListDict(OrderedDict):
    """Insertion-ordered dictionary whose missing keys start out as an empty list.

    Adapted from
    https://stackoverflow.com/questions/6190331/can-i-do-an-ordered-default-dict-in-python#6190500
    """

    def __missing__(self, key):
        # Store the new list under the key before handing it out, so that the
        # caller and every later lookup share the same list object.
        bucket = []
        self[key] = bucket
        return bucket

    
def get_raster_meta(path: str) -> tuple:
    """Read the bounds and the CRS of a raster file.

    :param path: location of the raster, passed to ``rasterio.open`` in read mode
    :return: a ``Meta`` namedtuple with the fields ``bounds`` and ``crs`` as
        reported by the opened dataset
    """
    # The dataset is only needed while the two attributes are read; the
    # context manager closes it again right afterwards.
    with rasterio.open(path, 'r') as dataset:
        extent, reference_system = dataset.bounds, dataset.crs

    return namedtuple('Meta', 'bounds crs')(bounds=extent, crs=reference_system)


def bounds_to_polygon(bounds: tuple) -> shapely.geometry.Polygon:
    """Turn a bounding box into a rectangular polygon.

    :param bounds: object exposing the attributes ``left``, ``bottom``,
        ``right`` and ``top``
    :return: polygon whose ring visits the corners in the order
        (left, top), (left, bottom), (right, bottom), (right, top)
    """
    corners = [
        (bounds.left, bounds.top),
        (bounds.left, bounds.bottom),
        (bounds.right, bounds.bottom),
        (bounds.right, bounds.top),
    ]
    return shapely.geometry.Polygon(corners)


def reproject_bounds(bounds: tuple, source_crs: dict, target_crs: dict) -> tuple:
    """Transform a bounding box from one coordinate reference system to another.

    Only the (left, bottom) and the (right, top) corner are transformed; the
    result is assembled from these two points.

    :param bounds: object exposing ``left``, ``bottom``, ``right`` and ``top``
    :param source_crs: mapping unpacked as keyword arguments into ``pyproj.Proj``
        for the CRS the bounds are given in
    :param target_crs: mapping unpacked the same way for the CRS to convert to
    :return: ``BoundingBox`` namedtuple (left, bottom, right, top) in the target CRS
    """
    # NOTE(review): newer pyproj releases deprecate pyproj.transform in favour of
    # pyproj.Transformer - confirm against the installed version.
    projection_from = pyproj.Proj(**source_crs)
    projection_to = pyproj.Proj(**target_crs)

    west, south = pyproj.transform(
        projection_from, projection_to, bounds.left, bounds.bottom
    )
    east, north = pyproj.transform(
        projection_from, projection_to, bounds.right, bounds.top
    )

    BoundingBox = namedtuple('BoundingBox', 'left bottom right top')
    return BoundingBox(left=west, bottom=south, right=east, top=north)


def make_properties_table(raster_files)->pd.DataFrame:
    """Build an attribute table with one row per entry of ``raster_files``.

    A plain string is stored in the column ``prop1``. Any other entry is
    treated as a sequence whose elements are spread over the columns
    ``prop0``, ``prop1``, ... by position. Columns appear in the order in
    which they are first encountered.

    Entries may differ in length: cells that an entry does not provide are
    filled with NaN, so every entry still yields exactly one row.

    :param raster_files: iterable of strings and/or sequences of values
    :return: DataFrame with one row per entry; empty if no values were given
    """
    column_name = 'prop'
    columns = OrderedDict()  # used as an ordered set of the column names seen so far
    records = []

    for item in raster_files:
        if isinstance(item, str):
            record = {column_name + '1': item}
        else:
            record = {
                column_name + str(idx): value
                for idx, value in enumerate(item)
            }
        for key in record:
            columns[key] = None
        records.append(record)

    if not columns:
        # Nothing to tabulate (no entries, or only empty sequences).
        return pd.DataFrame()

    # Building from per-row records lets pandas pad missing cells with NaN
    # instead of failing on columns of unequal length.
    return pd.DataFrame(records, columns=list(columns))


def tile_index(path_to: str, raster_files: list, target_crs: dict) -> gpd.GeoDataFrame:
    """Create a tile index layer holding one footprint polygon per raster entry.

    For a string entry the file of that name is opened; for any other entry
    only its first element is opened. Bounds of rasters whose CRS differs from
    ``target_crs`` are reprojected before the footprint polygon is built.

    :param path_to: folder that contains the raster files
    :param raster_files: file names, or sequences of file names, relative to ``path_to``
    :param target_crs: CRS assigned to the resulting layer
    :return: GeoDataFrame combining the properties table of ``raster_files``
        with the footprint geometries
    """
    # TODO: refactor - pass raster_files via args and supply the properties
    # as a separate list.
    footprints = []

    for entry in raster_files:
        file_name = entry if isinstance(entry, str) else entry[0]
        extent, tile_crs = get_raster_meta(os.path.join(path_to, file_name))
        if tile_crs != target_crs:
            extent = reproject_bounds(extent, tile_crs, target_crs)
        footprints.append(bounds_to_polygon(extent))

    index_layer = gpd.GeoDataFrame(
        make_properties_table(raster_files),
        geometry=gpd.GeoSeries(footprints),
    )
    index_layer.crs = target_crs
    return index_layer


# Build the tile index for the GFC rasters in EPSG:4326 and store it as a
# shapefile in the masks folder.
layer = tile_index(
    path_to=dirs.gfc,
    raster_files=gfc_files,
    target_crs={'init': 'epsg:4326'},
)
layer.to_file(os.path.join(dirs.masks, 'gfc_mask.shp'))

# GL30 mask

In [6]:
# Names of all entries in the GL30 folder, in ascending order.
gl30_files = os.listdir(dirs.gl30)
gl30_files.sort()

In [34]:
# Build the tile index for the GL30 rasters in EPSG:4326.
# It is saved as 'gl30_mask.shp' in the masks folder so that it cannot be
# mistaken for the GFC tile index ('gfc_mask.shp').
layer = tile_index(dirs.gl30, gl30_files, {'init': 'epsg:4326'})
layer.to_file(os.path.join(dirs.masks, 'gl30_mask.shp'))

Edge tiles have coordinate system issues: their x/longitude coordinates overflow the bounding box of the applied coordinate system.

In [50]:
# Compare two GL30 tiles after reprojection to EPSG:4326. For the first tile
# the left edge is overridden with a hand-picked value; the other three edges
# are taken from the raster itself.
BoundingBox = namedtuple('BoundingBox', 'left bottom right top')
meta = get_raster_meta(os.path.join(dirs.gl30, 'n01_00_2010lc030.tif'))
meta2 = get_raster_meta(os.path.join(dirs.gl30, 'n02_15_2010lc030.tif'))
tmp = BoundingBox(
    left=203394.629525,
    bottom=meta.bounds.bottom,
    right=meta.bounds.right,
    top=meta.bounds.top,
)
bounds = reproject_bounds(tmp, meta.crs, {'init': 'epsg:4326'})
bounds2 = reproject_bounds(meta2.bounds, meta2.crs, {'init': 'epsg:4326'})
# One value per line: metadata, overridden box and reprojected box of the
# first tile, then metadata and reprojected box of the second tile.
print(meta, tmp, bounds, meta2, bounds2, sep='\n')

Meta(bounds=BoundingBox(left=165406.4430837062, bottom=-601.0131174263079, right=834586.4430837199, top=554038.9868832752), crs=CRS({'init': 'epsg:32601'}))
BoundingBox(left=203394.629525, bottom=-601.0131174263079, right=834586.4430837199, top=554038.9868832752)
BoundingBox(left=-179.6645501420702, bottom=-0.005431635641659461, right=-173.9831051386183, top=5.005478418984218)
Meta(bounds=BoundingBox(left=176734.03821238442, bottom=1657719.0263254964, right=823264.038233795, top=2214909.026326404), crs=CRS({'init': 'epsg:32602'}))
BoundingBox(left=-174.0053601744084, bottom=14.974692376549765, right=-167.91075246245728, top=20.00401663536249)
