In [14]:
import os
import re
import fiona
import rasterio
import shapely
import threading
import pyproj
import pandas as pd
import geopandas as gpd
from collections import namedtuple, defaultdict

In [2]:
# Index every directory under 'data' by its own name (the last component of
# its path), so each folder can be reached as an attribute of `dirs`.
# Directories that share a name overwrite one another in this mapping.
cache_folders = dict(
    (os.path.basename(root), root)
    for root, _subdirs, _files in os.walk('data')
)

# One namedtuple field per directory name, holding that directory's path.
Directories = namedtuple('Directories', list(cache_folders))
dirs = Directories(**cache_folders)

print(dirs)

Directories(data='data', core='data/core', gc='data/core/gc', masks='data/masks', gfc='data/core/gfc', auxiliary='data/auxiliary', gl30='data/core/gl30')


# GFC mask

In [3]:
# Tile identifier embedded in each file name, e.g. "10N_010E":
# two digits + N/S, an underscore, three digits + W/E.
regex = re.compile(r'(?P<id>\d{2}(?:N|S)_\d{3}(?:W|E))', re.VERBOSE | re.IGNORECASE)

# Group the file names found in the GFC directory by tile identifier.
gfc_files = defaultdict(list)

# Walking a sorted listing keeps every per-tile list in sorted order no matter
# how many files a tile has or in which order the OS returns the entries.
for item in sorted(os.listdir(dirs.gfc)):
    found = regex.search(item)
    if found is None:
        # Entry without a tile id in its name: `search` returned None, so
        # calling `.group()` on it would raise AttributeError. Skip it.
        continue
    gfc_files[found.group('id')].append(item)

In [53]:
def get_raster_meta(path):
    """Return the ``(crs, bounds)`` pair of the raster stored at ``path``.

    The file is opened in read mode through ``rasterio.open`` inside a
    ``with`` block, so the dataset's context manager is exited before the
    values reach the caller.
    """
    with rasterio.open(path, 'r') as dataset:
        return dataset.crs, dataset.bounds


def bounds_to_polygon(bounds):
    """Build the rectangular polygon described by a bounding box.

    Parameters
    ----------
    bounds : object
        Any object exposing ``left``, ``bottom``, ``right`` and ``top``
        attributes.

    Returns
    -------
    shapely.geometry.Polygon
        Rectangle whose vertices are listed as top-left, bottom-left,
        bottom-right, top-right.
    """
    # Plain attribute access instead of calling ``__getattribute__`` by hand:
    # same result for ordinary objects, and it also honours ``__getattr__``
    # fallbacks that a direct dunder call bypasses.
    corners = [
        (bounds.left, bounds.top),
        (bounds.left, bounds.bottom),
        (bounds.right, bounds.bottom),
        (bounds.right, bounds.top),
    ]
    return shapely.geometry.Polygon(corners)


def reproject_bounds(bounds, crs):
    """Placeholder for reprojecting ``bounds``; not implemented yet.

    Both arguments are currently ignored and ``None`` is returned.
    """
    return None


# Build a tile index: one rectangular footprint per tile, carrying the tile id
# and the names of the three files stored under 'gain', 'loss' and 'cover'.
properties = {'id': [], 'gain': [], 'loss': [], 'cover': []}
polygons = []
for key, value in gfc_files.items():
    # The three layers are told apart purely by sorted position, so a tile
    # with a missing or extra file would either fail with a bare IndexError
    # or be silently mislabelled. Sort here and fail early, naming the tile.
    names = sorted(value)
    if len(names) != 3:
        raise ValueError(
            'expected 3 files for tile {!r}, found {}: {}'.format(
                key, len(names), names
            )
        )

    # Only the first file of the tile is opened to obtain the footprint.
    # NOTE(review): the CRS read here is not used; the layer CRS is
    # hard-coded below - confirm every tile really is EPSG:4326.
    crs, bounds = get_raster_meta(os.path.join(dirs.gfc, names[0]))
    polygons.append(bounds_to_polygon(bounds))
    properties['id'].append(key)
    properties['gain'].append(names[0])
    properties['loss'].append(names[1])
    properties['cover'].append(names[2])

# Attribute table and geometries are built in the same loop, so their rows
# line up one-to-one.
df = pd.DataFrame(properties)
gs = gpd.GeoSeries(polygons)
layer = gpd.GeoDataFrame(df, geometry=gs)
layer.crs = {'init': 'epsg:4326'}
layer.to_file('data/test.shp')