In [None]:
import os
import json
from PIL import Image

import numpy as np
import pandas as pd

import rasterio
from rasterio import warp
from rasterio.mask import mask

In [None]:
# Utility functions

# Short alias for os.path.join (note: this is NOT pathlib.Path).
Path = os.path.join

def DataPath(path):
    """Return `path` joined onto the base data directory.

    DATA_DIR is looked up when the function is called, so it only has to be
    defined before the first call, not before this definition.
    """
    return os.path.join(DATA_DIR, path)

def mkdir_p(directory):
    """Create `directory` (and any missing parents) if it does not exist yet.

    Behaves like the shell's `mkdir -p`: calling it on an existing directory
    is a no-op, and intermediate directories are created as needed.

    Raises:
        FileExistsError: if `directory` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() before creating the directory.
    os.makedirs(directory, exist_ok=True)

In [None]:
# Coordinates in the GeoJSON files are latitude/longitude in EPSG:4326,
# so that is the source CRS used when warping geometries onto an image.
src_crs = rasterio.crs.CRS.from_string('EPSG:4326')

# Base data directory, resolved relative to the current working directory
DATA_DIR = os.path.join(os.getcwd(), "data")

# Directories for test, verified and un-verified data
TEST_DIR       = DataPath("test")
VERIFIED_DIR   = DataPath("verified")
UNVERIFIED_DIR = DataPath("unverified")

# The base directory has to be created before any of its children
mkdir_p(DATA_DIR)
mkdir_p(TEST_DIR)
mkdir_p(VERIFIED_DIR)
mkdir_p(UNVERIFIED_DIR)

print(f"Data Directory  : {DATA_DIR}")
print(f"Verified Data   : {VERIFIED_DIR}")
print(f"Unverified Data : {UNVERIFIED_DIR}")
print(f"Test Directory  : {TEST_DIR}")

In [None]:
# Give both the verified and un-verified trees one sub-directory per class.
# Class names are the column headers of train_labels.csv after the first two
# columns (presumably an id and a verified flag -- confirm against the CSV).
train_labels = pd.read_csv(DataPath("train_labels.csv"))

for roof_type in train_labels.columns[2:]:
    for parent_dir in (VERIFIED_DIR, UNVERIFIED_DIR):
        mkdir_p(Path(parent_dir, roof_type))

# Training data

In [None]:
# Walk every row of metadata.csv and, for each feature in that row's training
# GeoJSON, crop the feature's polygon out of the row's image and save it as
#   <DATA_DIR>/<verified|unverified>/<roof_material>/<feature id>.tif
metadata = pd.read_csv(DataPath('metadata.csv'))
for i, row in metadata.iterrows():
    img_path     = row['image']
    geojson_path = row['train']

    # Skip rows with no training GeoJSON (an empty CSV cell is read as NaN)
    if pd.isna(geojson_path):
        continue

    with rasterio.open(DataPath(img_path)) as tiff:
        # Parse the GeoJSON and release the file handle straight away
        with open(DataPath(geojson_path)) as gjfile:
            feature_collection = json.load(gjfile)

        for feature in feature_collection['features']:
            # Feature fields we'll need
            fid      = feature['id']
            verified = 'verified' if feature['properties']['verified'] else 'unverified'
            material = feature['properties']['roof_material']
            geom     = feature['geometry']

            # Warp the geometry from lat/long into the image's own CRS
            warped_geom = warp.transform_geom(src_crs, tiff.crs, geom, precision=16)

            # Crop the image down to the geometry
            out_image, out_transform = mask(tiff, [warped_geom], crop=True)
            out_meta = tiff.meta.copy()

            # The crop has its own size and transform; out_image.shape[1]
            # and shape[2] are used as height and width
            out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[1],
                             "width":  out_image.shape[2],
                             "transform": out_transform})

            # Write the crop to its own file
            with rasterio.open(Path(DATA_DIR, verified, material, f"{fid}.tif"), "w", **out_meta) as dest:
                dest.write(out_image)

# Testing data

In [None]:
# Walk every row of metadata.csv and, for each feature in that row's test
# GeoJSON, crop the feature's polygon out of the row's image and save it as
#   <TEST_DIR>/<feature id>.tif
metadata = pd.read_csv(DataPath('metadata.csv'))
for i, row in metadata.iterrows():
    img_path     = row['image']
    geojson_path = row['test']

    # Skip rows with no test GeoJSON (an empty CSV cell is read as NaN)
    if pd.isna(geojson_path):
        continue

    with rasterio.open(DataPath(img_path)) as tiff:
        # Parse the GeoJSON and release the file handle straight away
        with open(DataPath(geojson_path)) as gjfile:
            feature_collection = json.load(gjfile)

        for feature in feature_collection['features']:
            # Feature fields we'll need
            fid  = feature['id']
            geom = feature['geometry']

            # Warp the geometry from lat/long into the image's own CRS
            warped_geom = warp.transform_geom(src_crs, tiff.crs, geom, precision=16)

            # Crop the image down to the geometry
            out_image, out_transform = mask(tiff, [warped_geom], crop=True)
            out_meta = tiff.meta.copy()

            # The crop has its own size and transform; out_image.shape[1]
            # and shape[2] are used as height and width
            out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[1],
                             "width":  out_image.shape[2],
                             "transform": out_transform})

            # Write the crop to its own file
            with rasterio.open(Path(TEST_DIR, f"{fid}.tif"), "w", **out_meta) as dest:
                dest.write(out_image)