This notebook filters rasters in order to test some results and functions:

- filtering based on pixel values, so that we can visualize the extreme pixels and look out for anomalies;
- testing the mask function.

In [9]:
import os, sys
import logging, logging.config
import glob
import warnings

import yaml
from joblib import Parallel, delayed
from tqdm import tqdm

import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
from shapely.geometry import mapping

import rasterio
from rasterio.mask import mask
from rasterio.features import shapes

import numpy as np

import fct_misc

from helpers import XYZ


In [13]:
# Input locations (absolute paths on the processing machine).
TILES_DIR = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/obj_detector/all-images'
ROADS = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/roads_polygons.shp'
TILES_INFO = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/json/tiles_aoi.geojson'

# Load the tiles information and the roads layer as GeoDataFrames.
tiles_info = gpd.read_file(TILES_INFO)
roads = gpd.read_file(ROADS)

# Testing extreme values
## Making polygons on the zones to check

In [None]:
# List every GeoTIFF found in the tiles directory and preview the first two paths.
tif_pattern = f'{TILES_DIR}/*.tif'
files = glob.glob(tif_pattern)
print(files[:2])

In [None]:
# Thresholds outside of which a pixel value is considered as extreme.
lim_sup = 200
lim_inf = 1

# Parallel lists describing each extreme zone: its polygon, the band it was
# found in and the value of the vectorized binary image.
geom = []
bands = []
pixel_values = []

for file in tqdm(files, desc='Checking files'):
    # Open each raster once and read all its bands from the same handle
    # instead of re-opening the file for every band.
    with rasterio.open(file) as f:
        # Bands 1 to 4 are read: the rasters are expected to have at least 4 bands.
        for band in range(1, 5):
            image = f.read(band)

            # Binary image: 1 where the pixel value is extreme
            # (below lim_inf or above lim_sup), 0 everywhere else.
            is_extreme = ((image < lim_inf) | (image > lim_sup)).astype(np.uint8)

            # Vectorize the binary image, supplying the transform so it returns map coords
            for coords, value in shapes(is_extreme, transform=f.transform):

                # Ignore the polygons covering non-extreme pixels
                if value != 0:
                    # Convert the GeoJSON-like geometry to a shapely geometry
                    geom.append(shape(coords))
                    bands.append(band)
                    # NOTE: `value` comes from the binary image (so it is always 1 here),
                    # it is not the original pixel value of the raster.
                    pixel_values.append(value)

# Sequential identifier (starting at 1) for each extreme zone.
fid = list(range(1, len(geom) + 1))
zones_dict = {'fid': fid, 'band': bands, 'pixel_value': pixel_values, 'geometry': geom}


In [None]:
# Gather the extreme zones in a GeoDataFrame.
# NOTE(review): the polygons are declared as EPSG:3857, i.e. the tiles are assumed
# to be stored in that CRS -- TODO confirm against the rasters.
extrem_zones = gpd.GeoDataFrame(data=zones_dict, crs='EPSG:3857')

In [None]:
# Bring the roads to the CRS of the extreme zones before intersecting them.
roads_reproject = roads.to_crs(epsg=3857)

# The helper module is imported as `fct_misc` (not `misc_fct`).
fct_misc.test_crs(roads_reproject.crs, extrem_zones.crs)

# Keep only the parts of the extreme zones that lie on a road; only the road
# identifier is carried over from the roads layer.
extrem_zones_on_roads = gpd.overlay(extrem_zones, roads_reproject[['OBJECTID', 'geometry']])

In [None]:
# (rows, columns) of the overlay between the extreme zones and the roads.
extrem_zones_on_roads.shape

In [None]:
# A zone overlapping several roads appears once per road: keep a single row per zone.
extrem_zones_on_roads = extrem_zones_on_roads.drop_duplicates(subset=['fid'], ignore_index=True)

# Export the result so that the zones can be inspected in a GIS software.
extrem_zones_on_roads.to_file(
    '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/test_extrem_pixels.shp'
)

In [None]:
# (rows, columns) of the extreme zones on roads, to compare with the shape displayed
# before the rows sharing the same `fid` were dropped.
extrem_zones_on_roads.shape

## Downloading tiles for the zones to check

In [None]:
# Read the section of the configuration file dedicated to the tileset generation.
# NOTE(review): `yaml.safe_load` is sufficient for a plain configuration file and
# would be preferable if 'config.yaml' can come from an untrusted source.
with open('config.yaml') as fp:
    cfg = yaml.load(fp, Loader=yaml.FullLoader)['generate_tilesets.py']

OUTPUT_DIR = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/images'

# Parameters of the web service delivering the orthophotos.
ortho_ws_cfg = cfg['datasets']['orthophotos_web_service']
ORTHO_WS_TYPE = ortho_ws_cfg['type']
ORTHO_WS_URL = ortho_ws_cfg['url']
ORTHO_WS_SRS = ortho_ws_cfg['srs']
# Optional keys: always bind the names so that later cells never hit a NameError.
ORTHO_WS_LAYERS = ortho_ws_cfg.get('layers')  # None when the key is absent
ORTHO_WS_PARAMETERS = ortho_ws_cfg.get('parameters', {})

SAVE_METADATA = True
OVERWRITE = cfg['overwrite']
TILE_SIZE = cfg['tile_size']

# Folder receiving the downloaded tiles; `exist_ok` avoids the race between
# checking for the folder and creating it.
ALL_IMG_PATH = os.path.join(OUTPUT_DIR, "test")
os.makedirs(ALL_IMG_PATH, exist_ok=True)

In [None]:
# Bring the tiles information to the CRS of the extreme zones before intersecting them.
tiles_info_reproj = tiles_info.to_crs(epsg=3857)

# The helper module is imported as `fct_misc` (not `misc_fct`).
fct_misc.test_crs(tiles_info_reproj.crs, extrem_zones_on_roads.crs)

# Keep the tiles intersecting at least one extreme zone located on a road.
tiles_info_on_zones = gpd.overlay(tiles_info_reproj, extrem_zones_on_roads[['fid', 'geometry']])

In [None]:
# A tile intersecting several zones appears once per zone: keep a single row per tile id.
tiles_info_on_zones = tiles_info_on_zones.drop_duplicates(subset=['id'], ignore_index=True)

In [None]:
# The tiles must be expressed in the SRS of the web service: reproject them first.
tiles_to_download = tiles_info_on_zones.to_crs(ORTHO_WS_SRS)

# Prepare the download jobs. The download cell of this notebook treats the keys
# as the paths of the images and the values as keyword arguments for the getter.
job_dict = XYZ.get_job_dict(
    tiles_gdf=tiles_to_download,
    XYZ_url=ORTHO_WS_URL,
    img_path=ALL_IMG_PATH,
    save_metadata=SAVE_METADATA,
    overwrite=OVERWRITE,
)

# Function in charge of fetching one image.
image_getter = XYZ.get_geotiff

In [None]:
N_JOBS = 10

# Download the tiles in parallel. Warnings emitted in this process meanwhile are
# recorded instead of being displayed.
with warnings.catch_warnings(record=True):
    job_outcome = Parallel(n_jobs=N_JOBS, backend="loky")(
        delayed(image_getter)(**job_kwargs) for _, job_kwargs in tqdm(sorted(job_dict.items()))
    )

# A job succeeded when its image exists and, if metadata were requested, when the
# matching '.json' file exists too. The metadata file is only required when
# SAVE_METADATA is enabled (presumably it is not written otherwise -- verify
# against the helper).
failed_jobs = [
    job for job in job_dict
    if not os.path.isfile(job)
    or (SAVE_METADATA and not os.path.isfile(job.replace('.tif', '.json')))
]
for job in failed_jobs:
    print('Failed task: ', job)

all_tiles_were_downloaded = not failed_jobs

if all_tiles_were_downloaded:
    print("...done.")
else:
    print("Some tiles were not downloaded. Please try to run this script again.")
    sys.exit(1)

# Testing the mask 

In [14]:
# Stop right away when the input roads already contain invalid geometries.
nbr_invalid_roads = int((~roads.is_valid).sum())
if nbr_invalid_roads != 0:
    print(f"There are {nbr_invalid_roads} invalid geometries for the roads.")
    sys.exit(1)

# Attributes of the roads that are dropped before the mask test.
unused_columns = [
    'ERSTELLUNG', 'ERSTELLU_1', 'HERKUNFT', 'HERKUNFT_J', 'HERKUNFT_M',
    'KUNSTBAUTE', 'WANDERWEGE', 'VERKEHRSBE', 'BEFAHRBARK', 'EROEFFNUNG',
    'STUFE', 'RICHTUNGSG', 'KREISEL', 'EIGENTUEME', 'VERKEHRS_1', 'NAME',
    'TLM_STRASS', 'STRASSENNA', 'SHAPE_Leng',
]
simplified_roads = roads.drop(columns=unused_columns)

# Roads and tiles are both brought to EPSG:3857.
roads_reproj = simplified_roads.to_crs(epsg=3857)
tiles_info_reproj = tiles_info.to_crs(epsg=3857)

# Build the path of the image of each tile: the id is split on commas after
# removing the surrounding parentheses, and the file is named '<z>_<x>_<y>.tif'.
fp_list = []
for tile_idx in tiles_info_reproj['id'].values:
    x, y, z = tile_idx.strip('(,)').split(',')
    im_name = f"{z.lstrip()}_{x}_{y.lstrip()}.tif"
    fp_list.append(os.path.join(TILES_DIR, im_name))

tiles_info_reproj['filepath'] = fp_list

fct_misc.test_crs(roads_reproj.crs, tiles_info_reproj.crs)

# The reprojection can produce invalid geometries: report them...
nbr_invalid_reproj = int((~roads_reproj.is_valid).sum())
if nbr_invalid_reproj != 0:
    print(f"There are {nbr_invalid_reproj} invalid geometries for the roads after the reprojection.")

# ... and replace them by their buffer of 0 m, on a copy of the reprojected roads.
print("Correction of the roads presenting an invalid geometry with a buffer of 0 m...")
corrected_roads = roads_reproj.copy()
to_correct = ~corrected_roads.is_valid
corrected_roads.loc[to_correct, 'geometry'] = corrected_roads.loc[to_correct, 'geometry'].buffer(0)


There are 84 invalid geometries for the roads after the reprojection.
Correction of the roads presenting an invalid geometry with a buffer of 0 m...


In [34]:
fct_misc.test_crs(corrected_roads.crs, tiles_info_reproj.crs)

# Each road must be described by a single row. An explicit condition is used
# instead of `try/assert/except`: an assert is removed when Python runs with -O and
# a bare except would report unrelated errors (e.g. a missing column) with this message.
if corrected_roads['OBJECTID'].duplicated().any():
    print('Some roads are separated on mulitple lines. They must be transformed to multipolygons or fused first.')
    sys.exit(1)

# Associate each tile with the roads it intersects, one row per (tile, road) pair.
intersected_tiles = gpd.sjoin(tiles_info_reproj, corrected_roads[['OBJECTID', 'geometry']])
intersected_tiles = intersected_tiles.drop_duplicates(subset=['id', 'OBJECTID'])

# Table receiving the pixel values of the roads.
pixels_per_band = pd.DataFrame()

In [35]:
# Indexes (1-based) of the raster bands read for each road: bands 1 to 4.
BANDS=range(1,5)

In [40]:
%%timeit

pixels_per_band=pd.DataFrame()
objectid=corrected_roads.OBJECTID[0]

# Get the corresponding tile(s)
intersected_tiles_with_road=intersected_tiles[intersected_tiles['OBJECTID'] == objectid].copy()
intersected_tiles_with_road.reset_index(drop=True, inplace=True)

# Get the pixels for each tile
for tile_filepath in intersected_tiles_with_road['filepath'].values:
    pixel_values=pd.DataFrame()
    
    # extract the geometry in GeoJSON format
    geoms = [mapping(corrected_roads.geometry[0])]

    # extract the raster values values within the polygon 
    with rasterio.open(tile_filepath) as src:
        out_image, _ = mask(src, geoms, crop=True)

        # no data values of the original raster
        no_data=src.nodata

    dico={}
    length_bands=[]
    for band in BANDS:

        # extract the values of the masked array
        data = out_image[band-1]

        # extract the the valid values
        val = np.extract(data != no_data, data)

        dico[f'band{band}']=val
        length_bands.append(len(val))

    dico.update({'road_id': objectid})
    pixels_from_tile = pd.DataFrame(dico)

    pixel_values = pd.concat([pixel_values, pixels_from_tile],ignore_index=True)

pixels_per_band=pd.concat([pixels_per_band, pixel_values], ignore_index=True)

5.51 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [30]:
# Display the first band (index 0) of `out_image`, the array returned by `mask`
# for the last processed tile of the mask test.
out_image[0]


array([[ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 87, 86,  0,  0],
       [ 0,  0,  0,  0, 89, 88, 88,  0],
       [ 0,  0,  0, 86, 87, 88, 89,  0],
       [ 0,  0,  0, 79, 84, 87, 88,  0],
       [ 0,  0, 78, 79, 79, 81,  0,  0],
       [ 0, 78, 74, 77, 80, 79,  0,  0],
       [ 0, 82, 78, 77, 75,  0,  0,  0],
       [ 0,  0,  0, 79, 80,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0]], dtype=uint8)

In [32]:
# Scratch check: remainder of the division of 256 by 256.
256%256

0