In [None]:
import os, sys
import logging, logging.config
import yaml
import glob
from joblib import Parallel, delayed
from tqdm import tqdm

import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.features import shapes
from shapely.geometry import shape

import numpy as np

import misc_fct

from helpers import XYZ


In [None]:
# Input data locations.
# NOTE: despite its name, TILES_DIR is a glob pattern matching the GeoTIFF tiles, not a directory.
TILES_DIR = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/obj_detector/all-images/*.tif'
ROADS = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/roads_polygons.shp'
TILES_INFO = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/json/tiles_aoi.geojson'

# Load the road polygons and the tile footprints as GeoDataFrames.
roads = gpd.read_file(ROADS)
tiles_info = gpd.read_file(TILES_INFO)

In [None]:
# List the tiles to check. glob.glob() returns paths in a file-system dependent
# order, so the list is sorted to keep the zone numbering ('fid') assigned
# further down reproducible from one run to the next.
files = sorted(glob.glob(TILES_DIR))

# Quick sanity check on the paths that were found.
print(files[:2])

# Making polygons on the zones to check

In [None]:
# Build one polygon per connected zone of extreme pixel values, for every band
# of every tile. The results are gathered in `zones_dict`, with parallel lists
# for the zone id, the band number, the mask value and the geometry.

# Pixel values strictly below lim_inf or strictly above lim_sup are flagged as extreme.
lim_inf=1
lim_sup=200

geom=[]
bands=[]
pixel_values=[]

for file in tqdm(files, desc='Checking files'):
    # Open each tile once and reuse the dataset for all the bands.
    with rasterio.open(file) as f:
        # Bands 1 to 4 are read; this assumes every tile has at least 4 bands -- TODO confirm
        for band in range(1,5):
            image = f.read(band)

            # binary mask: 1 where the pixel value is extreme, 0 elsewhere
            is_extreme = ((image < lim_inf) | (image > lim_sup)).astype(np.uint8)

            # vectorize the binary mask, supplying the transform so it returns map coords
            for coords, value in shapes(is_extreme, transform=f.transform):

                # ignore the polygons covering the non-extreme pixels (mask value 0)
                if value != 0:
                    # convert geojson to shapely geometry
                    geom.append(shape(coords))
                    bands.append(band)
                    pixel_values.append(value)

# Sequential identifier (starting at 1) for each zone.
fid=list(range(1, len(geom)+1))
zones_dict={'fid':fid, 'band':bands, 'pixel_value':pixel_values, 'geometry': geom}


In [None]:
# Turn the collected zones into a GeoDataFrame.
# NOTE(review): the CRS is hard-coded; this assumes the tiles are georeferenced in EPSG:3857 -- TODO confirm
extrem_zones = gpd.GeoDataFrame(data=zones_dict, crs='EPSG:3857')

In [None]:
# Bring the roads into the same CRS as the extreme-value zones.
roads_reproject = roads.to_crs(epsg=3857)

# Presumably checks that both layers share the same CRS -- verify against misc_fct.
misc_fct.test_crs(roads_reproject.crs, extrem_zones.crs)

# Keep only the parts of the zones that intersect a road, tagged with the road OBJECTID.
roads_id_geom = roads_reproject[['OBJECTID', 'geometry']]
extrem_zones_on_roads = gpd.overlay(extrem_zones, roads_id_geom, how='intersection')

In [None]:
# (rows, columns) of the zones/roads overlay, before de-duplication on 'fid'.
extrem_zones_on_roads.shape

In [None]:
# A zone overlapping several roads appears once per road in the overlay:
# keep only the first row for each zone id and renumber the index.
extrem_zones_on_roads = extrem_zones_on_roads.drop_duplicates(
    subset=['fid'],
    ignore_index=True,
)

# Export the remaining zones for visual inspection.
extrem_zones_on_roads.to_file(
    '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/test_extrem_pixels.shp'
)

In [None]:
# (rows, columns) of the zones on roads at this point of the notebook.
extrem_zones_on_roads.shape

# Downloading tiles for the zones to check

In [None]:
# Load the settings of the tile generation step and derive the constants used
# to download the orthophoto tiles.
with open('config.yaml') as fp:
    # NOTE(review): FullLoader can construct more than plain YAML types; prefer
    # yaml.safe_load if the configuration only holds plain scalars, lists and maps.
    cfg = yaml.load(fp, Loader=yaml.FullLoader)['generate_tilesets.py']

OUTPUT_DIR = '/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/images'

# Settings of the web service providing the orthophotos.
ortho_ws_cfg = cfg['datasets']['orthophotos_web_service']

ORTHO_WS_TYPE = ortho_ws_cfg['type']
ORTHO_WS_URL = ortho_ws_cfg['url']
ORTHO_WS_SRS = ortho_ws_cfg['srs']
# Optional keys: always define the names, so that a value left over from a
# previous run of this cell can never be picked up by mistake.
ORTHO_WS_LAYERS = ortho_ws_cfg.get('layers')
ORTHO_WS_PARAMETERS = ortho_ws_cfg.get('parameters', {})

SAVE_METADATA = True
OVERWRITE = cfg['overwrite']
TILE_SIZE = cfg['tile_size']

# Folder receiving the downloaded tiles; created if missing, no error if it already exists.
ALL_IMG_PATH = os.path.join(OUTPUT_DIR, "test")
os.makedirs(ALL_IMG_PATH, exist_ok=True)

In [None]:
# Bring the tile footprints into the same CRS as the zones on roads.
tiles_info_reproj = tiles_info.to_crs(epsg=3857)

# Presumably checks that both layers share the same CRS -- verify against misc_fct.
misc_fct.test_crs(tiles_info_reproj.crs, extrem_zones_on_roads.crs)

# Intersect the tiles with the zones, keeping the zone id alongside the tile attributes.
zones_id_geom = extrem_zones_on_roads[['fid', 'geometry']]
tiles_info_on_zones = gpd.overlay(tiles_info_reproj, zones_id_geom, how='intersection')

In [None]:
# A tile intersecting several zones appears once per zone: keep one row per tile id.
tiles_info_on_zones = tiles_info_on_zones.drop_duplicates(subset=['id'], ignore_index=True)

In [None]:
# The tiles are reprojected to the SRS of the web service before the jobs are built.
tiles_in_ws_srs = tiles_info_on_zones.to_crs(ORTHO_WS_SRS)

# Download jobs built by the XYZ helper; the keys are used below as the paths
# of the expected output files.
job_dict = XYZ.get_job_dict(
    tiles_gdf=tiles_in_ws_srs,
    XYZ_url=ORTHO_WS_URL,
    img_path=ALL_IMG_PATH,
    save_metadata=SAVE_METADATA,
    overwrite=OVERWRITE,
)

# Function called once per job to fetch a tile.
image_getter = XYZ.get_geotiff

In [None]:
import warnings

# Number of parallel workers used for the download.
N_JOBS=10

# With record=True, the warnings raised in this process inside the block are
# captured in a list (discarded here) instead of being displayed.
with warnings.catch_warnings(record=True):
    ordered_jobs = sorted(job_dict.items())
    job_outcome = Parallel(n_jobs=N_JOBS, backend="loky")(
        delayed(image_getter)(**job_kwargs) for _job_key, job_kwargs in tqdm(ordered_jobs)
    )

    # A job is successful when both the GeoTIFF and its JSON metadata file exist.
    all_tiles_were_downloaded = True
    for job in job_dict:
        tile_found = os.path.isfile(job)
        metadata_found = tile_found and os.path.isfile(job.replace('.tif', '.json'))
        if not metadata_found:
            all_tiles_were_downloaded = False
            print('Failed task: ', job)

    if not all_tiles_were_downloaded:
        print("Some tiles were not downloaded. Please try to run this script again.")
        sys.exit(1)
    else:
        print("...done.")