In [26]:
import os
from os.path import exists

import pandas as pd
import geopandas as gpd
from shapely import geometry
import numpy as np
import urllib

import cv2
import rasterio
from rasterio import features as fet

from tenacity import retry, stop_after_attempt
from tqdm import tqdm 
import matplotlib.pyplot as plt
from multiprocessing import Pool
import mercantile

from shapely.geometry import Polygon
from rasterio.transform import Affine
import time
from glob import glob  

from shapely.geometry import shape

# Root directory used by the cells below: downloaded GeoJSON files are written
# here and generated imagery goes into an "images" sub-directory of it.
# The path is relative to the notebook's working directory.
output_path = "../data/test_data"

In [21]:
def create_data_for_county(tile_path,tile_id, crs='4326',out_path='output_data',zoom=16,img_height=1024,img_width=1024,n_buildings=1):
    """Export an aerial GeoTIFF for every map tile that holds enough building footprints.

    The footprint file is read with geopandas, its total bounds are split into
    mercantile tiles at ``zoom``, and for each tile the spatial index is queried
    with the tile's bounding box. Tiles with more than ``n_buildings`` candidate
    footprints are downloaded via ``extractSatelliteImages`` and written by
    ``convertToTiff`` into ``<out_path>/images``.

    Args:
        tile_path: Path of the footprint file (anything ``gpd.read_file`` accepts).
        tile_id: Identifier used as the first component of each image name.
        crs: Currently unused; kept for backward compatibility.
        out_path: Base output directory; images go into its ``images`` sub-directory.
        zoom: Tile zoom level used both for tiling and in the image name.
        img_height: Requested image height in pixels.
        img_width: Requested image width in pixels.
        n_buildings: A tile is exported only when the number of candidate
            footprints is strictly greater than this value.

    Returns:
        None. The function works through its side effects (network requests and
        files written to disk).
    """
    gdf = gpd.read_file(tile_path)
    if gdf.empty:
        # No footprints means no finite bounds to tile; nothing to export.
        return

    west, south, east, north = gdf.total_bounds
    spatial_index = gdf.sindex
    image_dir = os.path.join(out_path, 'images')

    # Tile at the requested zoom so the tiles match the zoom recorded in the file name.
    tiles = mercantile.tiles(west, south, east, north, zooms=zoom)

    # The index `i` counts every tile (exported or not) so image names stay stable.
    for i, tile in tqdm(enumerate(tiles)):
        val = mercantile.bounds(tile)

        # Bounding-box query against the spatial index: these are candidate
        # footprints whose envelopes touch the tile, not exact intersections.
        tile_bbox = (val.west, val.south, val.east, val.north)
        candidate_count = len(list(spatial_index.intersection(tile_bbox)))

        if candidate_count == 0 or candidate_count <= n_buildings:
            continue

        # Download only after the tile qualified, so sparse tiles cost no
        # network round-trip and a failed download cannot abort the run for them.
        img = extractSatelliteImages(val.west, val.south, val.east, val.north,
                                     height=str(img_height),
                                     width=str(img_width),
                                     )
        convertToTiff(img, val.south, val.west, val.north, val.east,
                      height=img_height,
                      width=img_width,
                      path=image_dir,
                      imagename='{}_{}_{}'.format(tile_id, zoom, i)
                      )


def convertToTiff(img, minlat, minlong, maxlat, maxlong, height=512, width=512, imagename='sample_image',
                    path='output', crs="epsg:4326",chanenls=3,prefix=1):
    """Write an image array as a georeferenced GeoTIFF at ``<path>/<imagename>.tiff``.

    Args:
        img: Image array indexed as ``img[:, :, band]``; the first ``chanenls``
            bands are written. Presumably uint8 RGB as returned by
            ``extractSatelliteImages`` — confirm against the caller.
        minlat, minlong, maxlat, maxlong: Geographic bounds of the image, used
            to build the affine transform.
        height: Raster height in pixels.
        width: Raster width in pixels.
        imagename: Output file name without extension.
        path: Output directory; created when missing.
        crs: Coordinate reference system stored in the file.
        chanenls: Number of bands to write (parameter name kept as-is for
            backward compatibility).
        prefix: Unused; kept for backward compatibility.

    Returns:
        None. If the target file already exists it is left untouched.
    """
    os.makedirs(path, exist_ok=True)

    target = os.path.join(path, imagename + '.tiff')
    # Check the real output location (not the bare file name relative to the
    # working directory) so existing tiles are actually skipped.
    if os.path.isfile(target):
        return

    transform = rasterio.transform.from_bounds(minlong, minlat, maxlong, maxlat, width, height)
    # The band count follows `chanenls` so the declared bands match what is written.
    with rasterio.open(target, 'w', driver='GTiff', dtype=rasterio.uint8, count=chanenls,
                        width=width, height=height, transform=transform, crs=crs) as dst:
        for index in range(chanenls):
            # rasterio band indexes are 1-based.
            dst.write(img[:, :, index], indexes=index + 1)

@retry(stop=stop_after_attempt(3))
def extractSatelliteImages(minX, minY, maxX, maxY, height='512', width='512', timeout=60):
    """Fetch an aerial image for a lon/lat bounding box from the WMS endpoint.

    The call is attempted up to three times (tenacity ``retry`` decorator); any
    exception raised inside counts as a failed attempt.

    Args:
        minX, minY, maxX, maxY: Bounding box passed as ``BBOX`` in the request
            (the request declares EPSG:4326).
        height: Requested image height in pixels (interpolated into the URL).
        width: Requested image width in pixels (interpolated into the URL).
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection becomes a failed attempt instead of hanging.

    Returns:
        The decoded image after ``cv2.COLOR_BGR2RGB`` conversion.

    Raises:
        ValueError: Inside an attempt, when the response body cannot be decoded
            as an image (for example when the server sent an error document).
    """
    # `import urllib` alone does not guarantee that the `request` submodule is
    # loaded; import it explicitly so this function does not depend on another
    # library having imported it first.
    import urllib.request

    # NOTE(review): the service key is embedded in the URL below. Consider
    # moving it to configuration or an environment variable before sharing.
    url = f"http://wms3.mapsavvy.com/WMSService.svc/db45ac1c32ac4e9caa5ecc3473998c81/WMSLatLon?SERVICE=WMS&VERSION=1.1.1&REQUEST=GetMap&LAYERS=Aerial&SRS=EPSG:4326&CRS=EPSG:4326&BBOX={minX},{minY},{maxX},{maxY}&WIDTH={width}&HEIGHT={height}&STYLES=&TRANSPARENT=false&FORMAT=image/png"
    req = urllib.request.Request(url)
    # The context manager closes the connection even when reading fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        payload = resp.read()

    arr = np.frombuffer(payload, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals undecodable data by returning None.
        raise ValueError("WMS response could not be decoded as an image")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [32]:
# Name of the geography to retrieve; change this to the region you need.
location = 'France'

# The published index has one row per downloadable file; keep only the rows
# whose Location column equals the selected geography.
dataset_links = pd.read_csv("https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv")
is_selected_location = dataset_links["Location"] == location
location_links = dataset_links[is_selected_location]

link_count = len(location_links)
print("Found {} links for {}".format(link_count, location))

Found 236 links for France


In [33]:
import fiona

# Download every footprint file listed for the selected location and store it
# as <QuadKey>.geojson inside output_path.
# The target directory is created up front so a fresh checkout does not fail on
# the first write.
os.makedirs(output_path, exist_ok=True)

for _, row in location_links.iterrows():
    geojson_path = os.path.join(output_path, f"{row.QuadKey}.geojson")
    if os.path.exists(geojson_path):
        # Already written by an earlier (possibly interrupted) run; skip the
        # download. NOTE(review): assumes QuadKey is unique within a location.
        continue

    df = pd.read_json(row.Url, lines=True)
    df['geometry'] = df['geometry'].apply(shape)
    gdf = gpd.GeoDataFrame(df, crs=4326, geometry='geometry')

    # Write under a scratch name and rename afterwards, so an interrupted write
    # never leaves a truncated file that the skip check above would accept.
    partial_path = geojson_path + ".part"
    if os.path.exists(partial_path):
        os.remove(partial_path)
    with fiona.Env(OSR_WKT_FORMAT="WKT2_2018"):
        gdf.to_file(partial_path, driver="GeoJSON")
    os.replace(partial_path, geojson_path)
    

KeyboardInterrupt: 

In [27]:
# Collect the GeoJSON files stored directly inside output_path. Joining the
# pattern onto the directory avoids also matching sibling directories whose
# names merely start with the same prefix.
geodata_files = sorted(glob(os.path.join(output_path, '*.geojson')))
print("Found {} geojson files".format(len(geodata_files)))

for geojson in geodata_files:
    # Use the file stem as the tile id so images generated from different
    # files get distinct names and do not collide.
    quadkey = os.path.splitext(os.path.basename(geojson))[0]
    create_data_for_county(tile_path=geojson, tile_id=quadkey, out_path=output_path)

Found 1 geojson files
