# Microsoft Global Buildings Downloader for QGIS - FULL TILES VERSION (Larger Areas)

This notebook downloads **COMPLETE TILES** from the Microsoft Global Buildings dataset based on your QGIS canvas extent.

**This version downloads entire QuadKey tiles without filtering**, so you get all buildings in the tiles that intersect your area.

Developed by Sahan WanniArachchi | Modified to download full tiles

In [None]:
import os
import tempfile
import pandas as pd
import geopandas as gpd
from shapely import geometry
import mercantile
from qgis.core import QgsVectorLayer, QgsProject, QgsCoordinateReferenceSystem, QgsCoordinateTransform, QgsPointXY

## Step 1: Get Canvas Extent and Transform to EPSG:4326

In [None]:
# Read the visible map canvas extent and the CRS it is expressed in.
canvas = iface.mapCanvas()
extent = canvas.extent()
canvas_crs = canvas.mapSettings().destinationCrs()
crs = canvas_crs.authid()

minx = extent.xMinimum()
miny = extent.yMinimum()
maxx = extent.xMaximum()
maxy = extent.yMaximum()

print(f"Canvas CRS: {crs}")
print(f"Original extent: ({minx:.6f}, {miny:.6f}, {maxx:.6f}, {maxy:.6f})")

# Transform to EPSG:4326 if needed
if crs != "EPSG:4326":
    # Use the canvas CRS object itself rather than rebuilding it from the
    # authid string: authid() is empty for custom CRSs, which would yield an
    # invalid source CRS.
    source_crs = canvas_crs
    dest_crs = QgsCoordinateReferenceSystem("EPSG:4326")
    transform = QgsCoordinateTransform(source_crs, dest_crs, QgsProject.instance())

    # Reproject the whole rectangle instead of only its lower-left and
    # upper-right corners: in a projected CRS the rectangle's edges are not
    # axis-aligned in lon/lat, so two corners alone can under-cover the canvas.
    extent_4326 = transform.transformBoundingBox(extent)

    minx, miny = extent_4326.xMinimum(), extent_4326.yMinimum()
    maxx, maxy = extent_4326.xMaximum(), extent_4326.yMaximum()

    print(f"Transformed to EPSG:4326: ({minx:.6f}, {miny:.6f}, {maxx:.6f}, {maxy:.6f})")

# Create AOI geometry (for reference only, not used for filtering)
aoi_shape = geometry.box(minx, miny, maxx, maxy)
print(f"\nAOI created: {aoi_shape.bounds}")
print("Note: Will download COMPLETE tiles, not just AOI")

## Step 2: Set Output Path

In [None]:
# Destination folder for the merged result, and the GeoJSON file inside it.
output_dir = r"C:\Users\Sahan\pygis\GeoAI\MG building downloader"
output_fn = os.path.join(output_dir, "buildings_full_tiles.geojson")

# Create the folder (and any missing parents); an existing folder is fine.
os.makedirs(output_dir, exist_ok=True)
print(f"Output file: {output_fn}")

## Step 3: Generate QuadKeys for the Area

In [None]:
# Enumerate the zoom-9 tiles for the bounding box and convert each one to its
# QuadKey; building a set first drops any duplicate keys.
quad_keys = list(
    {
        mercantile.quadkey(tile)
        for tile in mercantile.tiles(minx, miny, maxx, maxy, zooms=9)
    }
)

print(f"Generated {len(quad_keys)} QuadKeys")
print(f"QuadKeys: {quad_keys}")

## Step 4: Load Microsoft Global Buildings Dataset Links

In [None]:
# Fetch the index CSV that lists the available tiles. Every column is read as
# a string so that values are kept exactly as written in the file.
print("Loading Microsoft Global Buildings dataset...")
df = pd.read_csv(
    filepath_or_buffer="https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv",
    dtype=str,
)
print(f"Dataset loaded: {len(df)} tiles available")

# Last expression of the cell: the notebook renders the first rows as a preview.
df.head()

## Step 5: Download Complete Tiles (NO FILTERING)

**Downloads entire QuadKey tiles and handles multiple rows per QuadKey**

In [None]:
# Download every dataset row whose QuadKey matches one of `quad_keys`, stage
# each tile as a GeoJSON file in a temporary directory, then merge all staged
# tiles into `combined_gdf` with a running integer "id" column.
idx = 0
combined_gdf = gpd.GeoDataFrame()

print(f"\nProcessing {len(quad_keys)} tiles...\n")

with tempfile.TemporaryDirectory() as tmpdir:
    tmp_fns = []

    for i, quad_key in enumerate(quad_keys):
        print(f"[{i+1}/{len(quad_keys)}] Processing QuadKey: {quad_key}")

        rows = df[df["QuadKey"] == quad_key]

        if rows.empty:
            print("  NOT FOUND in dataset")
            continue

        if len(rows) > 1:
            print(f"  Found {len(rows)} tiles with this QuadKey")

        # Process ALL rows for this QuadKey
        for row_label, row in rows.iterrows():
            url = row["Url"]
            location = row["Location"]
            print(f"  Downloading: {location}...")

            # Best effort per tile: a failed download or write is reported and
            # skipped so the remaining tiles are still processed.
            try:
                # Download and convert to GeoDataFrame
                df2 = pd.read_json(url, lines=True)
                df2["geometry"] = df2["geometry"].apply(geometry.shape)

                gdf = gpd.GeoDataFrame(df2, crs=4326)
                print(f"    Downloaded {len(gdf)} buildings (COMPLETE TILE)")

                # NO FILTERING - save the entire tile
                if len(gdf) > 0:
                    safe_location = location.replace(' ', '_').replace('/', '_')
                    # The row label keeps the name unique even when two rows
                    # share both QuadKey and Location, so no tile overwrites
                    # another one.
                    fn = os.path.join(
                        tmpdir, f"{quad_key}_{safe_location}_{row_label}.geojson"
                    )
                    gdf.to_file(fn, driver="GeoJSON")
                    # Register the file only after it was written, so the merge
                    # step never tries to read a file that does not exist.
                    tmp_fns.append(fn)

            except Exception as e:
                print(f"    ERROR: {e}")

    # Merge all files
    print("\nMerging all downloaded tiles...")
    tile_frames = []
    for fn in tmp_fns:
        gdf = gpd.read_file(fn)

        if len(gdf) > 0:
            gdf["id"] = range(idx, idx + len(gdf))
            idx += len(gdf)
            tile_frames.append(gdf)

    # Concatenate once instead of growing the frame inside the loop, which
    # re-copies all previously merged rows on every iteration.
    if tile_frames:
        combined_gdf = pd.concat(tile_frames, ignore_index=True)

print(f"\n{'='*60}")
print(f"TOTAL BUILDINGS DOWNLOADED: {len(combined_gdf)}")
print(f"{'='*60}")

## Step 6: Save to File

In [None]:
import time

# Write the merged buildings to `output_fn`, first removing any QGIS layer
# that is backed by that file so the file can be overwritten.
if len(combined_gdf) > 0:
    # Compare normalised paths rather than raw strings: an OGR layer source
    # may carry a "|layername=..." style suffix and may differ from
    # `output_fn` in path separators or (on Windows) letter case.
    target_path = os.path.normcase(os.path.normpath(output_fn))

    # Remove old layer from QGIS first to unlock the file
    layers_to_remove = []
    for layer in QgsProject.instance().mapLayers().values():
        layer_path = layer.source().split("|")[0]
        if os.path.normcase(os.path.normpath(layer_path)) == target_path:
            layers_to_remove.append(layer.id())

    if layers_to_remove:
        QgsProject.instance().removeMapLayers(layers_to_remove)
        print("Removed old layer to unlock file...")
        # Short pause after removing the layer before the file is rewritten.
        time.sleep(0.5)

    # Save complete dataset
    combined_gdf = combined_gdf.to_crs("EPSG:4326")
    combined_gdf.to_file(output_fn, driver="GeoJSON")
    print(f"✓ Saved {len(combined_gdf)} buildings to:")
    print(f"  {output_fn}")
else:
    print("⚠ No buildings found!")

## Step 7: Add Layer to QGIS

In [None]:
# Load the saved GeoJSON into the current QGIS project, reporting each way
# this can fail: nothing was saved, or the OGR provider rejects the file.
if len(combined_gdf) == 0 or not os.path.exists(output_fn):
    print("❌ No file to load")
else:
    layer = QgsVectorLayer(output_fn, "Buildings (Full Tiles)", "ogr")

    if not layer.isValid():
        print("❌ Layer is not valid")
    else:
        QgsProject.instance().addMapLayer(layer)
        print(f"✓ Layer added to QGIS successfully!")
        print(f"  Features: {layer.featureCount()}")