# Idea: Geo-Enabled Depth Estimation

Overhead imagery can be used to understand the scale of a scene. If the geospatial context of an image is known (i.e., it is geocalibrated), we can infer an intermediate estimate of scale from the co-located overhead image and use it to augment depth estimation.

Copyright © Scott Workman. 2025.

In [None]:
import _init_paths

In [None]:
import torch

from nets.ops import generate_cutout
from nets.geo import depth2voxel, voxel2pano

import imageio
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Overhead tile settings used throughout the notebook.
# NOTE(review): gsd is presumably the ground sample distance (meters per pixel)
# of the zoom-16 overhead tiles -- confirm against the dataset documentation.
zoom = 16
gsd = 0.7432

# Dataset root; every path below is built by appending to it, so it must keep
# its trailing slash.
data_dir = "../holicity-overhead/"

ground_index_path = "{}images.txt".format(data_dir)
overhead_index_path = "{}overhead/images_{}.txt".format(data_dir, zoom)
overhead_columns = [
    "overhead", "lat", "lon", "min_lon",
    "min_lat", "max_lon", "max_lat",
]

# Neither index file is read with a header row; column names are supplied here.
df_images = pd.read_csv(ground_index_path, names=["ground", "lat", "lon"])
df_overhead = pd.read_csv(overhead_index_path, names=overhead_columns)

# Place the two tables side by side (rows are paired by index). Both provide
# "lat" and "lon", so the repeated column names are dropped and only the first
# occurrence of each is kept.
df = pd.concat([df_images, df_overhead], axis=1)
is_repeated_column = df.columns.duplicated()
df = df.loc[:, ~is_repeated_column]
print(df.iloc[0])

### Given a height map, generate a synthetic depth panorama

In [None]:
# For a few hand-picked samples, turn the overhead height map into a synthetic
# depth panorama and show overhead image, height map and panorama side by side.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

pano_size = [512, 1024]
interesting_inds = [2000, 17000]

# Panorama values above this threshold are blanked out as "no data" below.
# NOTE(review): 256 is presumably half the height-map width in pixels, which
# would make this 95% of the farthest representable range -- confirm.
max_valid_depth = 256 * gsd * .95

for idx in interesting_inds:
  overhead_name = df["overhead"].iloc[idx]

  # data_dir already ends with "/", so no extra separator is inserted here
  # (this matches how the other dataset paths in this file are built).
  im_overhead = imageio.v2.imread("{}overhead/{}".format(data_dir, overhead_name))
  # The height array is stored under the overhead file name minus its last
  # four characters, with a ".npy" suffix.
  im_height = np.load("{}height/{}.npy".format(data_dir, overhead_name[:-4]))

  orientations = torch.zeros(1).to(device)
  # Add two leading singleton dimensions before handing the map to depth2voxel.
  t_height = torch.from_numpy(im_height).unsqueeze(0).unsqueeze(0)
  voxel = depth2voxel(t_height.to(device), torch.tensor(gsd).to(device))
  pano = voxel2pano(voxel, orientations, pano_size).detach().cpu().float().numpy().squeeze()

  # handle no data regions (only after the panorama has been rendered, because
  # t_height may share memory with im_height)
  im_height[im_height == -9999] = np.nan
  pano[pano > max_valid_depth] = np.nan

  # Clip the height colour range to the 10th-90th percentile, ignoring NaNs.
  # Unpacking yields scalars, which is what imshow expects for vmin/vmax.
  height_lo, height_hi = np.nanquantile(im_height, [.1, .9])

  plt.figure(figsize=(15,15))
  plt.subplot(131)
  plt.imshow(im_overhead)
  plt.axis('off')
  plt.subplot(132)
  plt.imshow(im_height, vmin=height_lo, vmax=height_hi)
  plt.axis('off')
  plt.subplot(133)
  plt.imshow(pano, vmax=50)
  plt.axis('off')

  plt.show()

### Given a geocalibrated image, extract the corresponding depth cutout

In [None]:
# For a few hand-picked samples, render the synthetic depth panorama from the
# overhead height map, cut out the view given by the image's stored yaw/pitch,
# and show it next to the ground image and its depth map.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

pano_size = [512, 1024]
interesting_inds = [2000, 17000]

# Panorama/cutout values above this threshold are blanked out as "no data".
# NOTE(review): 256 is presumably half the height-map width in pixels, which
# would make this 95% of the farthest representable range -- confirm.
max_valid_depth = 256 * gsd * .95

for idx in interesting_inds:
  ground_name = df["ground"].iloc[idx]
  overhead_name = df["overhead"].iloc[idx]

  # data_dir already ends with "/", so every path is built without an extra
  # separator.
  im = imageio.v2.imread("{}image/{}_imag.jpg".format(data_dir, ground_name))
  depth = np.load("{}depth/{}_dpth.npz".format(data_dir, ground_name))["depth"].squeeze()
  geo = np.load("{}geo/{}_camr.npz".format(data_dir, ground_name))
  # Loaded for parity with the previous cell; it is not displayed below.
  im_overhead = imageio.v2.imread("{}overhead/{}".format(data_dir, overhead_name))
  # The height array is stored under the overhead file name minus its last
  # four characters, with a ".npy" suffix.
  im_height = np.load("{}height/{}.npy".format(data_dir, overhead_name[:-4]))

  # NOTE(review): yaw/pitch are created on the CPU while pano lives on
  # `device`; confirm generate_cutout accepts that when running on CUDA.
  yaw = torch.tensor(geo["yaw"])
  pitch = torch.tensor(geo["pitch"])

  orientations = torch.zeros(1).to(device)
  # Add two leading singleton dimensions before handing the map to depth2voxel.
  t_height = torch.from_numpy(im_height).unsqueeze(0).unsqueeze(0)
  voxel = depth2voxel(t_height.to(device), torch.tensor(gsd).to(device))
  pano = voxel2pano(voxel, orientations, pano_size).squeeze(0)
  cutout = generate_cutout(pano, yaw=yaw, pitch=pitch).detach().cpu().numpy().squeeze()

  # Convert the panorama to NumPy only after the cutout has been taken from
  # the tensor version.
  pano = pano.detach().cpu().numpy().squeeze()

  # handle no data regions
  depth[depth == 0] = np.nan
  pano[pano > max_valid_depth] = np.nan
  cutout[cutout > max_valid_depth] = np.nan

  # Left to right: synthetic panorama, ground image, stored depth, cutout.
  plt.figure(figsize=(15,15))
  plt.subplot(141)
  plt.imshow(pano, vmax=50)
  plt.axis("off")
  plt.subplot(142)
  plt.imshow(im)
  plt.axis("off")
  plt.subplot(143)
  plt.imshow(depth, vmin=0, vmax=50)
  plt.axis("off")
  plt.subplot(144)
  plt.imshow(cutout, vmin=0, vmax=50)
  plt.axis("off")

  plt.show()