In [7]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from datasets import load_dataset
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import time
import os
# Start timing for overall progress tracking (read again when the final
# "Completed in ..." message is printed).
start_time = time.time()

print("NYU Depth V2 Dataset Loading and Visualization")
print("----------------------------------------------")

# Progress bar for dataset loading
print("Step 1/4: Loading dataset... (this may take several minutes)")

# The bar is only a coarse "started / finished" indicator: it is bumped to 10%
# before the load_dataset call and to 100% after it returns.
try:
    # The context manager closes the bar on every exit path, including a
    # KeyboardInterrupt during a long download, which `except Exception`
    # below does not catch.
    with tqdm(total=100, desc="Loading dataset", position=0) as loading_bar:
        loading_bar.update(10)  # Show initial progress
        # Note that we're loading the dataset without specifying trust_remote_code
        # as this seems to be causing issues
        ds = load_dataset("sayakpaul/nyu_depth_v2")
        loading_bar.update(90)  # Complete the loading progress
    print(f"✓ Dataset loaded successfully with {len(ds['train'])} training examples")
except Exception as e:
    # The bar is already closed here, so the message is not interleaved with it.
    print(f"✗ Error loading dataset: {e}")
    raise  # bare raise re-raises the active exception unchanged

# Colormap used by colored_depthmap to turn normalized depth into colors.
cmap = plt.cm.viridis

# Announce the setup step and open its progress bar; the bar is advanced and
# closed once the helper functions have been defined.
print("Step 2/4: Setting up visualization functions")
setup_bar = tqdm(desc="Setting up", total=100, position=0)
def colored_depthmap(depth, d_min=None, d_max=None):
    """Colorize a depth array with the module-level ``cmap``.

    Depth values are rescaled linearly so that ``d_min`` maps to 0 and
    ``d_max`` maps to 1, passed through ``cmap``, and the alpha channel is
    dropped.

    Args:
        depth: Array of depth values. The only caller in this notebook passes
            a squeezed depth map; presumably 2-D (H, W) — TODO confirm.
        d_min: Depth mapped to the low end of the colormap. Defaults to
            ``np.min(depth)``.
        d_max: Depth mapped to the high end of the colormap. Defaults to
            ``np.max(depth)``.

    Returns:
        The first three channels of ``cmap``'s output multiplied by 255, with
        the color channel as the last axis (H, W, C for a 2-D input).
    """
    if d_min is None:
        d_min = np.min(depth)
    if d_max is None:
        d_max = np.max(depth)

    span = d_max - d_min
    if span == 0:
        # Constant depth map (or d_min == d_max passed explicitly): dividing
        # by zero would yield NaN/inf, so map everything to the low end.
        depth_relative = np.zeros(np.shape(depth), dtype=float)
    else:
        depth_relative = (depth - d_min) / span

    # Ellipsis keeps the original (H, W, 3) result for 2-D input while not
    # hard-coding the number of leading axes.
    return 255 * cmap(depth_relative)[..., :3]

def merge_into_row(input, depth_target):
    """Place an image and its colorized depth map side by side.

    Args:
        input: Image convertible with ``np.array``; it must be stackable
            horizontally with the colorized depth output — presumably an
            (H, W, 3) RGB image, TODO confirm.
        depth_target: Depth map convertible with ``np.array``; singleton
            dimensions are squeezed out before colorizing.

    Returns:
        The ``np.hstack`` of the image array and the colorized depth map.
        The caller in this notebook casts the result to ``uint8`` for display.
    """
    rgb = np.array(input)
    depth = np.squeeze(np.array(depth_target))
    # Pass the depth map's own range explicitly as the normalization bounds.
    depth_rgb = colored_depthmap(depth, np.min(depth), np.max(depth))
    return np.hstack([rgb, depth_rgb])

# Helper functions are defined; finish the "Setting up" progress bar.
setup_bar.update(100)
setup_bar.close()

# Tunable settings for the sample grid.
SAMPLE_SEED = 42                      # fixed seed so the same samples are drawn on every run
GRID_ROWS, GRID_COLS = 3, 3           # layout of the preview grid
OUTPUT_PATH = "nyu_depth_samples.png"

# Sample selection
print("Step 3/4: Selecting random samples")
selection_bar = tqdm(total=100, desc="Selecting samples", position=0)
train_set = ds["train"]
# Never ask for more samples than the split holds (required by replace=False).
num_samples = min(GRID_ROWS * GRID_COLS, len(train_set))
# Seeded generator + replace=False: reproducible selection with no duplicate
# panels in the grid.
rng = np.random.default_rng(SAMPLE_SEED)
random_indices = rng.choice(len(train_set), size=num_samples, replace=False).tolist()
selection_bar.update(100)
selection_bar.close()

# Visualization with progress bar
print("Step 4/4: Generating visualizations")
fig, axes = plt.subplots(GRID_ROWS, GRID_COLS, figsize=(15, 6))
# Hide the axes frame on every panel up front so that any unused panel
# (fewer samples than grid cells) stays blank.
for ax in axes.flat:
    ax.axis("off")

# The context manager closes the bar even if rendering a sample fails.
with tqdm(total=len(random_indices), desc="Processing images", position=0) as visualization_bar:
    for ax, idx in zip(axes.flat, random_indices):
        # Fetch the example once and reuse it for both fields instead of
        # indexing the dataset twice per panel.
        sample = train_set[idx]
        image_viz = merge_into_row(sample["image"], sample["depth_map"])
        # merge_into_row's output is cast to uint8 for display.
        ax.imshow(image_viz.astype("uint8"))
        visualization_bar.update(1)  # Update progress for each image

# Save and show results
fig.tight_layout()
fig.savefig(OUTPUT_PATH)
total_time = time.time() - start_time
print(f"\nCompleted in {total_time:.2f} seconds")
print(f"Visualization saved to: {os.path.abspath(OUTPUT_PATH)}")
plt.show()

  from .autonotebook import tqdm as notebook_tqdm


NYU Depth V2 Dataset Loading and Visualization
----------------------------------------------
Step 1/4: Loading dataset... (this may take several minutes)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
