In [18]:
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from PIL import Image
import requests
import torch
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

In [19]:
raw_folder = 'raw' #raw folder
seg_folder = 'predicted_segmentations'

In [20]:
# # Create an empty DataFrame with columns for panoid and different object categories
# treecoveragestats = pd.DataFrame(columns=['panoid', 'tree', 'terrain', 'car'])

# # Export the empty DataFrame to CSV
# treecoveragestats.to_csv('treecoveragestats.csv', index=False)

# print("Created empty DataFrame 'treecoveragestats' and exported to CSV")

# treecoveragestats

In [21]:

# Get downloaded images from the folder instead of CSV
all_set = set()
if os.path.exists(raw_folder):
    for filename in os.listdir(raw_folder):
        if filename.endswith('.jpeg'):
            # Extract the panoid from the filename (remove .jpeg extension)
            panoid = filename.replace('.jpeg', '')
            all_set.add(panoid)

len(all_set)

122414

In [22]:
# Get all folder names in the segmentation folder as a set
seg_folders_set = set()
if os.path.exists(seg_folder):
    # List all items in the segmentation folder
    for item in os.listdir(seg_folder):
        # Check if the item is a directory
        if os.path.isdir(os.path.join(seg_folder, item)):
            seg_folders_set.add(item)
    
    print(f"Found {len(seg_folders_set)} folders in {seg_folder}")
else:
    print(f"Segmentation folder '{seg_folder}' does not exist")

# Display the set of folders
seg_folders_set

Found 2 folders in predicted_segmentations


{'8PVBCj7Sw4vQlcPdx_eSFg', '_miVWUcyN3K_B-vj6W28sA'}

In [23]:
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")

  return func(*args, **kwargs)


In [24]:
def batch_inference(images, image_numbers):
    # Process batch of images
    inputs = feature_extractor(images=images, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)

    pred_converted_masks = []
    pred_segs = []  # Store all original segmentation masks
    for i, image in enumerate(images):
        # Predicted semantic segmentation
        upsampled_logits = torch.nn.functional.interpolate(
            logits[i:i+1],
            size=image.size[::-1], # (height, width)
            mode='bilinear',
            align_corners=False
        )
        pred_seg = upsampled_logits.argmax(dim=1)[0]

        # Convert 9->2 and 8->1, all else becomes 0, car 13
        pred_converted_mask = np.zeros_like(pred_seg.cpu().numpy())
        pred_converted_mask[pred_seg.cpu().numpy() == 9] = 1  # Terrain
        pred_converted_mask[pred_seg.cpu().numpy() == 8] = 2  # Tree
        pred_converted_mask[pred_seg.cpu().numpy() == 13] = 3  # Car

        pred_converted_masks.append(pred_converted_mask)
        pred_segs.append(pred_seg.cpu().numpy())  # Store the original segmentation mask

    return pred_converted_masks, pred_segs

def process_image(image_path):
    # Process a single image
    image = Image.open(image_path).resize((1080, 1080))
    image = image.convert('RGB')
    image_number = os.path.basename(image_path)[:-5]  # Extract panoid from filename
    return image, image_number

# Create treecoveragestats DataFrame if it doesn't exist
if not os.path.exists('treecoveragestats.csv'):
    treecoveragestats = pd.DataFrame(columns=['panoid', 'tree', 'terrain', 'car'])
    treecoveragestats.to_csv('treecoveragestats.csv', index=False)
else:
    treecoveragestats = pd.read_csv('treecoveragestats.csv')

# Get the list of panoids that need processing (in all_set but not in seg_folders_set)
panoids_to_process = list(all_set - seg_folders_set)
print(f"Found {len(panoids_to_process)} images to process")

# Process images in batches of 2
batch_size = 2
for i in range(0, len(panoids_to_process), batch_size):
    batch_panoids = panoids_to_process[i:i+batch_size]
    batch_images = []
    
    print(f"Processing batch {i//batch_size + 1}/{(len(panoids_to_process) + batch_size - 1)//batch_size}")
    
    # Load images for this batch
    for panoid in batch_panoids:
        image_path = os.path.join(raw_folder, f"{panoid}.jpeg")
        if os.path.exists(image_path):
            image, _ = process_image(image_path)
            batch_images.append(image)
        else:
            print(f"Warning: Image file for panoid {panoid} not found")
            batch_panoids.remove(panoid)
    
    if not batch_images:
        continue
    
    # Run inference on the batch
    pred_masks, pred_segs = batch_inference(batch_images, batch_panoids)
    
    # Process each image in the batch
    for j, panoid in enumerate(batch_panoids):
        if j >= len(pred_masks):
            continue
            
        mask = pred_masks[j]
        total_pixels = mask.size
        
        # Calculate percentages
        tree_percent = np.sum(mask == 2) / total_pixels * 100
        terrain_percent = np.sum(mask == 1) / total_pixels * 100
        car_percent = np.sum(mask == 3) / total_pixels * 100
        
        # Append to DataFrame
        new_row = pd.DataFrame({
            'panoid': [panoid],
            'tree': [tree_percent],
            'terrain': [terrain_percent],
            'car': [car_percent]
        })
        
        # Append to CSV directly
        new_row.to_csv('treecoveragestats.csv', mode='a', header=False, index=False)
        
        # Create directory for this panoid in the segmentation folder
        panoid_dir = os.path.join(seg_folder, panoid)
        if not os.path.exists(panoid_dir):
            os.makedirs(panoid_dir)
        
        # Save individual class masks
        class_indices = {'terrain': 1, 'tree': 2, 'car': 3}
        for class_name, class_idx in class_indices.items():
            plt.figure(figsize=(10, 10))
            class_mask = np.zeros_like(mask)
            class_mask[mask == class_idx] = 1
            plt.imshow(class_mask)
            plt.axis('off')
            plt.title('')
            plt.savefig(f'{panoid_dir}/{class_name}.png', bbox_inches='tight', pad_inches=0)
            plt.close()
        
        print(f"Processed panoid {panoid}: Tree {tree_percent:.2f}%, Terrain {terrain_percent:.2f}%, Car {car_percent:.2f}%")

print("Processing complete. All images have been processed and saved.")


Found 122412 images to process
Processing batch 1/61206


  return func(*args, **kwargs)


Processed panoid wSsieCSIlk5JikU6XI47BQ: Tree 7.26%, Terrain 1.56%, Car 0.10%
Processed panoid 4hE-_bg5_9xblMLkKBaDiw: Tree 33.77%, Terrain 6.80%, Car 0.52%
Processing batch 2/61206


KeyboardInterrupt: 