# Computing the PCA of a Foreground Object

Many figures in our paper show object parts colored like rainbows. These visualizations are obtained by computing a PCA of the patch features over the foreground object, and that is exactly what we will compute in this tutorial! Let's start by loading some prerequisites.

In [None]:
import pickle
import os
import urllib

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torchvision.transforms.functional as TF
from sklearn.decomposition import PCA
from scipy import signal

from dinov3production import create_model

# Prefer the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

## 1. Model Loading

In [None]:
# Build the backbone registered as 'dinov3_vitl14' through the project's
# create_model factory.
# NOTE(review): pretrained=False presumably leaves the weights randomly
# initialised, in which case the features (and the PCA below) are not
# meaningful — pass pretrained=True for real results, as the inline note says.
model = create_model('dinov3_vitl14', pretrained=False) # Use pretrained=True
# Move the model to the selected device.
model.to(device)
# Switch to evaluation mode (assumes a torch.nn.Module-style object — TODO confirm).
model.eval()

## 2. Load Foreground Classifier
We load the classifier trained in the `foreground_segmentation.ipynb` notebook.

In [None]:
# Load the patch-level foreground classifier saved by
# `foreground_segmentation.ipynb`.
# SECURITY: pickle.load can execute arbitrary code stored in the file. Only
# load a `fg_classifier.pkl` that you produced yourself or otherwise trust.
model_path = "fg_classifier.pkl"
if os.path.exists(model_path):
    with open(model_path, 'rb') as file:
        clf = pickle.load(file)
    print("Loaded foreground classifier.")
else:
    print("Classifier not found! Please run foreground_segmentation.ipynb first.")
    # Fallback so the notebook stays runnable without crashing. This stand-in
    # is fitted on noise, so its foreground predictions carry no meaning.
    from sklearn.linear_model import LogisticRegression
    clf = LogisticRegression()
    # Fit on dummy data to avoid errors later. The labels alternate 0/1 so that
    # both classes are always present; randomly drawn labels can occasionally
    # all land in one class, which makes LogisticRegression.fit raise.
    dummy_labels = np.arange(10) % 2
    # 1024 matches the feature dimension of the mock features used further down.
    clf.fit(np.random.randn(10, 1024), dummy_labels)

## 3. Load Image

In [None]:
def load_image_from_url(url: str) -> Image.Image:
    """Download an image and return it as an RGB PIL image.

    Args:
        url: Address of the image, passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The decoded image, converted to ``"RGB"`` mode.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the downloaded data cannot be read as an image.
    """
    # A bare `import urllib` does not load the `request` submodule; import it
    # explicitly instead of relying on some other package having done so.
    import urllib.request

    with urllib.request.urlopen(url) as f:
        # convert() is called inside the `with`, i.e. while the response is
        # still open for Pillow to read from.
        return Image.open(f).convert("RGB")

image_uri = "https://dl.fbaipublicfiles.com/dinov3/notebooks/pca/test_image.jpg"
try:
    image = load_image_from_url(image_uri)
except Exception as exc:
    # Best-effort fallback: when the download or decode fails (e.g. no network),
    # continue with a solid-colour placeholder so the remaining cells still run.
    # Catching Exception rather than using a bare `except:` lets
    # KeyboardInterrupt and SystemExit propagate, and the message makes it
    # visible that the placeholder is in use.
    print(f"Could not load {image_uri} ({exc!r}); using a placeholder image instead.")
    image = Image.new('RGB', (768, 768), color='orange')


PATCH_SIZE = 14 # Adjusted for library model default

def resize_transform(image: Image.Image, patch_size: int = PATCH_SIZE) -> torch.Tensor:
    """Resize an image so both sides are whole multiples of the patch size.

    Each side is rounded *down* to the largest multiple of ``patch_size`` that
    fits, so the result divides evenly into patches.

    Args:
        image: PIL image to resize; ``image.size`` is read as ``(width, height)``.
        patch_size: Side length of one square patch, in pixels.

    Returns:
        The resized image converted with ``TF.to_tensor``.

    Raises:
        ValueError: If ``patch_size`` is not positive, or if the image is
            smaller than one patch along either side.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be positive, got {patch_size}")

    w, h = image.size
    h_patches = h // patch_size
    w_patches = w // patch_size
    if h_patches == 0 or w_patches == 0:
        # Without this check the resize target would have a zero-sized side.
        raise ValueError(
            f"Image of size {w}x{h} is smaller than one {patch_size}x{patch_size} patch"
        )

    # TF.resize takes the target size as (height, width).
    target_size = (h_patches * patch_size, w_patches * patch_size)
    return TF.to_tensor(TF.resize(image, target_size))

# Patch-aligned tensor version of the image; kept un-normalised for display.
image_resized = resize_transform(image)

# Per-channel standardisation of the tensor that is meant to be fed to the model.
image_resized_norm = TF.normalize(
    image_resized,
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

print(f"Processed image size: {image_resized.shape}")

## 4. Extract Features & Foreground Score

In [None]:
# Enter both context managers in one statement: inference mode first, then the
# autocast region with the float32 dtype.
with torch.inference_mode(), torch.autocast(device_type=device, dtype=torch.float32):
    # Dummy feature extraction for structure
    # feats = model.get_intermediate_layers(image_resized_norm.unsqueeze(0).to(device), n=1, reshape=True)[0]

    # Size of the patch grid, derived from the resized image's height and width.
    h_patches = image_resized.shape[1] // PATCH_SIZE
    w_patches = image_resized.shape[2] // PATCH_SIZE

    # Mock features [D, H, W]
    x = torch.randn(1024, h_patches, w_patches)

    x = x.squeeze().detach().cpu()
    dim = x.shape[0]
    # Flatten the spatial grid, then transpose so each row is one patch: [N, D]
    x = x.view(dim, -1).t()

# Per-patch probability of class 1, laid back out on the patch grid.
fg_probs = clf.predict_proba(x)[:, 1]
fg_score = fg_probs.reshape(h_patches, w_patches)
# A 3x3 median filter smooths the score map before it is thresholded later.
fg_score_mf = torch.from_numpy(signal.medfilt2d(fg_score, kernel_size=3))

# Side-by-side view: the input image next to the filtered score map.
panels = (
    ("Image", image_resized.permute(1, 2, 0)),
    ("FG Probability", fg_score_mf),
)
plt.figure(figsize=(8, 4))
for idx, (title, data) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), idx)
    plt.imshow(data)
    plt.title(title)
plt.show()

## 5. Rainbow PCA

In [None]:
# Patches whose median-filtered score exceeds 0.5 count as foreground.
fg_mask = fg_score_mf > 0.5
foreground_selection = fg_mask.view(-1)

if foreground_selection.sum() <= 3:
    print("Not enough foreground patches found to compute PCA.")
else:
    # Fit the 3-component PCA on foreground patches only.
    fg_patches = x[foreground_selection]
    pca = PCA(n_components=3, whiten=True).fit(fg_patches)

    # Project every patch, then lay the three components out on the patch grid.
    components = pca.transform(x.numpy())
    projected_image = torch.from_numpy(components).view(h_patches, w_patches, 3)
    # Scale by 2 and apply a sigmoid so the values fall in (0, 1); channels first.
    projected_image = (projected_image * 2.0).sigmoid().permute(2, 0, 1)

    # Mask BG
    projected_image.mul_(fg_mask.unsqueeze(0))

    plt.figure(dpi=150)
    plt.imshow(projected_image.permute(1, 2, 0))
    plt.axis("off")
    plt.show()