In [None]:
import torch
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor

# Hugging Face Hub repository that provides both the preprocessor config and
# the model weights/code.
hf_repo = "nvidia/C-RADIOv2-H"

image_processor = CLIPImageProcessor.from_pretrained(hf_repo)
# NOTE(review): trust_remote_code=True executes Python code shipped in the Hub
# repository. Only use it with repositories you trust; consider pinning a
# `revision` so the executed code cannot change underneath you.
model = AutoModel.from_pretrained(hf_repo, trust_remote_code=True)
# Assign the result instead of ending the cell on a bare expression: a trailing
# `model.eval().cuda()` prints the entire module tree and, more importantly,
# stores the model in IPython's output cache (`Out[n]` / `_`), which keeps it
# alive even after a later `del model`.
model = model.eval().cuda()




In [None]:
import requests

url = 'http://images.cocodataset.org/val2017/000000039769.jpg'

# Download the sample image. A timeout prevents the cell from hanging forever
# on a stalled connection, and raise_for_status() surfaces HTTP errors directly
# instead of letting PIL fail on an error page.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

# Preprocess with the repository's image processor and move the batch to the GPU.
pixel_values = image_processor(images=image, return_tensors='pt', do_resize=True).pixel_values
pixel_values = pixel_values.cuda()
print(pixel_values.shape)

# Inference only: disable autograd so no graph or activations are retained on
# the GPU for a backward pass that never happens.
with torch.no_grad():
    summary, spatial_features = model(pixel_values)
print(spatial_features.shape)

In [None]:
# Arrange the flat sequence of patch features into a 2D grid.
from einops import rearrange

# Patch edge length in pixels used to derive the grid size.
# NOTE(review): assumed to match the model's patch size — confirm against the model config.
patch_size = 16

grid_h = pixel_values.shape[-2] // patch_size
grid_w = pixel_values.shape[-1] // patch_size

# The cell overwrites `spatial_features` with its own rearranged version, so
# only rearrange while it is still the 3-D (b, h*w, d) sequence. This keeps the
# cell safe to re-run: a second execution is a no-op instead of an einops error.
if spatial_features.ndim == 3:
    spatial_features = rearrange(
        spatial_features, 'b (h w) d -> b d h w', h=grid_h, w=grid_w
    )

print(spatial_features.shape)

In [None]:
# Visualize the spatial features: project every grid cell's feature vector onto
# its first three principal components and display them as an RGB image next to
# the input image.

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

pca = PCA(n_components=3)

# Build a (H*W, D) matrix from the first item of the (B, D, H, W) feature grid.
features = spatial_features[0].detach().cpu().numpy()
features = features.transpose(1, 2, 0)
features = features.reshape(-1, features.shape[-1])
pca.fit(features)

result = pca.transform(features)

# PCA scores are unbounded and centred on zero, while imshow clips float RGB
# data to [0, 1]. Rescale each component to [0, 1] so the whole range is
# visible; the clip on the span guards against a constant component.
component_min = result.min(axis=0, keepdims=True)
component_span = (result.max(axis=0, keepdims=True) - component_min).clip(min=1e-12)
result = (result - component_min) / component_span
result = result.reshape(spatial_features.shape[2], spatial_features.shape[3], 3)

# Side-by-side figure: original image on the left, PCA feature map on the right.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
axs[0].imshow(image)
axs[0].set_title("Input image")
axs[0].axis("off")
axs[1].imshow(result)
axs[1].set_title("Spatial features (first 3 PCA components)")
axs[1].axis("off")
plt.show()


In [None]:
# Collect intermediate outputs from the wrapped backbone.
# NOTE(review): indices=[15, 31] presumably selects two block outputs and
# aggregation="dense" the dense (per-patch) form — confirm against the
# model's forward_intermediates implementation.
# Run under no_grad: this is inference only, and keeping the autograd graph for
# every layer would hold a large amount of GPU memory for no benefit.
with torch.no_grad():
    x = model.model.forward_intermediates(
        pixel_values,
        indices=[15, 31],
        intermediates_only=True,
        aggregation="dense",
    )
# `z` is kept as a second name for the same object; the cleanup cell deletes both.
z = x
print(len(z))
print(z[0].shape)
In [None]:
# Report CUDA memory usage: allocated and reserved bytes converted to MiB
# (one value per line), followed by the allocator's full summary table.
print(
    torch.cuda.memory_allocated() / 1024**2,
    torch.cuda.memory_reserved() / 1024**2,
    sep="\n",
)
print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [None]:
import torch
import gc   


# Drop every notebook-level reference created by the cells above.
# Removing the names with pop(..., None) instead of `del` makes this cell
# idempotent: it no longer raises NameError when a cell was skipped or when
# this cell is executed a second time.
for _stale_name in (
    "model",
    "x",
    "z",
    "spatial_features",
    "pixel_values",
    "image",
    "image_processor",
    "summary",
):
    globals().pop(_stale_name, None)
del _stale_name

# Keep `model` defined (as None) so later code can still test for it.
model = None
# Collect unreachable objects first, then hand the cached, now-unused blocks
# of the CUDA caching allocator back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Report CUDA memory usage again: allocated and reserved bytes converted to MiB
# (one value per line), followed by the allocator's full summary table.
print(
    torch.cuda.memory_allocated() / 1024**2,
    torch.cuda.memory_reserved() / 1024**2,
    sep="\n",
)
print(torch.cuda.memory_summary(device=None, abbreviated=False))