In [33]:
import clip
import torch
import torch.nn.functional as F
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from types import MethodType
import os
import zipfile
import requests
from io import BytesIO
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [45]:
import torch
import torch.nn.functional as F
from types import MethodType
import os
import csv

# Module-level store filled by the wrapped blocks on every forward pass:
# maps "layer_<i>" to the detached output of model.blocks[i].
activations = {}

# Attribute under which a block's un-patched forward is remembered, so that
# wrapping the same model again replaces the wrapper instead of nesting it.
_ORIG_FORWARD_ATTR = "_logit_lens_orig_forward"


def wrap_vit_blocks_dino(model):
    """Patch every block in ``model.blocks`` so its output is recorded.

    After this call, each forward pass through block ``i`` stores a detached
    copy of that block's output in the module-level ``activations`` dict under
    the key ``"layer_<i>"``. The dict is cleared first, so stale entries from a
    previous model or run are dropped.

    The function is idempotent: calling it repeatedly on the same model (for
    example when a notebook cell is re-run) keeps exactly one wrapper per block.

    Args:
        model: Object exposing an iterable ``blocks`` attribute whose items
            have a ``forward`` method returning a tensor.

    Returns:
        The module-level ``activations`` dict (the same object on every call).
    """
    activations.clear()

    def make_custom_forward(orig_forward, layer_name):
        # Factory so each wrapper closes over its own forward and layer name
        # rather than over the loop variables.
        def custom_forward(self, *args, **kwargs):
            out = orig_forward(*args, **kwargs)
            activations[layer_name] = out.detach()
            return out
        return custom_forward

    for i, block in enumerate(model.blocks):
        orig_forward = getattr(block, _ORIG_FORWARD_ATTR, None)
        if orig_forward is None:
            # First time this block is wrapped: remember the real forward.
            orig_forward = block.forward
            setattr(block, _ORIG_FORWARD_ATTR, orig_forward)

        block.forward = MethodType(make_custom_forward(orig_forward, f"layer_{i}"), block)

    return activations


def logit_lens_analysis_dino(activations, projection_head, final_output, temperature=1.0, top_k=5):
    """Compare each layer's projected CLS token with the final CLS token.

    For every entry in ``activations`` the first token (``x[:, 0, :]``) is
    passed through ``projection_head``, L2-normalised, and compared by cosine
    similarity with the first token of ``final_output``.

    Args:
        activations: Mapping of layer name to a tensor indexed as
            ``x[:, 0, :]`` (i.e. at least 3-D, with the CLS token first along
            the second axis).
        projection_head: Callable applied to each layer's CLS token.
        final_output: Tensor indexed as ``final_output[:, 0, :]``; the
            reference every layer is compared against.
        temperature: Currently unused; kept so existing callers keep working.
        top_k: Currently unused; kept so existing callers keep working.

    Returns:
        dict mapping layer name to a Python float. Despite the name
        ``distances`` the values are cosine *similarities*. Because the value
        is extracted with ``.item()``, the batch dimension must have size 1.
    """
    # The reference CLS token does not depend on the layer, so slice it once.
    cls_final_output = final_output[:, 0, :]

    distances = {}
    for name, x in activations.items():
        cls_token = x[:, 0, :]
        projected = F.normalize(projection_head(cls_token), dim=-1)
        similarity = F.cosine_similarity(projected, cls_final_output, dim=-1)
        distances[name] = similarity.detach().cpu().item()

    return distances


def perform_logit_lens_analysis(model, dataset, device, cosine_path="logit_lens_results/cosine_similarity.csv"):
    """Run the per-layer CLS cosine-similarity analysis and append it to a CSV.

    For every ``(image, label)`` item of ``dataset`` the image is pushed
    through ``model.forward_features`` as a batch of one; the wrapped blocks
    record their outputs, and each layer's CLS token (passed through
    ``model.head``) is compared with the final CLS token. One row per image is
    appended to ``cosine_path``.

    Args:
        model: Model exposing ``blocks``, ``head``, ``forward_features``,
            ``eval()`` and ``to()``. It is moved to ``device``, switched to
            eval mode, and its blocks are patched by ``wrap_vit_blocks_dino``.
        dataset: Iterable of ``(image, label)`` pairs; ``image`` is a tensor
            without a batch dimension. Labels are ignored.
        device: Device the model and each image are moved to.
        cosine_path: CSV file to append to. A header row is written only when
            the file is missing or empty; its parent directory is created if
            needed.

    Returns:
        None. Results are written to ``cosine_path``.
    """
    # Inputs are moved to `device` below, so the weights must live there too;
    # otherwise the forward pass fails with a device mismatch.
    model.to(device)
    model.eval()

    wrap_vit_blocks_dino(model)

    headers = [f"layer_{i}" for i in range(len(model.blocks))]
    cosine_header = ['Image'] + headers

    # dirname is '' for a bare file name, and os.makedirs('') raises.
    out_dir = os.path.dirname(cosine_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    write_header = not os.path.exists(cosine_path) or os.path.getsize(cosine_path) == 0

    # Open the file once for the whole run instead of once per image.
    with open(cosine_path, 'a', newline='') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(cosine_header)

        for image_idx, (image, _label) in enumerate(dataset):
            image = image.unsqueeze(0).to(device)

            with torch.no_grad():
                final_output = model.forward_features(image)
                final_output = F.normalize(final_output, dim=-1)

                distances = logit_lens_analysis_dino(
                    activations,
                    model.head,
                    final_output
                )

            cosine_row = [f"Image_{image_idx + 1}"] + [distances[layer] for layer in headers]
            writer.writerow(cosine_row)
            # Keep finished rows on disk even if a long run is interrupted.
            f.flush()

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def plot_results(distances, predictions):
    """Plot per-layer cosine similarity and prediction probability as heatmaps.

    Args:
        distances: Mapping whose keys of the form ``"layer_<int>"`` hold one
            numeric cosine-similarity value each. Other keys are ignored.
        predictions: Mapping queried with ``.get`` for ``"layer_<i>_prob"``
            (numeric; missing entries become NaN) and ``"layer_<i>_label"``
            (text drawn above the matching probability cell; missing or empty
            labels are skipped).

    Returns:
        None. The figure is shown with ``plt.show()``. If there are no layer
        keys, or a value cannot be converted to ``float``, a message is
        printed and nothing is drawn.
    """
    layer_names = sorted(
        (key for key in distances.keys() if key.startswith("layer_")),
        key=lambda name: int(name.split('_')[1])
    )

    if not layer_names:
        # Nothing to draw; a zero-width heatmap has no data to derive colour
        # limits from. Message (Polish, like the one below): "No layers to plot."
        print("Brak warstw do narysowania.")
        return

    try:
        similarity_values = [float(distances[layer]) for layer in layer_names]
        prob_values = [float(predictions.get(f"{layer}_prob", np.nan)) for layer in layer_names]
        predicted_labels = [predictions.get(f"{layer}_label", "") for layer in layer_names]
    except Exception as e:
        # Message (Polish): "Error while preparing the data:"
        print("Błąd podczas przygotowywania danych:", e)
        return

    # Every value above has already been through float(), so no further type
    # check is needed here.

    fig, axes = plt.subplots(2, 1, figsize=(12, 6), gridspec_kw={'height_ratios': [1, 1]})

    sns.heatmap(np.array(similarity_values).reshape(1, -1), annot=True, cmap="viridis",
                xticklabels=layer_names, yticklabels=["Cosine Similarity"], cbar=True,
                ax=axes[0], cbar_kws={'label': 'Cosine Similarity'})

    sns.heatmap(np.array(prob_values).reshape(1, -1), annot=True, cmap="magma",
                xticklabels=layer_names, yticklabels=["Prediction Prob."], cbar=True,
                ax=axes[1], cbar_kws={'label': 'Prediction Probability'})

    # Write each predicted label at the horizontal centre of its cell
    # (x = i + 0.5 in data coordinates), at y = -0.3 on the probability axes.
    for i, label in enumerate(predicted_labels):
        if label:
            axes[1].text(i + 0.5, -0.3, label, ha='center', va='center',
                         color='black', fontsize=9, rotation=90,
                         transform=axes[1].transData)

    plt.suptitle("Cosine Similarity & Prediction Probability per Layer", fontsize=14)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

In [35]:
url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
extract_path = "./tiny-imagenet-200"
train_dir = os.path.join(extract_path, "train")

# The archive is large; only fetch it when the extracted training folder is
# not already present, so the cell can be re-run cheaply.
if os.path.isdir(train_dir):
    print("Tiny ImageNet already present, skipping download.")
else:
    print("Downloading Tiny ImageNet...")
    # (connect, read) timeouts so a stalled connection fails instead of hanging.
    response = requests.get(url, timeout=(10, 60))
    # Fail loudly on an HTTP error instead of handing an error page to zipfile.
    response.raise_for_status()
    with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(".")

    print("Download and extraction complete.")


# The model used in this notebook is a DINO ViT, which expects ImageNet
# normalisation statistics. The previous values were the CLIP statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)



Downloading Tiny ImageNet...


KeyboardInterrupt: 

In [37]:
import timm

device = "cuda" if torch.cuda.is_available() else "cpu"
# Put the weights on the same device the analysis moves its inputs to;
# otherwise the forward pass fails with a device mismatch when CUDA is available.
model = timm.create_model('vit_small_patch16_224.dino', pretrained=True).to(device)
model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [None]:
# Run the per-layer cosine-similarity analysis over every item of train_dataset.
# Rows are appended to the CSV, so re-running this cell adds duplicate rows
# unless the file is removed first.
perform_logit_lens_analysis(model=model, dataset=train_dataset, device=device, cosine_path="logit_lens_results/DINO/cosine_similarity.csv")

In [None]:
import pandas as pd

# Per-image, per-layer cosine similarities written by perform_logit_lens_analysis.
distances = pd.read_csv("logit_lens_results/DINO/cosine_similarity.csv")
# NOTE(review): no code visible in this notebook writes predictions.csv, so this
# line raises FileNotFoundError on a fresh run unless the file was produced
# elsewhere — confirm where it comes from.
predictions = pd.read_csv("logit_lens_results/DINO/predictions.csv")