In [None]:
import torch
from pathlib import Path
from PIL import Image
import os
import sys

sys.path.append(os.path.abspath(".."))

from clip.clip import load
from concept_extraction.concept_extraction import (
    prepare_image_from_datasets,
    extract_embedding,
    extract_sae_activations,
    load_sae
)


# Folder that is scanned for the *.png input images.
STL10_FOLDER = Path("stl10_sample_500")
# Sparse-autoencoder checkpoint, located in the parent directory's sae_checkpoints folder.
SAE_CHECKPOINT = Path("..", "sae_checkpoints", "clip_ViT-B_16_sparse_autoencoder_final.pt")
# Destination files for the saved CLIP embeddings and SAE activations.
OUTPUT_CLIP = Path("stl10_embeddings.pt")
OUTPUT_SAE = Path("stl10_sae_activations.pt")

def save_embeddings_and_sae_activations(max_images=200) -> None:
    """Compute CLIP embeddings and SAE activations for a sample of PNG images and save both.

    PNG files in ``STL10_FOLDER`` are sorted by path and the first
    ``max_images`` of them are embedded with the CLIP ViT-B/16 model. The
    stacked embeddings are written to ``OUTPUT_CLIP``; every embedding is then
    passed through the sparse autoencoder loaded from ``SAE_CHECKPOINT`` and
    the stacked activations are written to ``OUTPUT_SAE``. Both files hold a
    dict that also carries ``'image_paths'``: the source paths as strings, in
    the same order as the entries along the first dimension of the tensor.

    Args:
        max_images: Upper bound on the number of images processed. Defaults
            to 200; pass ``None`` to process every PNG in the folder.

    Raises:
        FileNotFoundError: If no ``*.png`` file is selected from
            ``STL10_FOLDER`` (raised before any model is loaded).
    """
    # glob() yields entries in arbitrary filesystem order; sort before
    # truncating so the same subset is selected on every run and machine.
    image_paths = sorted(STL10_FOLDER.glob("*.png"))[:max_images]
    if not image_paths:
        # Fail early with a clear message instead of letting torch.stack([])
        # raise an opaque error after the models have been loaded.
        raise FileNotFoundError(f"No PNG images selected from {STL10_FOLDER}")
    image_path_strings = [str(p) for p in image_paths]

    # Load the CLIP model together with its image transform.
    ViT_B_16_clip, image_transform = load("ViT-B/16")

    # Load the sparse autoencoder.
    sparse_autoencoder = load_sae(SAE_CHECKPOINT)

    # Compute the CLIP embeddings, one image at a time.
    embeddings_list = []
    # Inference only: the results are just written to disk, so no autograd
    # graph needs to be recorded.
    with torch.no_grad():
        for path in image_paths:
            # convert() returns a new image object, so the file opened by
            # Image.open() can be released as soon as the conversion is done.
            with Image.open(path) as raw_image:
                image = raw_image.convert("RGB")
            _, image_transformed = prepare_image_from_datasets(image, image_transform)
            embedding = extract_embedding(image_transformed=image_transformed, clip_model=ViT_B_16_clip)
            embeddings_list.append(embedding)

    embeddings = torch.stack(embeddings_list)

    # Save the embeddings before the SAE stage so they are kept even if that
    # stage fails.
    torch.save({
        'embeddings': embeddings,
        'image_paths': image_path_strings,
    }, OUTPUT_CLIP)
    print(f"✅ Saved CLIP embeddings to {OUTPUT_CLIP}")

    # Compute the SAE activations from the embeddings.
    with torch.no_grad():
        sae_activation_list = [
            extract_sae_activations(embedding, sparse_autoencoder)
            for embedding in embeddings_list
        ]

    sae_activations = torch.stack(sae_activation_list)

    # Save the SAE activations.
    torch.save({
        'sae_activations': sae_activations,
        'image_paths': image_path_strings,
    }, OUTPUT_SAE)
    print(f"✅ Saved SAE activations to {OUTPUT_SAE}")

# The pipeline is launched once, from the notebook's final cell. Inside a
# Jupyter kernel `__name__` is "__main__", so a guarded call in this cell would
# also fire and the whole extraction would run (and overwrite its output
# files) twice.


In [None]:
import torch
from pathlib import Path
from PIL import Image
import os
import sys

sys.path.append(os.path.abspath(".."))

from clip.clip import load
from concept_extraction.concept_extraction import (
    prepare_image_from_datasets,
    extract_embedding,
    extract_sae_activations,
    load_sae
)


# Input: folder whose *.png files are embedded.
STL10_FOLDER = Path("stl10_sample_500")
# Input: sparse-autoencoder checkpoint, one directory level up.
SAE_CHECKPOINT = Path("..").joinpath("sae_checkpoints", "clip_ViT-B_16_sparse_autoencoder_final.pt")
# Outputs: files the CLIP embeddings and the SAE activations are saved to.
OUTPUT_CLIP = Path("stl10_embeddings.pt")
OUTPUT_SAE = Path("stl10_sae_activations.pt")

def save_embeddings_and_sae_activations(max_images=200) -> None:
    """Embed a sample of PNG images with CLIP, run them through the SAE, and save both results.

    The ``*.png`` files in ``STL10_FOLDER`` are sorted by path and truncated
    to ``max_images``. Each image is converted to RGB, prepared with the CLIP
    image transform and embedded by the ViT-B/16 model; the stacked embeddings
    are saved to ``OUTPUT_CLIP``. The embeddings are then fed to the sparse
    autoencoder loaded from ``SAE_CHECKPOINT`` and the stacked activations are
    saved to ``OUTPUT_SAE``. Each output file is a dict that also stores
    ``'image_paths'``, the source paths as strings in processing order.

    Args:
        max_images: Maximum number of images to process. Defaults to 200;
            ``None`` processes every PNG found.

    Raises:
        FileNotFoundError: If no ``*.png`` file is selected from
            ``STL10_FOLDER`` (checked before the models are loaded).
    """
    # Path.glob() returns entries in no particular order, so sort first to
    # make the truncated selection reproducible.
    image_paths = sorted(STL10_FOLDER.glob("*.png"))[:max_images]
    if not image_paths:
        # torch.stack() rejects an empty list with an unhelpful message;
        # report the actual problem up front instead.
        raise FileNotFoundError(f"No PNG images selected from {STL10_FOLDER}")
    image_path_strings = [str(p) for p in image_paths]

    ViT_B_16_clip, image_transform = load("ViT-B/16")
    sparse_autoencoder = load_sae(SAE_CHECKPOINT)

    embeddings_list = []
    # Nothing here is trained, so skip autograd bookkeeping.
    with torch.no_grad():
        for path in image_paths:
            # The RGB copy returned by convert() is independent of the opened
            # file, so the handle is closed right after conversion.
            with Image.open(path) as raw_image:
                image = raw_image.convert("RGB")
            _, image_transformed = prepare_image_from_datasets(image, image_transform)
            embedding = extract_embedding(image_transformed=image_transformed, clip_model=ViT_B_16_clip)
            embeddings_list.append(embedding)

    embeddings = torch.stack(embeddings_list)

    # Written before the SAE stage so the embeddings survive a failure there.
    torch.save({
        'embeddings': embeddings,
        'image_paths': image_path_strings,
    }, OUTPUT_CLIP)
    print(f"Saved CLIP embeddings to {OUTPUT_CLIP}")

    with torch.no_grad():
        sae_activation_list = [
            extract_sae_activations(embedding, sparse_autoencoder)
            for embedding in embeddings_list
        ]

    sae_activations = torch.stack(sae_activation_list)

    torch.save({
        'sae_activations': sae_activations,
        'image_paths': image_path_strings,
    }, OUTPUT_SAE)
    print(f"Saved SAE activations to {OUTPUT_SAE}")


In [None]:
# Run the extraction pipeline; it writes the CLIP embeddings to OUTPUT_CLIP
# and the SAE activations to OUTPUT_SAE.
save_embeddings_and_sae_activations()