# Export Embeddings

> Given a folder of frames, export their embeddings

In [None]:
# | default_exp models.frame_embeddings

In [None]:
# | hide
%reload_ext nb_black
%reload_ext autoreload
%autoreload 2

from nbdev.showdoc import *
import sys

# Add the directory two levels up (presumably the repository root — confirm)
# to the import path so the package imports below resolve from this notebook.
__root = "../../"
sys.path.append(__root)

In [None]:
# | export
from clip_video_classifier.cli import cli
from torch_snippets import *
import clip

In [None]:
# | export


class Frame2Embeddings:
    """Compute CLIP image embeddings for dumped frame files.

    Calling an instance dispatches on the path it is given: a directory is
    processed file by file and the results are written to disk; any other
    path is treated as a single dumped frames file whose embeddings are
    returned.
    """

    def __init__(self, model="ViT-B/32", device="cuda", batch_size=16):
        """Load the CLIP model and its preprocessing transform.

        Args:
            model: Model name forwarded to `clip.load`.
            device: Device the model is loaded on. Frames are moved to this
                same device before they are encoded.
            batch_size: Number of frames encoded per forward pass.
        """
        # Keep the device so inputs are always placed where the model lives,
        # instead of relying on whatever global `device` happens to be in scope.
        self.device = device
        self.model, self.preprocess = clip.load(model, device=device)
        self.batch_size = batch_size

    def __call__(self, frames_tensor_path_or_folder, **kwargs):
        """Embed one frames file, or every frames file in a folder.

        Args:
            frames_tensor_path_or_folder: Directory of frames files, or the
                path of a single frames file.
            **kwargs: Forwarded unchanged to the method that handles the
                path (`embeddings_folder` / `n` for a directory; the
                single-file method accepts no extra arguments).

        Returns:
            The embeddings tensor for a single file; `None` for a directory.
        """
        if os.path.isdir(frames_tensor_path_or_folder):
            return self.extract_clip_embeddings_for_folder(
                frames_tensor_path_or_folder, **kwargs
            )
        return self.frames2clip_image_embeddings(
            frames_tensor_path_or_folder, **kwargs
        )

    @torch.no_grad()
    def frames2clip_image_embeddings(self, frames_tensor_path):
        """Load a dumped frames file and return its CLIP image embeddings.

        Args:
            frames_tensor_path: Path readable by `loaddill` that yields an
                iterable of frames.

        Returns:
            A CPU tensor holding the embeddings of all frames, concatenated
            along the first dimension in frame order.
        """
        frames = loaddill(frames_tensor_path)
        # Each frame is scaled by 255, cast to uint8 and its axes reordered
        # (0, 1, 2) -> (1, 2, 0) before becoming a PIL image.
        # NOTE(review): presumably frames are channel-first floats in [0, 1]
        # — confirm against the code that dumps them.
        frames = [
            (np.array(a) * 255).astype(np.uint8).transpose(1, 2, 0) for a in frames
        ]
        frames = [Image.fromarray(im) for im in frames]
        # Move the preprocessed frames to the device the model was loaded on.
        frames = torch.stack([self.preprocess(im) for im in frames]).to(self.device)
        # Encode in chunks of `batch_size` and collect the results on the CPU.
        embeddings = [
            self.model.encode_image(batch).cpu().detach()
            for batch in torch.split(frames, self.batch_size)
        ]
        return torch.cat(embeddings)

    def extract_clip_embeddings_for_folder(
        self, frames_folder, embeddings_folder, n=None
    ):
        """Embed every frames file in a folder and dump the results.

        For each entry of `frames_folder` the embeddings are written to
        `<embeddings_folder>/<stem>.embeddings.tensor`. Entries whose output
        file already exists are skipped.

        Args:
            frames_folder: Folder whose entries are frames files.
            embeddings_folder: Destination folder; created via `makedir`.
            n: If given, stop once the entry index reaches `n`. Skipped
                entries count towards this limit because the index comes
                from enumerating the folder listing.
        """
        frames_folder = P(frames_folder)
        embeddings_folder = P(embeddings_folder)
        makedir(embeddings_folder)
        for ix, frames_tensor_path in E((tracker := track2(frames_folder.ls()))):
            # The status message is sent first on every iteration, before any
            # early exit, exactly as the tracker is driven elsewhere in the loop.
            tracker.send(f"Processing {frames_tensor_path}")
            if n is not None and ix >= n:
                return
            to = f"{embeddings_folder}/{stem(frames_tensor_path)}.embeddings.tensor"
            if exists(to):
                Info(f"Skipping {to} as it already exists")
                continue
            embeddings = self(frames_tensor_path)
            dumpdill(embeddings, to)


@cli.command()
def frames_to_embeddings(frames_folder, embeddings_folder, model, device):
    """Export embeddings for `frames_folder` into `embeddings_folder`.

    Builds a `Frame2Embeddings` with the given `model` and `device` and calls
    it on `frames_folder`, passing `embeddings_folder` as the destination.
    """
    embedder = Frame2Embeddings(model=model, device=device)
    embedder(frames_folder, embeddings_folder=embeddings_folder)

Set up the object
```python
root = P("/mnt/347832F37832B388/ml-datasets/ssbd/")
f2e = Frame2Embeddings()
```

Usage for a single set of frames
```python
frames_folder = root / "ssbd-frames/10fps"
frames_path = frames_folder.ls()[0]
frames = loaddill(frames_path)
subplots(frames)
f2e(frames_path)
```
---
Usage for a folder of frames
```python
embeddings_folder = root/"ssbd-embeddings/10fps"
f2e(frames_folder, embeddings_folder=embeddings_folder, n=3)
```

In [None]:
import nbdev

# Run nbdev's export step for this project (see the `# | export` and
# `# | default_exp` directives in the cells above).
nbdev.nbdev_export()