In [1]:
# Importing libraries
import lancedb
import pyarrow as pa
from transformers import CLIPModel, CLIPProcessor
from torchvision.io import read_image
import torch
from torch.utils.data import Dataset, DataLoader
import glob
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import os

In [2]:
# Dataset and dataloader
class KeyframesDataset(Dataset):
    """Dataset over keyframe images laid out as ``<root>/keyframes/<video>/<image>``.

    Each item bundles the decoded image with its video folder name, its file
    name, the ``frame_idx`` value looked up in
    ``<root>/map-keyframes/<video>.csv`` and the image path itself.
    """

    def __init__(self, root="."):
        """Collect all image paths and load every per-video keyframe map.

        Args:
            root: Directory that contains the ``keyframes`` and
                ``map-keyframes`` folders. Defaults to the current working
                directory, which matches the previously hard-coded layout.
        """
        # os.path.join picks the platform separator, so the same code works on
        # Windows (".\\keyframes\\*\\*") and on POSIX ("./keyframes/*/*").
        pattern = os.path.join(root, "keyframes", "*", "*")
        print(f"Image path for dataset: {pattern}")
        # glob returns entries in filesystem order; sort for a reproducible item order.
        self.images_paths = sorted(glob.glob(pattern))

        # Map "<video>" (CSV file name without extension) -> DataFrame of that CSV.
        self.map_keyframes_dfs = {}
        for path in glob.glob(os.path.join(root, "map-keyframes", "*.csv")):
            video_name = os.path.splitext(os.path.basename(path))[0]
            self.map_keyframes_dfs[video_name] = pd.read_csv(path)

    def __len__(self):
        """Return the number of keyframe images found."""
        return len(self.images_paths)

    def _describe(self, path):
        """Return ``(video_name, image_name, frame_idx)`` for one image path.

        Raises:
            KeyError: If no keyframe map was loaded for the image's video
                folder, or the map has no matching row.
            ValueError: If the image file name (without extension) is not an
                integer.
        """
        video_name = os.path.basename(os.path.dirname(path))
        image_name = os.path.basename(path)
        keyframe_number = int(os.path.splitext(image_name)[0])
        # Keyframe number k is looked up at index label k - 1 of the map
        # (assumes the CSV rows are in keyframe order with the default index
        # produced by read_csv -- TODO confirm against the data).
        frame_idx = self.map_keyframes_dfs[video_name].at[keyframe_number - 1, 'frame_idx']
        return video_name, image_name, frame_idx

    def __getitem__(self, idx):
        """Return ``(image, video_name, image_name, frame_idx, path)`` for item ``idx``."""
        path = self.images_paths[idx]
        image = read_image(path)
        video_name, image_name, frame_idx = self._describe(path)
        return image, video_name, image_name, frame_idx, path

# Build the dataset and wrap it in an unshuffled loader that yields
# 256-image batches, then report how much work there is.
dataset = KeyframesDataset()
dataloader = DataLoader(dataset=dataset, shuffle=False, batch_size=256)
num_images, num_batches = len(dataset), len(dataloader)
print("Number of images: ", num_images)
print("Number of batches: ", num_batches)

Image path for dataset: .\keyframes\*\*
Number of images:  106589
Number of batches:  417


In [3]:
# Load CLIP model
# The model and its preprocessing pipeline come from the same checkpoint;
# the model is moved to the GPU when CUDA is available, otherwise it stays on CPU.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
clip_weights = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
model = clip_weights.to(device)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)



In [2]:
# Create lancedb instance
# Connect to the on-disk database and get a handle on the embeddings table,
# creating it with an explicit Arrow schema when it does not exist yet.
lancedb_instance = lancedb.connect("database.lance")
TABLE_NAME = "patch14v2"
table_is_new = TABLE_NAME not in lancedb_instance.table_names()
if table_is_new:
    # One row per keyframe: a fixed-size vector of 768 float32 values plus
    # the metadata needed to locate the frame again.
    schema = pa.schema([
        ("embedding", pa.list_(pa.float32(), 768)),
        ("video_name", pa.string()),
        ("image_name", pa.string()),
        ("frame_idx", pa.int32()),
        ("path", pa.string()),
    ])
    lancedb_instance.create_table(TABLE_NAME, schema=schema)
    database = lancedb_instance.open_table(TABLE_NAME)
else:
    database = lancedb_instance.open_table(TABLE_NAME)
    print(f"Warning: Table {TABLE_NAME} already exists!")



In [5]:
# Embed every batch of keyframes with CLIP and write the rows to the LanceDB
# table in chunks, so that progress is persisted while the loop is running.
LEN_DATALOADER = len(dataloader)
# Flush roughly every 5% of the batches. Clamp to at least 1: with fewer than
# 20 batches int(0.05 * n) is 0 and the modulo below would raise ZeroDivisionError.
SAVE_EVERY = max(1, int(0.05 * LEN_DATALOADER))

table = lancedb_instance[TABLE_NAME]
# Per-batch DataFrames collected since the last flush. Concatenating them once
# per flush avoids re-copying a growing DataFrame on every iteration.
pending_frames = []

for i, (images, video_names, image_names, frame_idxs, paths) in enumerate(tqdm(dataloader)):
    inputs = processor(images=images, return_tensors="pt").to(device)
    with torch.inference_mode():
        # Deliberately no .squeeze(): a batch holding a single image must keep
        # its leading batch axis, otherwise iterating over the array would
        # yield individual floats instead of one embedding per image.
        embeddings = model.get_image_features(**inputs).cpu().numpy()
    # Building the frame from equal-length columns makes pandas raise if the
    # number of embeddings ever disagrees with the number of metadata entries.
    pending_frames.append(pd.DataFrame({
        'embedding': list(embeddings),
        'video_name': list(video_names),
        'image_name': list(image_names),
        'frame_idx': [int(frame_idx) for frame_idx in frame_idxs],
        'path': list(paths),
    }))
    if (i + 1) % SAVE_EVERY == 0 or i + 1 == LEN_DATALOADER:
        df = pd.concat(pending_frames, ignore_index=True)
        table.add(df)
        pending_frames = []
        print(f"Saved embeddings for batch {i+1}/{LEN_DATALOADER}")

  0%|          | 0/417 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Saved embeddings for batch 20/417
Saved embeddings for batch 40/417
Saved embeddings for batch 60/417
Saved embeddings for batch 80/417
Saved embeddings for batch 100/417
Saved embeddings for batch 120/417
Saved embeddings for batch 140/417
Saved embeddings for batch 160/417
Saved embeddings for batch 180/417
Saved embeddings for batch 200/417
Saved embeddings for batch 220/417
Saved embeddings for batch 240/417
Saved embeddings for batch 260/417
Saved embeddings for batch 280/417
Saved embeddings for batch 300/417
Saved embeddings for batch 320/417
Saved embeddings for batch 340/417
Saved embeddings for batch 360/417
Saved embeddings for batch 380/417
Saved embeddings for batch 400/417
Saved embeddings for batch 417/417
