In [None]:
!pip install -q faiss-cpu
!pip install -q git+https://github.com/openai/CLIP.git

In [None]:
!pip install -q translate
!pip install -q underthesea==1.3.5a3
!pip install -q underthesea[deep]
!pip install -q pyvi
!pip install -q langdetect
!pip install -q googletrans==3.1.0a0
!pip install -q peft
!pip install bitsandbytes
!pip install transformers
!pip install flash-attn
!pip install -U sentence-transformers
!pip install xformers

In [None]:
import os
import torch
import clip
from PIL import Image
import faiss
import numpy as np
import json
import matplotlib.pyplot as plt
import math
import googletrans
import translate
import glob
import underthesea
import sys
import time
from tqdm import tqdm
from pyvi import ViUtils, ViTokenizer
from difflib import SequenceMatcher
from langdetect import detect
from pathlib import Path
import re

In [None]:
# Absolute path of the directory the notebook is executed from; appended to
# sys.path so modules located there can be imported.
ROOT = Path(os.getcwd()).resolve()
sys.path.append(str(ROOT))

# WORK_DIR is ROOT's immediate parent when ROOT has more than one ancestor;
# otherwise ROOT itself is used as the working directory.
WORK_DIR = ROOT.parents[0] if len(ROOT.parents) > 1 else ROOT

# Run CLIP on the GPU when CUDA is available, on the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# clip.load returns the model together with its image preprocessing transform.
model, preprocess = clip.load("ViT-B/32", device=device)

In [None]:
# Encode every keyframe with CLIP and store one .npy file of L2-normalised
# float16 embeddings per video directory.
# Directory layout implied by the globs below:
#   <paths>/<keyframe group>/<video>/<frame>.jpg
des_path = f"{WORK_DIR}/data/dicts/npy_clip"
paths = f"{WORK_DIR}/data/keyframes"

# Create the output directory once up front rather than once per video.
os.makedirs(des_path, exist_ok=True)

# sorted() makes the traversal order reproducible; os.listdir order is arbitrary.
for keyframe in tqdm(sorted(os.listdir(paths))):
    path_keyframe = os.path.join(paths, keyframe)

    # The trailing separator in the pattern restricts the glob to directories.
    # os.path.dirname then drops that trailing separator without assuming
    # that the separator is '/'.
    video_paths = sorted(glob.glob(os.path.join(path_keyframe, "*", "")))
    video_paths = [os.path.dirname(video_dir) for video_dir in video_paths]

    for vd_path in video_paths:
        # Frames are ordered by file name with the '.jpg' extension removed
        # (plain string comparison).
        keyframe_paths = sorted(
            glob.glob(os.path.join(vd_path, "*.jpg")),
            key=lambda frame_path: os.path.basename(frame_path).replace(".jpg", ""),
        )

        if not keyframe_paths:
            # An empty feature file has no feature dimension, so it could not
            # be added to an index later; skip the video and say so.
            tqdm.write(f"No .jpg keyframes found in {vd_path}, skipping")
            continue

        re_feats = []
        for keyframe_path in keyframe_paths:
            # The context manager releases the image file handle as soon as
            # the input tensor has been built.
            with Image.open(keyframe_path) as frame:
                image = preprocess(frame).unsqueeze(0).to(device)

            with torch.no_grad():
                image_feats = model.encode_image(image)
                # Scale to unit length along the feature axis.
                image_feats = image_feats / image_feats.norm(dim=-1, keepdim=True)

            # Stored as a flat float16 vector per frame.
            re_feats.append(image_feats.cpu().numpy().astype(np.float16).flatten())

        # One output file per video directory, named after that directory.
        name_npy = os.path.basename(vd_path)
        outfile = os.path.join(des_path, f"{name_npy}.npy")

        # Stack explicitly so the file always holds a (num_frames, dim) array.
        np.save(outfile, np.stack(re_feats))

In [None]:
def process_name(name: int):
    """Return ``name`` as a string left-padded with '0' to six characters.

    Values whose string form already has six or more characters are returned
    unchanged (as a string).
    """
    text = str(name)
    return text.rjust(6, "0")

def sort_key(file_path):
    """Build a sort key for file names shaped like ``<prefix><num>_V<num>``.

    Only the base name of ``file_path`` is inspected. When it starts with a
    non-digit prefix followed by a number, ``_V`` and a second number, the key
    is ``(prefix, first_number, second_number)`` with both numbers converted
    to ``int`` so they compare numerically. Any other name yields
    ``(base_name, 0, 0)``.
    """
    base_name = os.path.basename(file_path)
    parsed = re.match(r'(\D+)(\d+)_(V)(\d+)', base_name)

    # Guard clause: names that do not follow the pattern sort by the raw name.
    if parsed is None:
        return (base_name, 0, 0)

    head, first_number, _, second_number = parsed.groups()
    return (head, int(first_number), int(second_number))


def write_bin_file_clip(bin_path: str, npy_path: str, method='cosine', feature_shape=512):
    """Build a flat FAISS index from saved feature files and write it to disk.

    Every ``*.npy`` file in ``npy_path`` is loaded in ``sort_key`` order and
    its rows are appended to the index, so the position of a vector in the
    index follows that file order.

    Args:
        bin_path: Directory that receives ``faiss_CLIP_<method>.bin``; it is
            created when missing.
        npy_path: Directory containing the per-video ``.npy`` feature files.
        method: ``'L2'`` for ``faiss.IndexFlatL2`` or ``'cosine'`` for
            ``faiss.IndexFlatIP``. Inner product only equals cosine similarity
            for unit-length vectors, so the stored features are expected to
            be normalised already.
        feature_shape: Dimensionality of each feature vector (the original
            note on this parameter mentions 512 and 768 as values to edit).

    Raises:
        ValueError: If ``method`` is not exactly ``'L2'`` or ``'cosine'``, or
            if a non-empty feature file is not a 2-D array with
            ``feature_shape`` columns.
    """
    # Exact comparison: the previous substring tests (`method in 'L2'`) also
    # accepted '', 'L' or 'cos', and the `assert "<text>"` fallback never
    # fired, leaving `index` undefined for unsupported methods.
    if method not in ('L2', 'cosine'):
        raise ValueError(f"{method} not supported")

    if method == 'L2':
        index = faiss.IndexFlatL2(feature_shape)
    else:
        index = faiss.IndexFlatIP(feature_shape)

    npy_files = glob.glob(os.path.join(npy_path, "*.npy"))
    npy_files_sorted = sorted(npy_files, key=sort_key)

    for npy_file in npy_files_sorted:
        feats = np.load(npy_file)

        # A file without any vectors contributes nothing to the index.
        if feats.size == 0:
            continue

        # FAISS works on contiguous float32 data; features may have been
        # saved in a narrower dtype such as float16.
        feats = np.ascontiguousarray(feats, dtype=np.float32)
        if feats.ndim != 2 or feats.shape[1] != feature_shape:
            raise ValueError(
                f"{npy_file}: expected shape (n, {feature_shape}), got {feats.shape}"
            )
        index.add(feats)

    # faiss.write_index needs the target directory to exist.
    os.makedirs(bin_path, exist_ok=True)
    out_file = os.path.join(bin_path, f"faiss_CLIP_{method}.bin")
    faiss.write_index(index, out_file)

    print(f'Saved {out_file}')

# Build the index with the function's default method and feature size from
# the saved .npy feature files, writing the result into the bin_clip directory.
write_bin_file_clip(
    bin_path=f"{WORK_DIR}/data/dicts/bin_clip",
    npy_path=f"{WORK_DIR}/data/dicts/npy_clip",
)
