In [None]:
import os
import torch
import av
import numpy as np
from transformers import VideoLlavaForConditionalGeneration, VideoLlavaProcessor
from accelerate import cpu_offload

In [2]:
#Ask the Hugging Face Hub client not to emit its symlink warning
#NOTE(review): this runs after the import cell; if the library reads the
#variable at import time the setting may arrive too late - confirm, and
#consider moving it above the transformers import
os.environ.update({"HF_HUB_DISABLE_SYMLINKS_WARNING": "1"})

In [None]:
#Checkpoint shared by the model and its processor
MODEL_ID="LanguageBind/Video-LLaVA-7B-hf"

#Load the model weights in half precision, with every module mapped to the CPU
model=VideoLlavaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map={"": "cpu"},
)

#Attach accelerate's CPU-offload hooks, executing on the CPU as well
#NOTE(review): cpu_offload keeps weights in CPU memory, not on disk; with the
#execution device also being the CPU its memory benefit is unclear - confirm
cpu_offload(model, execution_device="cpu")

#Load the matching processor (prompt text + video frames -> model inputs)
processor=VideoLlavaProcessor.from_pretrained(MODEL_ID)

#Device handle kept for later use
device=torch.device("cpu")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
#Video frame extraction
def read_video_pyav(container, indices):
    """Decode the requested frames of the first video stream into one array.

    Args:
        container: Opened PyAV container; only ``seek`` and
            ``decode(video=0)`` are used.
        indices: Non-empty sequence of frame positions to keep. The last
            entry is treated as the highest position, so the sequence is
            expected to be in ascending order. Repeated positions yield a
            single frame.

    Returns:
        np.ndarray: the ``rgb24`` arrays of the selected frames stacked along
        a new leading axis, in decode order.
        (presumably (num_frames, height, width, 3) - confirm against PyAV)

    Raises:
        IndexError: if ``indices`` is empty.
        ValueError: if none of the requested positions could be decoded.
    """
    container.seek(0)
    last_wanted=indices[-1]
    #Hash-based lookup instead of scanning the index array for every frame
    wanted=set(np.asarray(indices).tolist())

    frames=[]
    for position, frame in enumerate(container.decode(video=0)):
        #Nothing of interest lies beyond the last requested position
        if position > last_wanted:
            break
        if position in wanted:
            frames.append(frame.to_ndarray(format="rgb24"))

    if not frames:
        #np.stack would fail with an opaque message on an empty list
        raise ValueError(
            f"No frames were decoded for the requested indices {sorted(wanted)}"
        )
    return np.stack(frames)

In [5]:
#Generating Captions
def caption_video(video_path, prompt="USER: <video>\nDescribe what is happening in the video. ASSISTANT:", num_frames=8, max_new_tokens=100):
    """Generate a text description for a single video file.

    Evenly spaced frames are sampled from the first video stream, passed
    together with ``prompt`` through the module-level ``processor``, and the
    module-level ``model`` generates the answer.

    Args:
        video_path: Path of the video file, opened with ``av.open``.
        prompt: Prompt text handed to the processor alongside the frames.
        num_frames: How many evenly spaced frame positions to sample.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate``.

    Returns:
        The first decoded sequence produced by ``model.generate``, with
        special tokens skipped.

    Raises:
        ValueError: if the stream does not report a positive frame count, or
            if no frame could be decoded.
    """
    #The context manager closes the file once the frames are in memory
    with av.open(video_path) as container:
        total_frames=container.streams.video[0].frames
        #A count of 0 would make the sampled positions meaningless
        if total_frames <= 0:
            raise ValueError(
                f"Cannot sample frames from {video_path!r}: "
                f"the video stream reports {total_frames} frames"
            )
        indices=np.linspace(0, total_frames-1, num=num_frames, dtype=int)
        video=read_video_pyav(container, indices)

    inputs=processor(text=prompt, videos=video, return_tensors="pt")
    outputs=model.generate(**inputs, max_new_tokens=max_new_tokens)

    #Keyword was misspelled as clean_uptokenization_spaces
    return processor.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]

In [1]:
#Generating output
#Needs the import cell and the caption_video cell to have been run first
video_dir="Videos"

#os.listdir returns entries in arbitrary order, so sort for reproducible
#output; lower-casing the name also picks up files such as "clip.MP4"
video_files=sorted(
    f for f in os.listdir(video_dir)
    if f.lower().endswith(('.mp4', '.avi'))
)

for vf in video_files:
    path=os.path.join(video_dir, vf)
    caption=caption_video(path)
    print(f"Video: {vf}\nCaption: {caption}\n{'-'*60}")

NameError: name 'os' is not defined