# Load Configuration File

In [2]:
from pathlib import Path
from box import Box  
from vast.utils import load_yaml

# Load config.yaml and wrap the parsed result in a Box so nested settings can
# be read with attribute access (cfg.section.key) in the cells below.
cfg = Box(load_yaml("config.yaml"))

# Sanity check: echo the settings the later cells rely on, one per line.
print(
    cfg.download.default_url,
    cfg.data.raw_videos,
    cfg.model.whisper_size,
    cfg.model.modelname,
    sep="\n",
)


https://www.youtube.com/watch?v=LwJfk1NUeg4
data/raw_videos
small
Salesforce/blip-image-captioning-base


# Download Videos

In [3]:
from vast.video_downloader import download_video

# Both the destination folder and the source URL come from the loaded config.
output_dir = Path(cfg.data.raw_videos)
url = cfg.download.default_url

# Fetch the video. The return value is kept in `videos` because the subtitle
# and keyframe cells pass it on as their input.
# NOTE(review): the name is plural, but the logged output suggests a single
# file path is returned -- confirm against download_video.
videos = download_video(url, output_dir)

print(f"Downloaded: {videos}")


Downloading video: https://www.youtube.com/watch?v=LwJfk1NUeg4
[youtube] Extracting URL: https://www.youtube.com/watch?v=LwJfk1NUeg4
[youtube] LwJfk1NUeg4: Downloading webpage
[youtube] LwJfk1NUeg4: Downloading android sdkless player API JSON
[youtube] LwJfk1NUeg4: Downloading tv client config
[youtube] LwJfk1NUeg4: Downloading tv player API JSON
[youtube] LwJfk1NUeg4: Downloading web safari player API JSON
[youtube] LwJfk1NUeg4: Downloading player c6d7bdc9-main


         player = https://www.youtube.com/s/player/c6d7bdc9/player_ias.vflset/en_US/base.js
         n = gOP7IfHPrpMzsuTT ; player = https://www.youtube.com/s/player/c6d7bdc9/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


[youtube] LwJfk1NUeg4: Downloading m3u8 information
[info] LwJfk1NUeg4: Downloading 1 format(s): 137+140
[download] Sleeping 5.00 seconds as required by the site...
[download] Destination: data/raw_videos/10 Min Conversation in Slow German ｜ Super Easy German 287.f137.mp4
[download] 100% of  141.83MiB in 00:00:07 at 18.54MiB/s    
[download] Destination: data/raw_videos/10 Min Conversation in Slow German ｜ Super Easy German 287.f140.m4a
[download] 100% of   10.87MiB in 00:00:00 at 18.07MiB/s  
[Merger] Merging formats into "data/raw_videos/10 Min Conversation in Slow German ｜ Super Easy German 287.mp4"
Deleting original file data/raw_videos/10 Min Conversation in Slow German ｜ Super Easy German 287.f140.m4a (pass -k to keep)
Deleting original file data/raw_videos/10 Min Conversation in Slow German ｜ Super Easy German 287.f137.mp4 (pass -k to keep)
Detected video codec: h264
Final file: data/raw_videos/10 Min Conversation in Slow German ｜ Super Easy German 287.mp4
Downloaded: data/raw_v

# Generate Subtitles

In [4]:
from vast.subtitle_generator import generate_subtitle

# Whisper model size and the subtitle output folder are read from the config.
model_size = cfg.model.whisper_size
output_dir = Path(cfg.data.subtitles)

# Transcribe the downloaded video; `videos` is produced by the download cell.
subtitles = generate_subtitle(videos, output_dir, model_size)

print(f"Subtitles saved to: {subtitles}")


Loading Whisper model: small ...
Transcribing audio (model: small)...




Subtitle file created: data/subtitles/10 Min Conversation in Slow German ｜ Super Easy German 287.srt
Subtitles saved to: data/subtitles/10 Min Conversation in Slow German ｜ Super Easy German 287.srt


# Extract Keyframes

In [5]:
from vast.keyframe_extractor import extract_keyframes

# Sampling interval and the keyframe output folder are read from the config.
# NOTE(review): the unit of `interval` (seconds vs. frames) is not visible
# here -- confirm against extract_keyframes.
interval = cfg.keyframes.interval
output_dir = Path(cfg.data.keyframes)

# Extract keyframes from the downloaded video; `videos` is produced by the
# download cell. The result is a sized collection (its length is reported).
keyframes = extract_keyframes(videos, output_dir, interval)

print(f"Number of keyframes extracted: {len(keyframes)}")


Extraction completed: 352 keyframes generated.
Number of keyframes extracted: 352


# Analyze Scenes

In [6]:
from vast.scene_analyzer import analyze_directory

# Captioning model plus the input/output folders, all taken from the config.
# The input folder is the same one the keyframe extraction step writes to.
model_name = cfg.model.modelname
input_dir = Path(cfg.data.keyframes)
output_dir = Path(cfg.data.scene_descriptions)

# Show which model is about to be used.
print(model_name)

# Describe every keyframe found in input_dir; the call's return value is not used.
# NOTE(review): results are presumably written under output_dir -- confirm
# against analyze_directory.
analyze_directory(input_dir, output_dir, model_name)

print("Scene analysis completed!")


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Salesforce/blip-image-captioning-base
Loading model: Salesforce/blip-image-captioning-base ...
Model loaded successfully.
10 Min Conversation in Slow German ｜ Super Easy German 287_0000.jpg → a man sitting at a table with a coffee machine
10 Min Conversation in Slow German ｜ Super Easy German 287_0001.jpg → a man standing in front of a coffee machine
10 Min Conversation in Slow German ｜ Super Easy German 287_0002.jpg → a man standing in front of a coffee machine
10 Min Conversation in Slow German ｜ Super Easy German 287_0003.jpg → a woman sitting at a table with a laptop
10 Min Conversation in Slow German ｜ Super Easy German 287_0004.jpg → a woman sitting at a table with a laptop
10 Min Conversation in Slow German ｜ Super Easy German 287_0005.jpg → a man standing in front of a coffee machine
10 Min Conversation in Slow German ｜ Super Easy German 287_0006.jpg → a man standing in front of a kitchen with a green plant
10 Min Conversation in Slow German ｜ Super Easy German 287_0007.jpg → a