# Setup

In [None]:
import os
from pathlib import Path

from natsort import natsorted
import torch

from src.alacen.alacen import ALACen
from src.alacen.asr.whisper import Whisper
from src.alacen.paraphrase.pegasus import PegasusAlacen
from src.alacen.tts.voicecraft.voicecraft import VoiceCraftTTS, VoiceCraftArgs
from src.alacen.lipsync.diff2lip.diff2lip import Diff2Lip, Diff2LipArgs

# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the four pipeline components in the order ALACen receives them:
# speech recognition, paraphrasing, text-to-speech, and lip synchronisation.
asr = Whisper()
paraphrase = PegasusAlacen()
tts = VoiceCraftTTS(model_name="330M_TTSEnhanced")
# NOTE(review): num_gpus=3 is hard-coded — confirm it matches the host machine.
lipsync_args = Diff2LipArgs(num_gpus=3)
lipsync = Diff2Lip(lipsync_args)

# Single entry point used by the processing cells below.
alacen = ALACen(asr, paraphrase, tts, lipsync)

# Configure

In [None]:
# Input / output locations (relative to the notebook's working directory).
VIDEO_DIR = Path("videos")
OUT_DIR = Path("output")

# Run settings forwarded to alacen.run() by the processing cells.
NUM_PARAPHRASES = 5  # passed as num_paraphrases
VERBOSE = True  # passed as verbose

# VoiceCraft arguments handed to alacen.run() as its third positional argument.
TTS_ARGS = VoiceCraftArgs.constructor(padding="end", num_samples=5)

In [None]:
# Keep only regular files found directly inside VIDEO_DIR (sub-directories are
# skipped), order their names with natsorted, then turn them into full paths.
file_names = [
    name for name in os.listdir(VIDEO_DIR) if os.path.isfile(VIDEO_DIR / name)
]
video_list = [VIDEO_DIR / name for name in natsorted(file_names)]
# Bare expression so the notebook displays the resulting list.
video_list

# Mode: fully automatic

In [None]:
# Process every discovered video with the pipeline in "auto" mode.
mode = "auto"

# Loop-invariant arguments are assembled once up front.
# target_dir is the second positional argument of alacen.run()
# (presumably the output directory — confirm against ALACen.run).
target_dir = OUT_DIR / mode
run_options = {
    "num_paraphrases": NUM_PARAPHRASES,
    "merge_av": True,
    "mode": mode,
    "device": device,
    "verbose": VERBOSE,
    "clean_up": True,
}

for i, video in enumerate(video_list, start=1):
    # Flush so the progress line appears before any output from the run.
    print(f"Video {i}: {video}", flush=True)
    alacen.run(video, target_dir, TTS_ARGS, **run_options)
    print()

# Mode: semi-automatic

In [None]:
# Process every discovered video with the pipeline in "semi" mode.
mode = "semi"

# Loop-invariant arguments are assembled once up front.
# target_dir is the second positional argument of alacen.run()
# (presumably the output directory — confirm against ALACen.run).
target_dir = OUT_DIR / mode
run_options = {
    "num_paraphrases": NUM_PARAPHRASES,
    "merge_av": True,
    "mode": mode,
    "device": device,
    "verbose": VERBOSE,
    "clean_up": True,
}

for i, video in enumerate(video_list, start=1):
    # Flush so the progress line appears before any output from the run.
    print(f"Video {i}: {video}", flush=True)
    alacen.run(video, target_dir, TTS_ARGS, **run_options)
    print()