In [None]:
# Unpack the project archive quietly (-q) and make the extracted folder the
# working directory; the following cells use paths relative to it.
!unzip -q attack-on-genai.zip
%cd attack-on-genai

In [None]:
# Clone the video-quality metrics repository inside eval/, then step back up
# to the parent directory.
# NOTE(review): presumably evaluation.py imports from this checkout - confirm.
%cd eval
!git clone https://github.com/JunyaoHu/common_metrics_on_video_quality
%cd ..

In [None]:
!pip install -r requirements.txt

In [None]:
from huggingface_hub import login
import os
from dotenv import load_dotenv

# Load any variables defined in a local .env file into the environment.
load_dotenv()

# Hugging Face login.
# Preferred: a token supplied through HF_TOKEN (Colab secrets or environment
# variables). Fallback: interactive login, which prompts for the token.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    print("Please enter your Hugging Face token:")
    login()
    print("Logged in to Hugging Face")
else:
    login(token=hf_token)
    print("Logged in to Hugging Face using token from environment variable")


In [None]:
import os
import datetime
import wandb
from dotenv import load_dotenv

# Load any variables defined in a local .env file into the environment.
load_dotenv()

# Fixed W&B destination for this notebook.
wandb_project = "attack-on-genai"
wandb_entity = "attack-on-genai"
wandb_api_key = os.getenv("WANDB_API_KEY")

# Reuse a run name supplied through the environment; otherwise generate a
# timestamped one.
wandb_run_name = os.getenv("WANDB_RUN_NAME")
if not wandb_run_name:
    wandb_run_name = f"wan-train-{datetime.datetime.now():%Y%m%d-%H%M%S}"

# Authenticate: non-interactive when a key is available, prompt otherwise.
if not wandb_api_key:
    print("WANDB_API_KEY not set; you'll be prompted to log in.")
    wandb.login()
else:
    wandb.login(key=wandb_api_key)
    print("Logged in to Weights & Biases using WANDB_API_KEY")

# Publish the run settings through the environment so that processes launched
# from later cells inherit them. WANDB_MODE is only set when not already defined.
os.environ.update(
    {
        "WANDB_PROJECT": wandb_project,
        "WANDB_RUN_NAME": wandb_run_name,
        "WANDB_ENTITY": wandb_entity,
    }
)
os.environ.setdefault("WANDB_MODE", "online")

print(
    f"W&B project: {wandb_project}\n"
    f"W&B run: {wandb_run_name}\n"
    f"W&B entity: {wandb_entity}"
)



In [None]:
# SETUP: Download & Split Dataset
dataset_repo_id = "attack-on-genai/video-frames"
checkpoint_dir = "checkpoint_setup"
max_samples = 5
test_split = 0.2
seed = 42

setup_cmd = f"""python train_setup.py \
    --dataset_repo_id {dataset_repo_id} \
    --checkpoint_dir {checkpoint_dir} \
    --max_samples {max_samples} \
    --test_split {test_split} \
    --seed {seed}"""

!{setup_cmd}


In [None]:
# TRAINING
train_data_dir = "checkpoint_setup/train"  
dataset_repo_id = None  
model_id = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"
output_dir = "wan_flf2v_lora"

max_train_steps = 50
learning_rate = 1e-4
weight_decay = 1e-2
batch_size = 1
gradient_accumulation_steps = 1
log_every = 1
save_every = 1000
max_samples = None  

lora_rank = 8
lora_alpha = 16
lora_dropout = 0.0

dynamic_frames = True  
num_frames = 17  
flow_shift = 3.0
quantization = "no"
mixed_precision = "bf16"  
device = "cuda"

# Optical flow loss (optional but improves motion quality)
use_flow_loss = True  
flow_loss_weight = 0.2  
flow_downsample = 1  

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

print(
    f"Logging to W&B project={os.getenv('WANDB_PROJECT', 'attack-on-genai')} "
    f"run={os.getenv('WANDB_RUN_NAME', 'wan-train-manual')} "
    f"entity={os.getenv('WANDB_ENTITY', 'attack-on-genai')}"
)

cmd_parts = [
    "python train.py",
    f"--train_data_dir {train_data_dir}" if train_data_dir else None,
    f"--dataset_repo_id {dataset_repo_id}" if dataset_repo_id else None,
    f"--model_id {model_id}",
    f"--output_dir {output_dir}",
    f"--max_train_steps {max_train_steps}",
    f"--learning_rate {learning_rate}",
    f"--weight_decay {weight_decay}",
    f"--batch_size {batch_size}",
    f"--gradient_accumulation_steps {gradient_accumulation_steps}",
    f"--log_every {log_every}",
    f"--save_every {save_every}",
    f"--lora_rank {lora_rank}",
    f"--lora_alpha {lora_alpha}",
    f"--lora_dropout {lora_dropout}",
    "--dynamic_frames" if dynamic_frames else None,
    f"--num_frames {num_frames}" if not dynamic_frames else None,
    f"--flow_shift {flow_shift}",
    f"--max_samples {max_samples}" if max_samples else None,
    f"--mixed_precision {mixed_precision}",
    f"--quantization {quantization}",
    f"--device {device}",
    "--use_flow_loss" if use_flow_loss else None,
    f"--flow_loss_weight {flow_loss_weight}" if use_flow_loss else None,
    f"--flow_downsample {flow_downsample}" if use_flow_loss else None,
]
cmd = " \\\n    ".join([p for p in cmd_parts if p])

print("Running:\n", cmd)
!{cmd}

print(f"Training complete. W&B run configured via environment.")


In [None]:
# UPLOAD TO HUGGINGFACE (Optional) 
from huggingface_hub import HfApi
import os

# Local folder to publish and the Hugging Face model repo that receives it.
output_dir = "wan_flf2v_lora"
repo_id = "attack-on-genai/wan-finetune"

if not os.path.exists(output_dir):
    # Nothing to upload; report and move on.
    print(f"{output_dir} not found")
else:
    # Authenticate with the token from the environment (may be None).
    hf_token = os.getenv("HF_TOKEN")
    api = HfApi(token=hf_token)
    api.upload_folder(
        repo_id=repo_id,
        repo_type="model",
        folder_path=output_dir,
        commit_message="Upload LoRA weights",
    )
    print(f"Uploaded to https://huggingface.co/{repo_id}")

In [None]:
# EVALUATION
import os
import json

hf_lora_repo = "attack-on-genai/wan-finetune"  
test_data_dir = "checkpoint_setup/test"
resize_for_eval = 480  
output_json = "evaluation_results.json"
device = "cuda"

eval_frame_counts = "13,17,21,25,29,33,37,41,45,49,53,57,61,65,69,73,77,81" 

num_inference_steps = 30
eval_base = True  

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

eval_cmd = f"""python evaluation.py \
    --hf_lora_repo {hf_lora_repo} \
    --test_data_dir {test_data_dir} \
    --eval_frame_counts {eval_frame_counts} \
    --num_inference_steps {num_inference_steps} \
    --resize {resize_for_eval} \
    --output_json {output_json} \
    --device {device} \
    {"--eval_base" if eval_base else ""}"""

print(f"Evaluating on frame counts: {eval_frame_counts}")
!{eval_cmd}

if os.path.exists(output_json):
    with open(output_json, 'r') as f:
        results = json.load(f)
    print("\nEvaluation Results by Frame Count:")
    for frame_key in sorted(results.keys()):
        if frame_key.startswith('frames_'):
            frame_count = frame_key.split('_')[1]
            print(f"\n{frame_key} ({frame_count} frames):")
            if 'finetuned' in results[frame_key]:
                for m in ['fvd', 'ssim', 'psnr', 'lpips']:
                    if m in results[frame_key]['finetuned']:
                        print(f"  {m.upper()}: {results[frame_key]['finetuned'][m]['value'][0]:.4f}")
else:
    print(f"{output_json} not found")


In [None]:
# BATCH INFERENCE: 3 BASE + 3 FINETUNED (examples)
import os
import subprocess
import wandb
from tqdm import tqdm
import time

# Inputs shared by every example.
examples_dir = "examples"
hf_lora_repo = "attack-on-genai/wan-finetune"
prompt = "high quality anime style, smooth motion, consistent characters"
negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
num_frames = 13
num_inference_steps = 30
seed = 42

# (first frame file, last frame file, tag used for output names and W&B keys)
example_pairs = [
    ("first_1.png", "last_1.png", "ex1"),
    ("first_2.png", "last_2.png", "ex2"),
    ("first_3.png", "last_3.png", "ex3"),
]

wandb_project = os.getenv("WANDB_PROJECT", "attack-on-genai")
wandb_entity = os.getenv("WANDB_ENTITY") or None
wandb_run_name = os.getenv("WANDB_RUN_NAME") or "wan-multi-infer"
wandb_run_id = os.getenv("WANDB_RUN_ID")


def _example_inference_cmd(first_path, last_path, output, lora_path=None):
    """Return the argv list for one inference.py run.

    Args:
        first_path: Path of the first-frame image.
        last_path: Path of the last-frame image.
        output: Path of the video file to write.
        lora_path: LoRA repo/path passed as --lora_path; omitted when None so
            the base model is used.

    Returns:
        A list of strings suitable for subprocess.run.
    """
    argv = ["python", "inference.py"]
    if lora_path is not None:
        argv += ["--lora_path", lora_path]
    argv += [
        "--first_frame", first_path,
        "--last_frame", last_path,
        "--output", output,
        "--prompt", prompt,
        "--negative_prompt", negative_prompt,
        "--num_frames", str(num_frames),
        "--num_inference_steps", str(num_inference_steps),
        "--seed", str(seed),
    ]
    return argv


# Resume the run named by WANDB_RUN_ID when given; otherwise start a new one.
if wandb_run_id:
    wb = wandb.init(project=wandb_project, entity=wandb_entity, id=wandb_run_id, resume="must")
else:
    wb = wandb.init(project=wandb_project, entity=wandb_entity, name=wandb_run_name)

logged_videos = {}
start_time = time.time()
total_videos = len(example_pairs) * 2  # one base + one fine-tuned video per pair

try:
    with tqdm(total=total_videos, desc="Generating examples", unit="video") as pbar:
        for first_name, last_name, tag in example_pairs:
            first_path = os.path.join(examples_dir, first_name)
            last_path = os.path.join(examples_dir, last_name)
            assert os.path.exists(first_path), f"Missing {first_path}"
            assert os.path.exists(last_path), f"Missing {last_path}"

            base_out = f"{tag}_base.mp4"
            ft_out = f"{tag}_finetuned.mp4"

            # check=True turns a failed generation into CalledProcessError.
            base_cmd = _example_inference_cmd(first_path, last_path, base_out)
            pbar.set_description(f"Generating {tag} (base)")
            subprocess.run(base_cmd, check=True)
            pbar.update(1)

            ft_cmd = _example_inference_cmd(
                first_path, last_path, ft_out, lora_path=hf_lora_repo
            )
            pbar.set_description(f"Generating {tag} (finetuned)")
            subprocess.run(ft_cmd, check=True)
            pbar.update(1)

            wb.log({
                f"examples/{tag}/base": wandb.Video(base_out, fps=16, format="mp4"),
                f"examples/{tag}/finetuned": wandb.Video(ft_out, fps=16, format="mp4"),
            })
except BaseException:
    # Close the run as failed so it does not linger in the "running" state.
    wb.finish(exit_code=1)
    raise
else:
    # Close the run so the logged videos are flushed and it is marked finished.
    wb.finish()

print(f"Logged {len(example_pairs)} base + {len(example_pairs)} finetuned videos to W&B")



In [None]:
# Inference configuration
from tqdm import tqdm
import time

hf_lora_repo = "attack-on-genai/wan-finetune"
first_frame_path = "first_frame.png"
last_frame_path = "last_frame.png"
prompt = "high quality anime style, smooth motion, consistent characters"
negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
num_frames = 17
num_inference_steps = 30
seed = 42

start_time = time.time()

with tqdm(total=2, desc="Generating inference examples", unit="video") as pbar:
    # BASE MODEL
    pbar.set_description("Generating BASE MODEL (no LoRA)")
    base_cmd = f"""python inference.py \
        --first_frame {first_frame_path} \
        --last_frame {last_frame_path} \
        --output base_output.mp4 \
        --prompt "{prompt}" \
        --negative_prompt "{negative_prompt}" \
        --num_frames {num_frames} \
        --num_inference_steps {num_inference_steps} \
        --seed {seed if seed is not None else 'None'}"""
    !{base_cmd}
    pbar.update(1)

    # FINE-TUNED MODEL
    pbar.set_description("Generating FINE-TUNED MODEL (with LoRA)")
    ft_cmd = f"""python inference.py \
        --lora_path {hf_lora_repo} \
        --first_frame {first_frame_path} \
        --last_frame {last_frame_path} \
        --output finetuned_output.mp4 \
        --prompt "{prompt}" \
        --negative_prompt "{negative_prompt}" \
        --num_frames {num_frames} \
        --num_inference_steps {num_inference_steps} \
        --seed {seed if seed is not None else 'None'}"""
    !{ft_cmd}
    pbar.update(1)

print("\n" + "=" * 60)
print("âœ“ Done! Generated videos:")
print("  - base_output.mp4 (base model)")
print("  - finetuned_output.mp4 (fine-tuned model)")
print("=" * 60)