In [1]:
import torch
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration
from transformers import TrainingArguments, Trainer
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
from typing import List
import pandas as pd

import subprocess
import os
from yt_dlp import YoutubeDL

import re

import logging
import random
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

from datasets import Dataset, Features, Value

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed) # if you are using cuda
    torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
    np.random.seed(seed) # Numpy module.
    random.seed(seed) # Python random module.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed()

USE_LORA = False
USE_QLORA = True
SMOL = True

model_id = "HuggingFaceTB/SmolVLM-Instruct" if SMOL else "HuggingFaceM4/Idefics3-8B-Llama3"

processor = AutoProcessor.from_pretrained(
    model_id
)

if USE_QLORA or USE_LORA:
    lora_config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],
        use_dora=False if USE_QLORA else True,
        init_lora_weights="gaussian",
        # task_type=TaskType.
    )
    lora_config.inference_mode = False
    if USE_QLORA:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16
        )

    model = Idefics3ForConditionalGeneration.from_pretrained(
        model_id,
        quantization_config=bnb_config if USE_QLORA else None,
        _attn_implementation="flash_attention_2",
        device_map="auto"
    )
    model.add_adapter(lora_config)
    model.enable_adapters()
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)
    print(model.get_nb_trainable_parameters())
else:
    model = Idefics3ForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        _attn_implementation="flash_attention_2",
        device_map='auto'
    )

    # if you'd like to only fine-tune LLM
    for param in model.model.vision_model.parameters():
        param.requires_grad = False


Some kwargs in processor config are unused and will not have any effect: image_seq_len. 
INFO:peft.tuners.tuners_utils:Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!


(10536960, 2256809840)


In [3]:
def check_params_with_grad(model):
    """
    Checks and prints the parameters of a PyTorch model that have requires_grad=True.

    Args:
        model: The PyTorch model to check.
    """

    params_with_grad = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_with_grad.append((name, param))

    if params_with_grad:
        print("Parameters with requires_grad=True:")
        for name, param in params_with_grad:
            print(f"- {name}: shape={param.shape}, dtype={param.dtype}, params={param}")
            break
            # Optionally print the number of elements:
            print(f"  Number of elements: {param.numel()}")
    else:
        print("No parameters with requires_grad=True found.")

In [4]:
check_params_with_grad(model)
'''Parameters with requires_grad=True:
- base_model.model.model.vision_model.encoder.layers.0.self_attn.k_proj.lora_A.default.weight: shape=torch.Size([8, 1152]), dtype=torch.float32, params=Parameter containing:
tensor([[ 0.0562, -0.1976, -0.0880,  ...,  0.0183, -0.1876, -0.0010],
        [-0.0583,  0.0611, -0.2928,  ...,  0.0292,  0.1838, -0.0823],
        [ 0.3044,  0.0196,  0.2708,  ...,  0.0573, -0.1446,  0.0779],
        ...,
        [-0.1374, -0.0511,  0.0835,  ...,  0.0658, -0.2263, -0.3412],
        [-0.2727, -0.0401,  0.0655,  ...,  0.0611, -0.2045,  0.0332],
        [ 0.0171,  0.0390, -0.1298,  ...,  0.0427,  0.0269,  0.3102]],
       device='cuda:0', requires_grad=True)'''

Parameters with requires_grad=True:
- base_model.model.model.vision_model.encoder.layers.0.self_attn.k_proj.lora_A.default.weight: shape=torch.Size([8, 1152]), dtype=torch.float32, params=Parameter containing:
tensor([[ 0.0562, -0.1976, -0.0880,  ...,  0.0183, -0.1876, -0.0010],
        [-0.0583,  0.0611, -0.2928,  ...,  0.0292,  0.1838, -0.0823],
        [ 0.3044,  0.0196,  0.2708,  ...,  0.0573, -0.1446,  0.0779],
        ...,
        [-0.1374, -0.0511,  0.0835,  ...,  0.0658, -0.2263, -0.3412],
        [-0.2727, -0.0401,  0.0655,  ...,  0.0611, -0.2045,  0.0332],
        [ 0.0171,  0.0390, -0.1298,  ...,  0.0427,  0.0269,  0.3102]],
       device='cuda:0', requires_grad=True)


"Parameters with requires_grad=True:\n- base_model.model.model.vision_model.encoder.layers.0.self_attn.k_proj.lora_A.default.weight: shape=torch.Size([8, 1152]), dtype=torch.float32, params=Parameter containing:\ntensor([[ 0.0562, -0.1976, -0.0880,  ...,  0.0183, -0.1876, -0.0010],\n        [-0.0583,  0.0611, -0.2928,  ...,  0.0292,  0.1838, -0.0823],\n        [ 0.3044,  0.0196,  0.2708,  ...,  0.0573, -0.1446,  0.0779],\n        ...,\n        [-0.1374, -0.0511,  0.0835,  ...,  0.0658, -0.2263, -0.3412],\n        [-0.2727, -0.0401,  0.0655,  ...,  0.0611, -0.2045,  0.0332],\n        [ 0.0171,  0.0390, -0.1298,  ...,  0.0427,  0.0269,  0.3102]],\n       device='cuda:0', requires_grad=True)"

In [2]:
# Load and preprocess the CSV.
df = pd.read_csv("/DATA/rishav_2311mc12/complaint_gen2/Gender-Extension of Complaint Generation model - Sheet1.csv")
df = df[['Video Link', 'Gender', 'Age-Group', 'Our Label']]  # Select necessary columns
df.dropna(inplace=True)  # Drop rows with missing values
df = df.reset_index(drop=True)


# Add the prompt column
df['prompt'] = df.apply(
    lambda row: f"Given a video made by {row['Gender']} aged {row['Age-Group']}, generate a complaint in Hinglish Language about their experience from their perspective of the given product.",
    axis=1
)


# Rename columns to match the features schema
df.rename(columns={
    'Video Link': 'video_path',
    'Gender': 'gender',
    'Age-Group': 'age_group',
    'Our Label': 'label'
}, inplace=True)


# Define features for the Dataset
features = Features({
    'video_path': Value(dtype='string'),
    'gender': Value(dtype='string'),
    'age_group': Value(dtype='string'),
    'label': Value(dtype='string'),
    'prompt': Value(dtype='string'),
})


# Create Hugging Face Dataset
my_dataset = Dataset.from_pandas(df, features=features)

# Perform train-test split
split_dataset = my_dataset.train_test_split(test_size=0.3)  # Adjust test_size as needed

# Access train and test datasets
train_ds = split_dataset['train']
test_ds = split_dataset['test']


In [3]:
# train_ds

In [4]:
def extract_frames(video_path: str, max_frames: int) -> List[Image.Image]:
    """
    Extract frames from a video file. The video must already exist locally.

    Args:
        video_path (str): Path to the video file.
        max_frames (int): Maximum number of frames to extract.
    
    Returns:
        List[Image.Image]: A list of PIL Image objects extracted from the video.
    """
    if not os.path.exists(video_path):
        print(f"Warning: Could not locate video: {video_path}. Skipping...")
        return []  # Return an empty list for the frames

    # Open the video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Warning: Could not open video: {video_path}. Skipping...")
        return []  # Return an empty list for the frames

    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Calculate frame indices to extract (1 fps)
    frame_indices = list(range(0, total_frames, fps))

    # Sample evenly if more frames than max_frames
    if len(frame_indices) > max_frames:
        indices = np.linspace(0, len(frame_indices) - 1, max_frames, dtype=int)
        frame_indices = [frame_indices[i] for i in indices]

    frames = []
    for frame_idx in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(frame)
            frames.append(pil_image)

    cap.release()
    return frames

In [8]:
# train_ds['video_path']

In [9]:
import os
image_token_id = processor.tokenizer.additional_special_tokens_ids[
            processor.tokenizer.additional_special_tokens.index("<image>")]

def get_video_id_from_path(video_path: str) -> str:
    """
    Extract the last ID (e.g., B0DBJ4SZHW) from the video path (Amazon review URL or any other identifier).
    Assumes video path contains the product ID or video ID in a known format.
    """
    # Assuming the video path format has the ID at the end of the URL or as part of the name
    match = re.search(r'ASIN=([A-Z0-9]{10})', video_path)
    if match:
        return match.group(1)
    return None

def collate_fn(examples):
    texts = []
    images = []

    max_frames = 8
    for example in examples:
        # Extract the video ID from the video path
        video_path = example["video_path"]
        video_id = get_video_id_from_path(video_path)
        
        if video_id is None:
            print(f"Error: No valid video ID found in {video_path}. Skipping...")
            continue  # Skip this example if no valid ID is found

        # Define the path to the downloaded video (expected to be in the "downloads" folder)
        download_folder = "/DATA/rishav_2311mc12/complaint_gen2/downloads"
        video_file_path = os.path.join(download_folder, f"{video_id}.mp4")

        # Check if the video already exists in the download folder
        if not os.path.exists(video_file_path):
            print(f"Error: Video with ID {video_id} not found in {download_folder}. Skipping...")
            continue  # Skip this example if the video file does not exist

        # Extract frames from the video
        frames = extract_frames(video_file_path, max_frames)  # Returns a list of PIL images

        prompt = example["prompt"]
        label = example["label"]

        # Prepare messages - Repeat <image> token for each frame
        messages = [
            {
                "role": "user",
                "content": [
                    # Repeat <image> for each frame
                    *[{"type": "image"} for _ in range(len(frames))],
                    {"type": "text", "text": prompt}
                ]
            },
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": label}
                ]
            }
        ]
        text = processor.apply_chat_template(messages, add_generation_prompt=False)

        # Add processed data
        texts.append(text.strip())
        images.extend(frames)  # Add frames for the video

    # Create the batch using the processor
    batch = processor(text=texts, images=images, return_tensors="pt", padding=True)

    # Prepare labels
    labels = batch["input_ids"].clone()
    labels[labels == processor.tokenizer.pad_token_id] = -100
    labels[labels == image_token_id] = -100
    batch["labels"] = labels

    return batch

In [10]:
model_name = model_id.split("/")[-1]

training_args = TrainingArguments(
    num_train_epochs=5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    warmup_steps=50,
    learning_rate=1e-4,
    weight_decay=0.01,
    logging_steps=25,
    save_strategy="steps",
    save_steps=250,
    save_total_limit=1,
    optim="paged_adamw_8bit", # for 8-bit, keep this, else adamw_hf
    bf16=True, # underlying precision for 8bit
    output_dir=f"./{model_name}-codemix",
    hub_model_id=f"{model_name}-codemix",
    report_to="tensorboard",
    remove_unused_columns=False,
    gradient_checkpointing=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_ds,
)

trainer.train()

torch.cuda.empty_cache()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.


Step,Training Loss
25,0.622
50,0.4123
75,0.2729


In [11]:
trainer.push_to_hub()

adapter_model.safetensors:   0%|          | 0.00/42.2M [00:00<?, ?B/s]
[A

adapter_model.safetensors:   0%|          | 164k/42.2M [00:00<01:18, 537kB/s]  
training_args.bin: 100%|██████████| 5.30k/5.30k [00:01<00:00, 3.77kB/s].82MB/s]
events.out.tfevents.1735941011.user.1612814.0: 100%|██████████| 13.3k/13.3k [00:01<00:00, 9.35kB/s]
adapter_model.safetensors: 100%|██████████| 42.2M/42.2M [00:08<00:00, 4.73MB/s]


Upload 3 LFS files: 100%|██████████| 3/3 [00:09<00:00,  3.25s/it]


CommitInfo(commit_url='https://huggingface.co/rishavranaut/SmolVLM-Instruct-codemix/commit/bd0b5605a2031cda825c9f8e3576d4dc5fdcf3df', commit_message='End of training', commit_description='', oid='bd0b5605a2031cda825c9f8e3576d4dc5fdcf3df', pr_url=None, repo_url=RepoUrl('https://huggingface.co/rishavranaut/SmolVLM-Instruct-codemix', endpoint='https://huggingface.co', repo_type='model', repo_id='rishavranaut/SmolVLM-Instruct-codemix'), pr_revision=None, pr_num=None)

# inference

In [13]:
check_params_with_grad(model)
'''Parameters with requires_grad=True:
- base_model.model.model.vision_model.encoder.layers.0.self_attn.k_proj.lora_A.default.weight: shape=torch.Size([8, 1152]), dtype=torch.float32, params=Parameter containing:
tensor([[ 0.0562, -0.1976, -0.0880,  ...,  0.0183, -0.1876, -0.0010],
        [-0.0583,  0.0611, -0.2928,  ...,  0.0292,  0.1838, -0.0823],
        [ 0.3044,  0.0196,  0.2708,  ...,  0.0573, -0.1446,  0.0779],
        ...,
        [-0.1374, -0.0511,  0.0835,  ...,  0.0658, -0.2263, -0.3412],
        [-0.2727, -0.0401,  0.0655,  ...,  0.0611, -0.2045,  0.0332],
        [ 0.0171,  0.0390, -0.1298,  ...,  0.0427,  0.0269,  0.3102]],
       device='cuda:0', requires_grad=True)'''


No parameters with requires_grad=True found.


"Parameters with requires_grad=True:\n- base_model.model.model.vision_model.encoder.layers.0.self_attn.k_proj.lora_A.default.weight: shape=torch.Size([8, 1152]), dtype=torch.float32, params=Parameter containing:\ntensor([[ 0.0562, -0.1976, -0.0880,  ...,  0.0183, -0.1876, -0.0010],\n        [-0.0583,  0.0611, -0.2928,  ...,  0.0292,  0.1838, -0.0823],\n        [ 0.3044,  0.0196,  0.2708,  ...,  0.0573, -0.1446,  0.0779],\n        ...,\n        [-0.1374, -0.0511,  0.0835,  ...,  0.0658, -0.2263, -0.3412],\n        [-0.2727, -0.0401,  0.0655,  ...,  0.0611, -0.2045,  0.0332],\n        [ 0.0171,  0.0390, -0.1298,  ...,  0.0427,  0.0269,  0.3102]],\n       device='cuda:0', requires_grad=True)"

In [5]:
from peft import PeftModel, PeftConfig
path = "rishavranaut/SmolVLM-Instruct-codemix"
config = PeftConfig.from_pretrained(path)
model1 = Idefics3ForConditionalGeneration.from_pretrained(config.base_model_name_or_path,device_map='auto')
model = PeftModel.from_pretrained(model1, path)
processor = AutoProcessor.from_pretrained(
    config.base_model_name_or_path
)
model

Some kwargs in processor config are unused and will not have any effect: image_seq_len. 


PeftModel(
  (base_model): LoraModel(
    (model): Idefics3ForConditionalGeneration(
      (model): Idefics3Model(
        (vision_model): Idefics3VisionTransformer(
          (embeddings): Idefics3VisionEmbeddings(
            (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
            (position_embedding): Embedding(729, 1152)
          )
          (encoder): Idefics3Encoder(
            (layers): ModuleList(
              (0-26): 27 x Idefics3EncoderLayer(
                (self_attn): Idefics3VisionAttention(
                  (k_proj): lora.Linear(
                    (base_layer): Linear(in_features=1152, out_features=1152, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=1152, out_features=8, bias=False)
                    )
                    (lora_B)

In [6]:
model.eval()

# Example usage

import os
image_token_id = processor.tokenizer.additional_special_tokens_ids[
            processor.tokenizer.additional_special_tokens.index("<image>")]

def get_video_id_from_path(video_path: str) -> str:
    """
    Extract the last ID (e.g., B0DBJ4SZHW) from the video path (Amazon review URL or any other identifier).
    Assumes video path contains the product ID or video ID in a known format.
    """
    # Assuming the video path format has the ID at the end of the URL or as part of the name
    match = re.search(r'ASIN=([A-Z0-9]{10})', video_path)
    if match:
        return match.group(1)
    return None

def get_video_path(example):

    video_path = example
    video_id = get_video_id_from_path(video_path)

    # Define the path to the downloaded video (expected to be in the "downloads" folder)
    download_folder = "/DATA/rishav_2311mc12/complaint_gen2/downloads"
    video_file_path = os.path.join(download_folder, f"{video_id}.mp4")
    return video_file_path





def generate_response(model, processor, video_path: str, prompt: str, max_frames: int = 3):
    # Extract frames
    frames  = extract_frames(video_path,max_frames)
    
    logger.info(f"Extracted {len(frames)} frames from video")
    
    # Create prompt with frames
    prompt = prompt

    # Prepare messages - Repeat <image> token for each frame
    messages = [
        {
            "role": "user",
            "content": [
                # Repeat <image> for each frame
                *[{"type": "image"} for _ in range(len(frames))],
                {"type": "text", "text": prompt}
            ]
        }
    ]

    # Process inputs
    inputs = processor(
        text=processor.apply_chat_template(messages, add_generation_prompt=True),
        images=[img for img in frames],
        return_tensors="pt"
    ).to(model.device)
     
    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        num_beams=5,
        temperature=0.7,
        do_sample=True,
        use_cache=True
    )
    # Decode response
    response = processor.decode(outputs[0], skip_special_tokens=True)
    return response

def main(): 
    video_path = video_path = get_video_path(test_ds['video_path'][0])
    prompt = test_ds['prompt'][0]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    
  
    # Generate response
    logger.info("Generating response...")
    response = generate_response(model, processor, video_path, prompt)
    
    # Print results
    print("Question:", prompt)
    print("Response:", response)

if __name__ == "__main__":
    main()



INFO:__main__:Generating response...


INFO:__main__:Extracted 3 frames from video


Question: Given a video made by Male aged middle Aged, generate a complaint in Hinglish Language about their experience from their perspective of the given product.
Response: User:<image>Given a video made by Male aged middle Aged, generate a complaint in Hinglish Language about their experience from their perspective of the given product.
Assistant: Video mein Male aged middle Aged dikhaya gaya hai, jiske liye Samsung Galaxy A5 (2017) dikhaya gaya hai. Video mein yeh Samsung Galaxy A5 (2017) dikhaya gaya hai, jahan yeh user dikhata hai, jo yeh Samsung Galaxy A5 (2017) dikhata hai. Yeh Samsung Galaxy A5 (2


In [11]:
torch.cuda.empty_cache()


# saving the model

In [18]:
from huggingface_hub import login
login("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")

In [13]:
# model

In [None]:
from huggingface_hub import upload_folder, create_repo
from pathlib import Path

# Output directory.
output_dir = f"./{model_name}-codemix"
repo_name = "smolvlm-fine-tuned-codemix"

repo_id = f"rishavranaut/{repo_name}"


repo_id = create_repo(repo_id, exist_ok=True).repo_id

upload_folder(
    repo_id=repo_id,
    folder_path=output_dir,
    commit_message="Pushed the IDEFICS2 fine-tuned model.",
    ignore_patterns=["step_*", "epoch_*"],
)

In [None]:
model

# use for inference

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = Idefics3ForConditionalGeneration.from_pretrained(
        model_id="HuggingFaceTB/SmolVLM-Base",
        quantization_config=bnb_config if USE_QLORA else None,
        _attn_implementation="flash_attention_2",
        device_map="auto"
    )
model = PeftModel.Idefics3ForConditionalGeneration(base_model, "/DATA/rishav_2311mc12/complaint_gen2/SmolVLM-Base-codemix")

model

In [None]:
# Create input messages

video_path = get_video_path(test_ds['video_path'][0])
prompt = test_ds['prompt'][0]
frames = extract_frames(video_path,max_frames=8)
messages = [
            {
                "role": "user",
                "content": [
                    # Repeat <image> for each frame
                    *[{"type": "image"} for _ in range(len(frames))],
                    {"type": "text", "text": prompt}
                ]
            }
        ]

# Prepare inputs
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=frames, return_tensors="pt")

# Generate outputs
generated_ids = model.generate(**inputs, max_new_tokens=500)
generated_texts = processor.batch_decode(
    generated_ids,
    skip_special_tokens=True,
)

print(generated_texts[0])



video downloading

In [9]:
# import pandas as pd
# df= pd.read_csv("/DATA/rishav_2311mc12/complaint_gen2/Gender-Extension of Complaint Generation model - Sheet1.csv")
# df.dropna(inplace=True)
# df


# import os
# import re
# from yt_dlp import YoutubeDL

# def download_video(video_url, download_path):
#     """
#     Download a video and save it using the product ID as filename.
    
#     Args:
#         video_url (str): URL of the video to download.
#         download_path (str): Path where the video will be saved.
#     """
#     # Extract product ID from the URL (assuming it's in the format: ...?ASIN=PRODUCT_ID)
#     product_id = re.search(r'ASIN=([A-Z0-9]{10})', video_url)
#     if product_id:
#         product_id = product_id.group(1)
#     else:
#         print("Could not extract product ID.")
#         return
    
#     # Ensure the download directory exists
#     if not os.path.exists(download_path):
#         os.makedirs(download_path)
    
#     # Configure yt-dlp options to save video with the product ID as the filename
#     ydl_opts = {
#         'outtmpl': os.path.join(download_path, f'{product_id}.%(ext)s'),
#         'format': 'best',
#     }
    
#     try:
#         print(f"Downloading: {video_url}")
#         with YoutubeDL(ydl_opts) as ydl:
#             ydl.download([video_url])
#         print(f"Downloaded: {product_id}.mp4")
#     except Exception as e:
#         print(f"Failed to download {video_url}: {e}")

# # Example usage
# if __name__ == "__main__":
#     # Replace with actual video URLs (assuming these are the review video links)
#     video_links = df['Video Link']
    
#     # Specify the download folder
#     download_folder = "/DATA/rishav_2311mc12/complaint_gen2/downloads"
    
#     # Loop through the video URLs and download each one
#     for video_url in video_links:
#         download_video(video_url, download_folder)
