In [30]:
# %%capture
# !pip install unsloth
# # Also get the latest nightly Unsloth!
# !pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [35]:
from unsloth import FastVisionModel # FastLanguageModel for LLMs
import torch


In [36]:
# Checkpoint to load: a pre-quantized (bnb 4-bit) Qwen2-VL 2B instruct model.
checkpoint_name = "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit"

# Load the vision-language model together with its tokenizer via Unsloth.
#   load_in_4bit               -> 4-bit weights lower memory use; pass False for 16-bit LoRA.
#   use_gradient_checkpointing -> True or "unsloth"; intended for long-context training.
model, tokenizer = FastVisionModel.from_pretrained(
    checkpoint_name,
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",
)

==((====))==  Unsloth 2025.4.7: Fast Qwen2_Vl patching. Transformers: 4.51.3.
   \\   /|    NVIDIA RTX A2000 12GB. Num GPUs = 1. Max memory: 11.614 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [10]:
# Which parts of the network get LoRA adapters (set a flag to False to skip that part).
adapter_targets = {
    "finetune_vision_layers": False,     # vision layers are not fine-tuned
    "finetune_language_layers": True,    # language layers are fine-tuned
    "finetune_attention_modules": True,  # attention layers are fine-tuned
    "finetune_mlp_modules": True,        # MLP layers are fine-tuned
}

# LoRA hyperparameters.
lora_settings = {
    "r": 16,               # larger rank -> higher accuracy, but may overfit
    "lora_alpha": 16,      # recommended to be at least equal to r
    "lora_dropout": 0,
    "bias": "none",
    "random_state": 3407,
    "use_rslora": False,   # rank-stabilized LoRA is supported but disabled here
    "loftq_config": None,  # LoftQ is supported but not used here
    # "target_modules": "all-linear",  # optional; a list of modules can be given if needed
}

# Wrap the loaded model with the adapters. The result is bound back to `model`,
# so this cell is meant to run once per freshly loaded base model.
model = FastVisionModel.get_peft_model(model, **adapter_targets, **lora_settings)

Unsloth: Making `model.base_model.model.model` require gradients


In [37]:
import os
import sys

# Directory added to the import path so the `data.*` imports that follow can resolve.
# Set the PROJET_FINAL_MODAL_ROOT environment variable to point at another checkout;
# when it is unset, the original hard-coded location is used unchanged.
PROJECT_ROOT = os.environ.get(
    "PROJET_FINAL_MODAL_ROOT",
    "/users/eleves-b/2023/sylvain.dehayem-kenfouo/projet_final_modal",
)

# Only append when missing: re-running this cell must not stack duplicate entries
# on sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
from data.dataset import Dataset
from data.datamodule import DataModule

In [38]:
# _target_: torchvision.transforms.Compose
# transforms:
#   - _target_: torchvision.transforms.Resize
#     size: [224, 224]
#   - _target_: torchvision.transforms.RandomHorizontalFlip
#   - _target_: torchvision.transforms.ToTensor
#   - _target_: torchvision.transforms.Normalize
#     mean: [0.485, 0.456, 0.406]
#     std: [0.229, 0.224, 0.225]
import torchvision
from torchvision import transforms
# Per-channel statistics passed to Normalize.
# NOTE(review): these look like the usual ImageNet mean/std — confirm that is the intent.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random horizontal flip, tensor conversion, normalization.
# NOTE(review): Resize receives the single int 224 here, whereas the config sketch
# above lists size [224, 224] — confirm which one is wanted.
train_steps = [
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: resize and tensor conversion only (no random augmentation).
# NOTE(review): Normalize is deliberately left disabled here, exactly as in the
# original cell, so validation tensors are not standardized like the training
# ones — confirm this asymmetry is intended.
val_steps = [
    transforms.Resize(224),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]
val_transform = transforms.Compose(val_steps)

In [39]:
import sys
# sys.path.append('/users/eleves-b/2023/sylvain.dehayem-kenfouo/projet_final_modal')
# Arguments for the project DataModule, gathered in one place for readability.
datamodule_kwargs = dict(
    dataset_path="../dataset",
    train_transform=train_transform,
    test_transform=val_transform,  # the validation pipeline is supplied as `test_transform`
    batch_size=16,
    num_workers=4,
    metadata=["title"],
)

# Build the DataModule and keep only its training dataloader.
train_loader = DataModule(**datamodule_kwargs).train_dataloader()



../dataset/train_val.csv


2025-05-12 06:04:22,667 - INFO - train set (until 2022): {'high': 43.65367180417044, 'viral': 22.65034753702025, 'medium': 20.9579933514657, 'top': 8.64309459051073, 'low': 4.09489271683288}
2025-05-12 06:04:22,668 - INFO - val set (from 2023): {'medium': 38.02315227070348, 'high': 36.01959038290294, 'viral': 14.33659839715049, 'low': 7.079252003561888, 'top': 4.541406945681211}


../dataset/train_val.csv


2025-05-12 06:04:26,030 - INFO - train set (until 2022): {'high': 43.65367180417044, 'viral': 22.65034753702025, 'medium': 20.9579933514657, 'top': 8.64309459051073, 'low': 4.09489271683288}
2025-05-12 06:04:26,031 - INFO - val set (from 2023): {'medium': 38.02315227070348, 'high': 36.01959038290294, 'viral': 14.33659839715049, 'low': 7.079252003561888, 'top': 4.541406945681211}


../dataset/train_val.csv


2025-05-12 06:04:29,467 - INFO - train set (until 2022): {'high': 43.65367180417044, 'viral': 22.65034753702025, 'medium': 20.9579933514657, 'top': 8.64309459051073, 'low': 4.09489271683288}
2025-05-12 06:04:29,468 - INFO - val set (from 2023): {'medium': 38.02315227070348, 'high': 36.01959038290294, 'viral': 14.33659839715049, 'low': 7.079252003561888, 'top': 4.541406945681211}


['Title: CGI Animated Breakdown : "Rob "n" Ron - Tour de Animation" - by Tumblehead\nChannel: channel1\nYear: 2014\nDescription: Lars Ellingbø of Tumblehead show the animation process for one of his shots from Rob "n" Ron. We start with an animatic (here we lock acting and timing) and then go over to 3D layout - blocking - blocking plus, then finally turn the keys over to spline and add the final polishing touches. Music by Peter Smith. Visit for more "making of" material. Homepage: Facebook: Twitter: Watch "Rob "n" Ron": Watch the "Making of":',
 'Title: Preyashi | প্রেয়সী | Old Bengali Movie | Full HD | Soumitra Chatterjee, Arati Bhattacharya\nChannel: channel7\nYear: 2021\nDescription: Presenting the Bengali full movie Preyashi : পরয়স বল ছব on YouTube. The Bengali Film Preyashi was released in the year 1982, Directed by Srikanto Guha Thakurta, starring Soumitra Chatterjee, Arati Bhattacharya, Dilip Roy, Bikash Roy, Shubhendu Chattopadhyay, Satya Banerjee, Kali Banerjee Others. Downl