In [1]:
import torch
import evaluate

  from .autonotebook import tqdm as notebook_tqdm
2024-08-09 13:55:12.065183: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-09 13:55:12.066589: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-09 13:55:12.094767: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
import torch
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import json
from datasets import Dataset
from rouge_score import rouge_scorer
import random

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the fine-tuned model
model_name = "google/flan-t5-base"
# peft_model_config_path = "../models/peft-thumbnail-description-checkpoint-local"
peft_model_path = "../models/peft-thumbnail-description-further-trained"

# Load the base model and tokenizer
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Using device: cuda


In [6]:
# Load the PEFT configuration and model
peft_config = PeftConfig.from_pretrained(peft_model_path)
model = PeftModel.from_pretrained(base_model, peft_model_path)
model.to(device)
model.eval()

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 768)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 768)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768, out_features=8, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=8, out_features=768, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
               

In [25]:
# Load the dataset
with open("../data/dataset_blip_large_test.json") as f:
    dataset = json.load(f)

dataset = Dataset.from_dict(
    {
        "prompt": [item["prompt"] for item in dataset],
        "response": [item["response"] for item in dataset],
    }
)

# Function to generate description
def generate_description(title):
    start_prompt = "Provide a description of the YouTube thumbnail given the following video title.\n\n"
    end_prompt = "\n\nThumbnail description: "
    prompt = start_prompt + title + end_prompt
    
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Qualitative evaluation (sample predictions)
def qualitative_evaluation(num_samples=5):
    print("Qualitative Evaluation (Sample Predictions):")
    samples = random.sample(range(len(dataset)), num_samples)
    for i in samples:
        title = dataset[i]["prompt"]
        actual_description = dataset[i]["response"][17:]
        predicted_description = generate_description(title)
        
        print(f"\nSample {i + 1}:")
        print(f"Title: {title}")
        print(f"Actual Description: {actual_description}")
        print(f"Predicted Description: {predicted_description}")
        print("-" * 50)

# Quantitative evaluation (ROUGE scores)
def quantitative_evaluation(num_samples=30):
    print("\nQuantitative Evaluation (ROUGE Scores):")
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []
    
    samples = random.sample(range(len(dataset)), num_samples)
    for i in samples:
        title = dataset[i]["prompt"]
        actual_description = dataset[i]["response"][17:]
        predicted_description = generate_description(title)
        
        scores = scorer.score(actual_description, predicted_description)
        rouge1_scores.append(scores['rouge1'].fmeasure)
        rouge2_scores.append(scores['rouge2'].fmeasure)
        rougeL_scores.append(scores['rougeL'].fmeasure)
    
    print(f"Average ROUGE-1: {sum(rouge1_scores) / len(rouge1_scores):.4f}")
    print(f"Average ROUGE-2: {sum(rouge2_scores) / len(rouge2_scores):.4f}")
    print(f"Average ROUGE-L: {sum(rougeL_scores) / len(rougeL_scores):.4f}")

In [28]:
# Run evaluations
qualitative_evaluation()
# quantitative_evaluation()

Qualitative Evaluation (Sample Predictions):

Sample 32:
Title: Superhero Shion to the Rescue!! #titanuniverse
Actual Description: a cartoon character with a camera and a paper
Predicted Description: a man in a red shirt and a red shirt
--------------------------------------------------

Sample 31:
Title: 9 Day Left For Ryan's World the Movie!
Actual Description: a picture of a girl sitting on a tree branch
Predicted Description: a man in a red shirt and a man in a red shirt
--------------------------------------------------

Sample 28:
Title: Check Out Ryan's World The Movie MYSTERY BOX!
Actual Description: a young man and woman standing in front of a birthday cake
Predicted Description: a man in a red shirt and a black and white picture of a man in a red shirt
--------------------------------------------------

Sample 33:
Title: Can Ryan Defeat Packrat to Rescue His Dad?! New SUPERHERO Tag with Ryan Update!
Actual Description: a person with a bunch of toys in front of them
Predicted 

In [27]:
quantitative_evaluation()


Quantitative Evaluation (ROUGE Scores):
Average ROUGE-1: 0.3373
Average ROUGE-2: 0.1086
Average ROUGE-L: 0.3216
