# Introduction

This notebook extracts image features using three deep learning models—two CNNs (EfficientNet-B3 and ConvNeXt-S) and one Transformer (Swin V2-S). The features generated by these networks are concatenated into a single vector of 3,072 dimensions per image. Finally, the extracted features for the training and test datasets are stored in two CSV files, `train_features.csv` and `test_features.csv`.

The dataset can be downloaded from the following link: https://jiangliu5.github.io/imqac.github.io/

# Importing Libraries

In [None]:
!pip install -U jupyter ipywidgets

In [None]:
import os
import torch
import torch.backends.cudnn as cudnn
import pandas as pd
import numpy as np

from torchvision.transforms import v2
from pathlib import Path
from tqdm.auto import tqdm
from torch.utils.data import DataLoader
from torchvision.transforms import InterpolationMode
from models.iqa_module_proposed import model_map, FuseBackbones
from utils.dataset_proposed import CustomDataset
#from VCIP_IMQA.VCIP.IMQA.utils.convnext import convnext_tiny, convnext_small, convnext_base, convnext_large, convnext_xlarge, model_urls

import warnings
# Set the TORCH_USE_CUDA_DSA flag for this process (presumably to enable
# CUDA device-side assertions).
# NOTE(review): torch has already been imported at this point — confirm the
# variable is still honoured when it is set after the import.
os.environ['TORCH_USE_CUDA_DSA'] = "1"
# Silence UserWarning / FutureWarning messages, but only those raised from the
# two modules named below; warnings from everywhere else stay visible.
warnings.filterwarnings("ignore", category=UserWarning, module="torch.autograd.graph")
warnings.filterwarnings("ignore", category=FutureWarning, module="onnxscript.converter")

# Importing Dataset

In [None]:
# Run-wide settings
SEED = 42
AMOUNT_TO_GET = 1.0
NUM_WORKERS = 0  # alternative: os.cpu_count()

# Dataset layout: every location is derived from the BASELINE root
BASELINE_NAME = "VCIP_IMQA/VCIP"
BASELINE = Path(BASELINE_NAME)
TARGET_DIR = BASELINE.joinpath("EQ420_image")
TARGET_LABEL = BASELINE.joinpath("Labels")
TARGET_BASE = BASELINE.joinpath("IMQA")

# Make sure the image directory exists: it is created together with any
# missing parent directories, and nothing happens if it is already there.
# NOTE(review): when the dataset has not been downloaded yet this silently
# creates an empty directory — confirm that is the intended behaviour.
TARGET_DIR.mkdir(parents=True, exist_ok=True)

def set_seeds(seed: int=42) -> None:
    """Sets the random seeds for torch operations.

    Seeds the default torch generator and the generator of every CUDA device,
    so that repeated runs draw the same random numbers.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations on all GPUs, not only the current
    # device; this call is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    
# Set seeds
set_seeds(SEED)

# Switch for the extraction cell further down: when True that cell runs the
# backbones over both splits and writes train_features.csv / test_features.csv;
# when False the cell does nothing and the preview cells at the end rely on
# CSV files left by an earlier run.
EXTRACT_FEATURES = True

# Specifying Target Device

In [None]:
# Turn on the cuDNN benchmark mode
cudnn.benchmark = True

# Prefer the GPU whenever CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

#if device == "cuda":
#    !nvidia-smi

# Extracting Features

In [None]:
# Extraction settings
IMG_SIZE = 512
N_SPLITS = 8
BATCH_SIZE = 1

# Fold ids belonging to each partition
train_vector = list(range(1, 9))
test_vector = [9, 10]

# Load both label tables (rows stay in file order, no shuffling) and keep only
# the rows whose 'folds' value is part of the corresponding partition
train_csv = pd.read_csv(TARGET_LABEL / 'mos_fold_train.csv')
test_csv = pd.read_csv(TARGET_LABEL / 'mos_fold_test.csv')
train_ids = train_csv.loc[train_csv['folds'].isin(train_vector)]
test_ids = test_csv.loc[test_csv['folds'].isin(test_vector)]

In [None]:
# Preview only the first rows of the raw test label table instead of
# rendering the whole frame.
test_csv.head()

In [None]:
# Preview only the first rows of the fold-filtered test table instead of
# rendering the whole frame.
test_ids.head()

In [None]:
# Names of the three backbones handed to FuseBackbones (judging by the names:
# one Swin V2 transformer plus EfficientNet and ConvNeXt CNNs).
model_list = ['swin_v2_s', 'efficientnet_b3', 'convnext_s']
# Build the fused model from those backbones.
# NOTE(review): vector_size=None presumably leaves the concatenated features
# unprojected — confirm against the FuseBackbones implementation.
fuse_backbones = FuseBackbones(model_list=model_list, vector_size=None)

In [None]:
# Measure the serialized size of the fused model's weights.
# The state dict is written to a scratch file only so its size can be read;
# the try/finally guarantees the scratch file is removed even when saving or
# sizing raises.
temp_path = "temp_model.pth"
try:
    # Save model temporarily
    torch.save(fuse_backbones.state_dict(), temp_path)

    # Get size in MB
    size_mb = os.path.getsize(temp_path) / (1024 * 1024)
finally:
    # Clean up (the file may not exist if torch.save failed early)
    if os.path.exists(temp_path):
        os.remove(temp_path)

print(f"Model size: {size_mb:.2f} MB")

In [None]:
#file_path = r'H:\outputs\iqa_total_20250616_fold0mseadam.pth'
#size_bytes = os.path.getsize(file_path)
#size_mb = size_bytes / (1024 ** 2)

#print(f"File size: {size_mb:.2f} MB")

In [None]:
# IMG_SIZE, BATCH_SIZE, train_ids and test_ids are defined by the
# "Constant definition" cell earlier in this section; they are reused here
# instead of being declared a second time, so the label CSVs are parsed once.

if EXTRACT_FEATURES:
    # Pre-processing.
    # IMG_SIZE is a bare int (not a tuple), so Resize matches the shorter image
    # side to IMG_SIZE and keeps the aspect ratio instead of forcing a square.
    manual_transforms = v2.Compose([
        v2.Resize(IMG_SIZE, interpolation=InterpolationMode.BICUBIC),
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Pinned host memory only helps copies to a CUDA device, so it is not
    # requested on CPU-only runs.
    use_pinned_memory = device == "cuda"

    # Preparing dataloaders (shuffle=False keeps the rows in label-table order)
    train_dataloader = DataLoader(
        dataset=CustomDataset(ids=train_ids, ref_dir=TARGET_DIR, transform=manual_transforms),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=use_pinned_memory)

    test_dataloader = DataLoader(
        dataset=CustomDataset(ids=test_ids, ref_dir=TARGET_DIR, transform=manual_transforms),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=use_pinned_memory)

    # Create model: a fresh instance is moved to the target device, switched to
    # eval mode and cast to float32
    model_list = ['swin_v2_s', 'efficientnet_b3', 'convnext_s']
    fuse_backbones = FuseBackbones(model_list=model_list, vector_size=None)
    fuse_backbones.to(device)
    fuse_backbones.eval()
    fuse_backbones = fuse_backbones.float()

    # Extract features - train dataset
    train_features = []
    train_mos = []
    train_fold = []

    for img, mos, fold, _, _ in tqdm(train_dataloader, desc="Extracting features (train)"):
        with torch.inference_mode():
            features = fuse_backbones(img.to(device))
        # Move everything back to the CPU so the batches can be concatenated
        train_features.append(features.cpu())
        train_mos.append(mos.cpu())
        train_fold.append(fold.cpu())

    # Tensor to numpy
    train_features_np = torch.cat(train_features).numpy()
    train_mos_np = torch.cat(train_mos).numpy()
    train_fold_np = torch.cat(train_fold).numpy()

    # Create column names
    num_features = train_features_np.shape[1]
    feature_columns = [f"f_{i}" for i in range(num_features)]
    columns = feature_columns + ['fold'] + ['mos']

    # Combine into DataFrame (all three arrays are numeric, so stacking them
    # yields a single numeric matrix)
    train_df = pd.DataFrame(
        data=np.hstack([train_features_np, train_fold_np.reshape(-1, 1), train_mos_np.reshape(-1, 1)]),
        columns=columns)

    # Extract features - test dataset
    test_features = []
    test_fold = []
    test_names = []

    for img, _, fold, name, _ in tqdm(test_dataloader, desc="Extracting features (test)"):
        with torch.inference_mode():
            features = fuse_backbones(img.to(device))
        test_features.append(features.cpu())
        test_fold.append(fold.cpu())
        test_names += list(name)

    # Tensor to numpy
    test_features_np = torch.cat(test_features).numpy()
    test_fold_np = torch.cat(test_fold).numpy()

    # Create column names
    num_features = test_features_np.shape[1]
    feature_columns = [f"f_{i}" for i in range(num_features)]
    columns = ['image_name'] + feature_columns + ['fold']

    # Combine into DataFrame.
    # The frame is built from the numeric feature matrix and the name / fold
    # columns are attached afterwards: stacking the string names together with
    # the float features in one array would coerce every value to a string.
    test_df = pd.DataFrame(data=test_features_np, columns=feature_columns)
    test_df.insert(0, 'image_name', test_names)
    test_df['fold'] = test_fold_np.reshape(-1)
    assert list(test_df.columns) == columns, "unexpected test column layout"

    # Save: the preview cells below read these two files back from disk
    train_df.to_csv("train_features.csv", index=False)
    test_df.to_csv("test_features.csv", index=False)

In [None]:
# Read the saved training features back from disk (rather than reusing the
# in-memory frame), so this preview also works when EXTRACT_FEATURES is False
# as long as train_features.csv exists from an earlier run.
train_df = pd.read_csv('train_features.csv')
# Show the first rows only
train_df.head()

In [None]:
# Read the saved test features back from disk (rather than reusing the
# in-memory frame), so this preview also works when EXTRACT_FEATURES is False
# as long as test_features.csv exists from an earlier run.
test_df = pd.read_csv('test_features.csv')
# Show the first rows only
test_df.head()