# Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the project, feature and model paths
# used further down in this notebook are all located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Append project path

In [None]:
import sys

# Directories on the mounted Drive that are added to the module search path.
PROJECT_PATH_UTIL = "/content/drive/MyDrive/Colab Notebooks/SCVQA/util"
#                    /content/drive/MyDrive/.../util
PROJECT_PATH_VSFA = "/content/drive/MyDrive/Colab Notebooks/SCVQA/VSFA"
#                    /content/drive/MyDrive/.../VSFA

# Register both directories (util first, then VSFA) at the end of sys.path so
# that project-local modules imported later in the notebook can be found.
sys.path.extend([PROJECT_PATH_UTIL, PROJECT_PATH_VSFA])

# Install requirements

In [None]:
!pip install av scikit-video



# Import libraries and project modules

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader

import os
import random
import numpy as np
import pandas as pd
from pathlib import Path
import datetime
from timeit import default_timer as timer

from dataset import FeatureDataset
#from model import LSTM, Transformer
from model_with_TP_fn import LSTM, Transformer
from engine import Engine

from VSFA import VSFA

# Custom functions

In [None]:
def mos_normalization(feature_data_list: list, mos_max: float, mos_min: float):
    """Min-max normalise the MOS value of every record, in place.

    Each element of ``feature_data_list`` is a sequence whose item at index 1
    is the MOS value. That item is replaced by
    ``np.float32((mos - mos_min) / (mos_max - mos_min))`` and the element is
    stored back into the list as a tuple; all other items are left untouched.

    Args:
        feature_data_list: Records to rewrite. The list is modified in place.
        mos_max: MOS value that maps to 1.0.
        mos_min: MOS value that maps to 0.0.

    Returns:
        None.

    Raises:
        ValueError: If the list is non-empty and ``mos_max == mos_min``. The
            scale would be a division by zero, which otherwise yields NaN/inf
            labels (NumPy scalars) or a bare ZeroDivisionError (Python floats).
    """
    if not feature_data_list:
        return  # nothing to normalise

    mos_range = mos_max - mos_min
    if mos_range == 0:
        raise ValueError(
            f"Cannot normalise MOS: mos_max and mos_min are both {mos_max!r}"
        )

    for idx, record in enumerate(feature_data_list):
        fields = list(record)
        fields[1] = np.float32((fields[1] - mos_min) / mos_range)
        feature_data_list[idx] = tuple(fields)


def get_mos_max_min(feature_data_list: list):
    """Return ``(largest, smallest)`` MOS value found in the records.

    The MOS value is read from index 1 of every element of
    ``feature_data_list``. An empty list makes ``max`` raise ``ValueError``.
    """
    scores = []
    for record in feature_data_list:
        scores.append(record[1])

    highest = max(scores)
    lowest = min(scores)
    return highest, lowest

# Setup parameters

In [None]:
_LSTM = "LSTM"
_TRANSFORMER = "Transformer"
_VSFA_GRU = "VSFA_GRU"

_CSCVQ = "CSCVQ"
_SCVD = "SCVD"

_ResNet18 = "ResNet18"
_ResNet34 = "ResNet34"
_ResNet50 = "ResNet50"
_ResNet101 = "ResNet101"
_ResNet34_ResNet50 = "ResNet34_ResNet50"

#===============================================================================
MODEL = _TRANSFORMER
DATABASE = _SCVD
CNN_EXTRACTION = _ResNet50

BATCH_SIZE = 32  # CSCVQ:8, SCVD:32
NUM_WORKERS = 0
NUM_EPOCHS = 1000
LEARNING_RATE = 0.00001
SEED = 22035001
#===============================================================================

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

machine = !nvidia-smi -L
machine = machine[0]

info = {
    "MACHINE" : machine,
    "DATE_TIME": None,
    "TOTAL_TIME": None,
    "DIR": None,
    "LOSS_VAL_CRITERION": None,
    "RMSE_VAL_CRITERION": None,
    "PLCC_VAL_CRITERION": None,
    "SROCC_VAL_CRITERION": None,
    "TRAIN_DATA_SIZE": None,
    "TEST_DATA_SIZE": None,
    "MODEL": MODEL,
    "DATABASE": DATABASE,
    "CNN_EXTRACTION": CNN_EXTRACTION,
    "DEVICE": DEVICE,
    "BATCH_SIZE": BATCH_SIZE,
    "NUM_WORKERS": NUM_WORKERS,
    "NUM_EPOCHS": NUM_EPOCHS,
    "SEED": SEED,
    "LEARNING_RATE": LEARNING_RATE,
}

FEATURE_DIR = Path(f"/content/drive/MyDrive/Colab Notebooks/SCVQA/feature/{DATABASE}/{CNN_EXTRACTION}/")
#                    /content/drive/MyDrive/.../feature/{DATABASE}/{CNN_EXTRACTION}/

MODEL_DIR = Path(f"/content/drive/MyDrive/Colab Notebooks/SCVQA/model_w_TP_fn/{MODEL}/{DATABASE}/{CNN_EXTRACTION}/")
#                  /content/drive/MyDrive/.../model/{MODEL}/{DATABASE}/{CNN_EXTRACTION}/

MODEL_DIR_HIST_FILE = Path(f"/content/drive/MyDrive/Colab Notebooks/SCVQA/model_w_TP_fn/{MODEL}/{DATABASE}/{CNN_EXTRACTION}/history.csv")
#                            /content/drive/MyDrive/.../model/{MODEL}/{DATABASE}/{CNN_EXTRACTION}/history.csv


(
    # VSFA GRU
    print(
        f"[VSFA GRU-based] | database: {DATABASE}, CNN extraction: {CNN_EXTRACTION}"
    )
    if MODEL == _VSFA_GRU
    else
    # proposed LSTM/Transformer
    print(
        f"[{MODEL}-based] | database: {DATABASE}, CNN extraction: {CNN_EXTRACTION}"
    )
)

print(
    f"device: {DEVICE}, batch_size: {BATCH_SIZE}, num_workers: {NUM_WORKERS}, num_epochs: {NUM_EPOCHS}, seed: {SEED}, learning_rate: {LEARNING_RATE}"
)

[Transformer-based] | database: SCVD, CNN extraction: ResNet34_ResNet50
device: cuda, batch_size: 32, num_workers: 0, num_epochs: 1000, seed: 22035001, learning_rate: 1e-05


# Data preparation

In [None]:
# Load every (feature, MOS) pair from FEATURE_DIR, min-max normalise the MOS
# values, then build a seeded 80/20 train/test split and its DataLoaders.
feature_data_list = list()
MOS_MAX, MOS_MIN = None, None

if not os.path.exists(FEATURE_DIR):
    # Raise a real exception instead of calling sys.exit() inside a notebook,
    # so the failure carries its message and stops the run with a traceback.
    raise FileNotFoundError(f"Video feature not exists in {FEATURE_DIR}/")

# os.scandir yields entries in arbitrary order. Sort them so that the seeded
# shuffle below always produces the same train/test split; otherwise a model
# reloaded from a previous run could be evaluated on videos it was trained on.
# NOTE: this can change the split relative to runs made before the sort was added.
video_feature_dir_list = sorted(
    f.path for f in os.scandir(FEATURE_DIR) if f.is_dir()
)

for video_feature_dir in video_feature_dir_list:
    feature_file = f"{video_feature_dir}/feature.npy"
    mos_file = f"{video_feature_dir}/mos.npy"

    feature = np.load(feature_file)
    feature = torch.from_numpy(feature)
    # [frames, feature] | Tensor | e.g. torch.Size([300, 4096])

    mos = np.load(mos_file)
    mos = np.float32(mos.item())  # .item() requires a single-element array
    # mos | np.float32 scalar

    feature_data_list.append((feature, mos))

if not feature_data_list:
    # Without this check the failure would surface as max() on an empty list.
    raise FileNotFoundError(f"No video feature directory found in {FEATURE_DIR}/")

# Normalise MOS to [0, 1]; the raw bounds are kept in MOS_MAX / MOS_MIN and
# handed to the Engine later in the notebook.
MOS_MAX, MOS_MIN = get_mos_max_min(feature_data_list=feature_data_list)
mos_normalization(
    feature_data_list=feature_data_list, mos_max=MOS_MAX, mos_min=MOS_MIN
)

# Deterministic shuffle (given the sorted input above), then 80/20 split.
random.seed(SEED)
random.shuffle(feature_data_list)

TRAIN_SPLIT = int(0.8 * len(feature_data_list))
train_data_list = feature_data_list[:TRAIN_SPLIT]
test_data_list = feature_data_list[TRAIN_SPLIT:]

train_dataset = FeatureDataset(dataset=train_data_list)
test_dataset = FeatureDataset(dataset=test_data_list)

# Only the training loader reshuffles between epochs.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
)

info["TRAIN_DATA_SIZE"] = len(train_dataset)
info["TEST_DATA_SIZE"] = len(test_dataset)

print(
    f"Number of training data: {info['TRAIN_DATA_SIZE']} & testing data: {info['TEST_DATA_SIZE']}"
)

Number of training data: 640 & testing data: 160


# Training step

In [None]:
# Seed PyTorch (CPU and CUDA generators) before the model is created.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Input feature width, chosen from the selected CNN extractor. Any extractor
# other than the four single ResNets (e.g. ResNet34_ResNet50) falls through to
# 5120.
feature_size = None
if CNN_EXTRACTION == _ResNet18 or CNN_EXTRACTION == _ResNet34:
    feature_size = 1024
elif CNN_EXTRACTION == _ResNet50 or CNN_EXTRACTION == _ResNet101:
    feature_size = 4096
else:
    feature_size = 5120

# Build the selected model; anything that is not LSTM/Transformer uses VSFA.
model = None
if MODEL == _LSTM:
    model = LSTM(
        device=DEVICE,
        feature_size=feature_size,
    ).to(device=DEVICE)
elif MODEL == _TRANSFORMER:
    model = Transformer(
        device=DEVICE,
        feature_size=feature_size,
    ).to(device=DEVICE)
else:
    model = VSFA(
        input_size=feature_size
    ).to(device=DEVICE)

# Warm start: if a history log exists, load the weights saved by the most
# recent run (last row of the log). Only the model weights are restored.
if os.path.exists(MODEL_DIR_HIST_FILE):
    hist_df = pd.read_csv(MODEL_DIR_HIST_FILE)
    # A log that contains only the header row has no previous run to load.
    if not hist_df.empty:
        model_file = Path(MODEL_DIR / hist_df["DIR"].iloc[-1] / "model.pt")
        if os.path.exists(model_file):
            print(f"Load model from {model_file}")
            # map_location lets a checkpoint saved on a GPU runtime be loaded
            # on a CPU-only runtime (and vice versa) instead of failing.
            model.load_state_dict(
                torch.load(f=str(model_file), map_location=DEVICE)
            )

# VSFA is trained with L1 loss, the LSTM/Transformer models with MSE.
loss_fn = nn.L1Loss() if MODEL == _VSFA_GRU else nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

engine = Engine(device=DEVICE, epochs=NUM_EPOCHS, mos_max=MOS_MAX, mos_min=MOS_MIN)

# Run the full train/test loop and measure its wall-clock duration.
start_time = timer()
model_results = engine.train(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
)
end_time = timer()

# Whole seconds, rendered by timedelta as H:MM:SS.
total_time = f"{datetime.timedelta(seconds=int(end_time-start_time))} (Hour:Minute:Second)"
print(f"Total training & testing time: {total_time}")
info["TOTAL_TIME"] = total_time

# Record the last-epoch test metrics. The loss column of the results is named
# after the loss class (e.g. "test_MSELoss").
info.update(
    {
        "LOSS_VAL_CRITERION": model_results[f"test_{type(loss_fn).__name__}"][-1],
        "RMSE_VAL_CRITERION": model_results["test_RMSE"][-1],
        "PLCC_VAL_CRITERION": model_results["test_PLCC"][-1],
        "SROCC_VAL_CRITERION": model_results["test_SROCC"][-1],
    }
)

# Timestamp of the end of the run, in human-readable form.
now = datetime.datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M:%S")
info["DATE_TIME"] = date_time

# Persist the run: model weights, per-epoch results, history row, predictions.
# Each run gets its own timestamp-named sub-directory of MODEL_DIR.
# NOTE: `dir` shadows the Python builtin; the name is kept as-is.
dir = now.strftime("%Y%m%d_%H%M%S")
info["DIR"] = dir

model_dir = MODEL_DIR / dir
model_dir.mkdir(parents=True, exist_ok=True)

# model: state_dict only
model_file = model_dir / "model.pt"
torch.save(model.state_dict(), str(model_file))

# result: the metrics returned by engine.train
result_file = model_dir / "result.csv"
model_results_df = pd.DataFrame(model_results)
model_results_df.to_csv(str(result_file), index=False)

# history: one row per run; the header is written only when the file is created
info_df = pd.DataFrame(info, index=[0])
history_exists = os.path.exists(MODEL_DIR_HIST_FILE)
info_df.to_csv(
    str(MODEL_DIR_HIST_FILE),
    mode="a" if history_exists else "w",
    index=False,
    header=not history_exists,
)

# prediction of last epoch
prediction_file = model_dir / "prediction.csv"
last_epoch_prediction = engine.get_prediction()[-1]
model_prediction_df = pd.DataFrame(last_epoch_prediction)
model_prediction_df.to_csv(str(prediction_file), index=False)

  0%|          | 0/1000 [00:00<?, ?it/s]

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
Testing  batch[3]: last record -> y: 54.18170323514141 | y_pred: 59.62603594915208
Testing  batch[4]: last record -> y: 27.66959969745278 | y_pred: 31.300553900559294
[Training] Epoch: 815 | MSELoss: 0.01663 | RMSE: 0.12897 | PLCC: 0.85329 | SROCC: 0.83796
[Testing]  Epoch: 815 | MSELoss: 0.03068 | RMSE: 0.17516 | PLCC: 0.85707 | SROCC: 0.84603
Training batch[0]: last record -> y: 45.44269898635889 | y_pred: 53.74136584915118
Training batch[1]: last record -> y: 42.00540208510495 | y_pred: 44.213806113248665
Training batch[2]: last record -> y: 53.70280002467098 | y_pred: 43.0629695554768
Training batch[3]: last record -> y: 59.52329988615725 | y_pred: 55.1851897832687
Training batch[4]: last record -> y: 72.168403673586 | y_pred: 61.81106923560742
Training batch[5]: last record -> y: 49.146899631522956 | y_pred: 56.96157842282378
Training batch[6]: last record -> y: 39.45899952945217 | y_pred: 39.5975579719867
Training batch[7]: last record -> y: 29.

# Model Summary

In [None]:
# Print the model's string representation (its nested layer structure).
print(model)

Transformer(
  (fc0): Sequential(
    (0): Linear(in_features=5120, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=64, out_features=64, bias=True)
  )
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=32, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=32, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
    (n