# Mount Google Drive

In [1]:
from google.colab import drive

# Directory inside the Colab VM under which Google Drive is made available.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Append project paths to `sys.path`

In [2]:
import sys

# Directories on the mounted Drive that are added to the import search path
# so the project-local modules imported in the next cells can be resolved.
PROJECT_PATH_UTIL = "/content/drive/MyDrive/Colab Notebooks/SCVQA/util"
PROJECT_PATH_VSFA = "/content/drive/MyDrive/Colab Notebooks/SCVQA/VSFA"

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
for project_path in (PROJECT_PATH_UTIL, PROJECT_PATH_VSFA):
    if project_path not in sys.path:
        sys.path.append(project_path)

# Install requirements

In [3]:
!pip install av scikit-video



# Import libraries and custom Python modules

In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader

import os
import random
import numpy as np
import pandas as pd
from pathlib import Path
import datetime
from timeit import default_timer as timer

from dataset import FeatureDataset
from model import LSTM, Transformer
from engine import Engine

from VSFA import VSFA

# Custom functions

In [5]:
def mos_normalization(feature_data_list: list, mos_max: float, mos_min: float):
    """Min-max normalize the MOS of every record of ``feature_data_list`` in place.

    Each record is a tuple whose element at index 1 is the MOS. That element is
    replaced by ``np.float32((mos - mos_min) / (mos_max - mos_min))``; every
    other element of the record is kept as is. The list itself is modified and
    nothing is returned.

    Args:
        feature_data_list: list of record tuples, MOS at index 1.
        mos_max: value that is mapped to 1.0.
        mos_min: value that is mapped to 0.0.

    Raises:
        ValueError: if the list is non-empty and ``mos_max == mos_min``, because
            the normalization would divide by zero (and, with numpy scalars,
            silently turn every label into NaN/inf).
    """
    if not feature_data_list:
        return  # nothing to normalize

    mos_range = mos_max - mos_min
    if mos_range == 0:
        raise ValueError(
            f"Cannot normalize MOS: mos_max and mos_min are both {mos_max}"
        )

    for idx, record in enumerate(feature_data_list):
        fields = list(record)
        fields[1] = np.float32((fields[1] - mos_min) / mos_range)  # normalization
        feature_data_list[idx] = tuple(fields)


def get_mos_max_min(feature_data_list: list):
    """Return ``(largest, smallest)`` MOS found in ``feature_data_list``.

    The MOS of a record is its element at index 1.
    """
    scores = []
    for record in feature_data_list:
        scores.append(record[1])
    highest = max(scores)
    lowest = min(scores)
    return highest, lowest

# Setup parameters

In [6]:
# Identifiers of the selectable models.
_LSTM = "LSTM"
_TRANSFORMER = "Transformer"
_VSFA_GRU = "VSFA_GRU"

# Identifiers of the selectable databases.
_CSCVQ = "CSCVQ"
_SCVD = "SCVD"

# Identifier of the CNN used for feature extraction.
_ResNet50 = "ResNet50"

# Experiment configuration: edit these values to change the run.
MODEL = _VSFA_GRU
DATABASE = _SCVD
CNN_EXTRACTION = _ResNet50
BATCH_SIZE = 32  # CSCVQ:8, SCVD:32
NUM_WORKERS = 0
NUM_EPOCHS = 1000
LEARNING_RATE = 0.00001
SEED = 22035001

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Record of this run. The None entries are filled in by the later cells; the
# whole dict is written as one row of history.csv, so the key order below is
# the column order of that file and must not change.
info = {
    "DATE_TIME": None,
    "TOTAL_TIME": None,
    "DIR": None,
    "LOSS_VAL_CRITERION": None,
    "RMSE_VAL_CRITERION": None,
    "PLCC_VAL_CRITERION": None,
    "SROCC_VAL_CRITERION": None,
    "TRAIN_DATA_SIZE": None,
    "TEST_DATA_SIZE": None,
    "MODEL": MODEL,
    "DATABASE": DATABASE,
    "CNN_EXTRACTION": CNN_EXTRACTION,
    "DEVICE": DEVICE,
    "BATCH_SIZE": BATCH_SIZE,
    "NUM_WORKERS": NUM_WORKERS,
    "NUM_EPOCHS": NUM_EPOCHS,
    "SEED": SEED,
    "LEARNING_RATE": LEARNING_RATE,
}

# All project locations derive from a single root so the Drive layout only
# has to be changed in one place.
PROJECT_ROOT = Path("/content/drive/MyDrive/Colab Notebooks/SCVQA")

# <root>/feature/{DATABASE}/{CNN_EXTRACTION}
FEATURE_DIR = PROJECT_ROOT / "feature" / DATABASE / CNN_EXTRACTION

# <root>/model/{MODEL}/{DATABASE}/{CNN_EXTRACTION}
MODEL_DIR = PROJECT_ROOT / "model" / MODEL / DATABASE / CNN_EXTRACTION

# <root>/model/{MODEL}/{DATABASE}/{CNN_EXTRACTION}/history.csv
MODEL_DIR_HIST_FILE = MODEL_DIR / "history.csv"

# The VSFA baseline is announced as "VSFA GRU"; the proposed LSTM/Transformer
# models are announced under their own identifier.
model_label = "VSFA GRU" if MODEL == _VSFA_GRU else MODEL
print(f"[{model_label}-based] | database: {DATABASE}, CNN extraction: {CNN_EXTRACTION}")

print(
    f"device: {DEVICE}, batch_size: {BATCH_SIZE}, num_workers: {NUM_WORKERS}, num_epochs: {NUM_EPOCHS}, seed: {SEED}, learning_rate: {LEARNING_RATE}"
)

[VSFA GRU-based] | database: SCVD, CNN extraction: ResNet50
device: cuda, batch_size: 32, num_workers: 0, num_epochs: 1000, seed: 22035001, learning_rate: 1e-05


# Data preparation

In [7]:
feature_data_list = list()
MOS_MAX, MOS_MIN = None, None

# Fail loudly when the features are missing. sys.exit() would raise a
# success-status SystemExit, which hides the real problem.
if not os.path.isdir(FEATURE_DIR):
    raise FileNotFoundError(f"Video feature not exists in {FEATURE_DIR}/")

# os.scandir yields entries in arbitrary order. Sorting gives the seeded
# shuffle below a fixed starting order, so the train/test split is the same on
# every run. NOTE: models saved before this ordering was fixed may have been
# trained on a different split.
with os.scandir(FEATURE_DIR) as entries:
    video_feature_dir_list = sorted(entry.path for entry in entries if entry.is_dir())

# One sub-directory per video, each holding feature.npy and mos.npy.
for video_feature_dir in video_feature_dir_list:
    feature_file = f"{video_feature_dir}/feature.npy"
    mos_file = f"{video_feature_dir}/mos.npy"

    feature = np.load(feature_file)
    feature = torch.from_numpy(feature)
    # [frames, feature] | Tensor | torch.Size([300, 4096])

    mos = np.load(mos_file)
    mos = np.float32(mos.item())
    # mos | float

    feature_data_list.append((feature, mos))

if not feature_data_list:
    raise ValueError(f"No video feature directory found in {FEATURE_DIR}/")

# Scale every MOS with the min/max observed over the whole database; the
# bounds are kept so they can be handed to the Engine later.
MOS_MAX, MOS_MIN = get_mos_max_min(feature_data_list=feature_data_list)
mos_normalization(
    feature_data_list=feature_data_list, mos_max=MOS_MAX, mos_min=MOS_MIN
)

# Seeded shuffle followed by an 80/20 train/test split.
random.seed(SEED)
random.shuffle(feature_data_list)

TRAIN_SPLIT = int(0.8 * len(feature_data_list))
train_data_list = feature_data_list[:TRAIN_SPLIT]
test_data_list = feature_data_list[TRAIN_SPLIT:]

train_dataset = FeatureDataset(dataset=train_data_list)
test_dataset = FeatureDataset(dataset=test_data_list)

train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
)

info["TRAIN_DATA_SIZE"] = len(train_dataset)
info["TEST_DATA_SIZE"] = len(test_dataset)

print(
    f"Number of training data: {info['TRAIN_DATA_SIZE']} & testing data: {info['TEST_DATA_SIZE']}"
)

Number of training data: 640 & testing data: 160


# Training and testing step

In [8]:
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Build the selected model. An unknown MODEL value is rejected instead of
# silently falling back to VSFA (which would then be paired with MSELoss below).
if MODEL == _LSTM:
    model = LSTM(device=DEVICE).to(device=DEVICE)
elif MODEL == _TRANSFORMER:
    model = Transformer(device=DEVICE).to(device=DEVICE)
elif MODEL == _VSFA_GRU:
    model = VSFA().to(device=DEVICE)
else:
    raise ValueError(f"Unsupported MODEL: {MODEL!r}")

# Resume from the most recent run recorded in history.csv, if there is one.
if os.path.exists(MODEL_DIR_HIST_FILE):
    # Read DIR as text so a run-directory name is never coerced to a number.
    hist_df = pd.read_csv(MODEL_DIR_HIST_FILE, dtype={"DIR": str})
    if not hist_df.empty:
        model_file = MODEL_DIR / hist_df["DIR"].iloc[-1] / "model.pt"
        if os.path.exists(model_file):
            print(f"Load model from {model_file}")
            # map_location lets a checkpoint saved on GPU be restored on a
            # CPU-only runtime (and vice versa).
            model.load_state_dict(
                torch.load(f=str(model_file), map_location=DEVICE)
            )

loss_fn = nn.L1Loss() if MODEL == _VSFA_GRU else nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

engine = Engine(device=DEVICE, epochs=NUM_EPOCHS, mos_max=MOS_MAX, mos_min=MOS_MIN)

start_time = timer()
model_results = engine.train(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
)
end_time = timer()
total_time = (
    f"{datetime.timedelta(seconds=int(end_time-start_time))} (Hour:Minute:Second)"
)
print(f"Total training & testing time: {total_time}")

info["TOTAL_TIME"] = total_time

# Record the test metrics of the last epoch; the loss column is looked up by
# the class name of the loss function that was used.
info["LOSS_VAL_CRITERION"] = model_results[f"test_{type(loss_fn).__name__}"][-1]
info["RMSE_VAL_CRITERION"] = model_results["test_RMSE"][-1]
info["PLCC_VAL_CRITERION"] = model_results["test_PLCC"][-1]
info["SROCC_VAL_CRITERION"] = model_results["test_SROCC"][-1]

now = datetime.datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M:%S")
info["DATE_TIME"] = date_time

# Save model, result, history, prediction
# Every run gets its own timestamped sub-directory of MODEL_DIR.
# (Named run_dir_name rather than `dir` to avoid shadowing the builtin.)
run_dir_name = now.strftime("%Y%m%d_%H%M%S")
info["DIR"] = run_dir_name

model_dir = MODEL_DIR / run_dir_name
model_dir.mkdir(parents=True, exist_ok=True)

# model
model_file = model_dir / "model.pt"
torch.save(
    obj=model.state_dict(),
    f=str(model_file),
)

# result
result_file = model_dir / "result.csv"
model_results_df = pd.DataFrame(model_results)
model_results_df.to_csv(str(result_file), index=False)

# history: append one row per run; the header is written only when the file
# is created.
info_df = pd.DataFrame(info, index=[0])
if os.path.exists(MODEL_DIR_HIST_FILE):
    info_df.to_csv(str(MODEL_DIR_HIST_FILE), mode="a", index=False, header=False)
else:
    info_df.to_csv(str(MODEL_DIR_HIST_FILE), index=False)

# prediction of last epoch
prediction_file = model_dir / "prediction.csv"
model_prediction_df = pd.DataFrame(engine.get_prediction()[-1])
model_prediction_df.to_csv(str(prediction_file), index=False)

  0%|          | 0/1000 [00:00<?, ?it/s]

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
Training batch[19]: last record -> y: 27.261600708901653 | y_pred: 26.821956018637167
Testing  batch[0]: last record -> y: 34.75140078251002 | y_pred: 45.609971938188664
Testing  batch[1]: last record -> y: 40.61230132819344 | y_pred: 45.88925173852442
Testing  batch[2]: last record -> y: 36.83800132288775 | y_pred: 37.649293954212794
Testing  batch[3]: last record -> y: 36.818999830849634 | y_pred: 38.79140970195397
Testing  batch[4]: last record -> y: 28.967899637007747 | y_pred: 25.71747338842104
[Training] Epoch: 815 | L1Loss: 0.05225 | RMSE: 0.08562 | PLCC: 0.94164 | SROCC: 0.94172
[Testing]  Epoch: 815 | L1Loss: 0.07604 | RMSE: 0.10385 | PLCC: 0.91668 | SROCC: 0.92998
Training batch[0]: last record -> y: 70.80740411708621 | y_pred: 69.05732102989737
Training batch[1]: last record -> y: 66.50039818303662 | y_pred: 57.568104890837276
Training batch[2]: last record -> y: 34.048697754381124 | y_pred: 35.87233121744566
Training batch[3]: last record 

# Model Summary

In [9]:
# Print the model's string representation, which lists its sub-modules.
print(model)

VSFA(
  (ann): ANN(
    (fc0): Linear(in_features=4096, out_features=128, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (fc): Linear(in_features=128, out_features=128, bias=True)
  )
  (rnn): GRU(128, 32, batch_first=True)
  (q): Linear(in_features=32, out_features=1, bias=True)
)
