In [None]:
import numpy as np
import pandas as pd

import torch
#import pytorch_lightning as pl
from utils import DynamicMultitasker, load_embeddings_and_labels, embedding_dimensions


from sklearn.model_selection import train_test_split
import glob
import random
import yaml, json

In [None]:
# Experiment selectors: embedding family, voice flag and the target classes.
# They are also used to build the quantiles file name below.
which = "openl3_env"
voice = True
targets_list = ["Girls/women", "Boys/men"]

# Saved run configuration.
with open("config_save.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Annotation tables, both indexed by stimulus id.
groundtruth_df = pd.read_csv("groundtruth_merged.csv").set_index("stimulus_id")
emotions_and_mid_level_df = pd.read_csv("emotions_and_mid_level.csv").set_index(
    "stimulus_id"
)

# drop columns that would introduce noise
n_emotions = 7  # emotion count before any column is dropped
if config["drop_non_significant"]:
    to_drop = [
        "Amusing",  # Extremely low correlations with all the mid-level features
        "Wide/Narrow pitch variation",  # non significant differences between targets (ANOVA)
        "Repetitive/Non-repetitive",  # non significant differences between targets (ANOVA)
        "Fast tempo/Slow tempo",  # non significant differences between targets (ANOVA)
    ]
    emotions_and_mid_level_df = emotions_and_mid_level_df.drop(columns=to_drop)
    n_emotions -= 1  # we dropped Amusing

# load quantile normalization parameters
quantiles_path = f"quantiles_{which}_voice_{voice}_{len(targets_list)}_cls.json"
with open(quantiles_path, "r") as quantiles_file:
    quantiles = json.load(quantiles_file)


# scales quantization
def value_to_level(value, quantiles):
    """Quantize a value into "low", "medium" or "high".

    Args:
        value: the number to classify.
        quantiles: indexable pair of cut points; ``quantiles[0]`` is the
            upper bound (inclusive) of "low" and ``quantiles[1]`` the upper
            bound (inclusive) of "medium".

    Returns:
        "low" if ``value <= quantiles[0]``, otherwise "medium" if
        ``value <= quantiles[1]``, otherwise "high".
    """
    # The second cut point is only read when the first comparison fails.
    if value <= quantiles[0]:
        return "low"
    if value <= quantiles[1]:
        return "medium"
    return "high"

##### load model

In [None]:
# set the parameters for the model
config["cls_dict"]["target"] = targets_list # add target list to config
params = {
    # embedding size looked up for the selected music embedding family
    "input_dim": embedding_dimensions["music"][which],
    "n_emo": n_emotions,
    # every remaining column of the annotation table is a mid-level feature
    "n_mid": len(emotions_and_mid_level_df.columns)-n_emotions,
    "cls_dict": config["cls_dict"],
    "filmed": False,
}

# Load model:
model = DynamicMultitasker(**params)
# map_location="cpu" lets a checkpoint saved from a GPU be restored on a
# machine without CUDA; the prediction code below runs on CPU tensors anyway.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
state_dict = torch.load(
    f"models/{which}_{voice}_voice_{len(targets_list)}_cls.pt",
    map_location="cpu",
)
model.load_state_dict(state_dict)
model.eval()

In [None]:
# load embeddings from a random commercial
embedding_paths = glob.glob(f"/homes/lm004/all_embeddings/music/{which}/*.npy")
if not embedding_paths:
    # random.choice on an empty list only raises an opaque IndexError
    raise FileNotFoundError(
        f"No .npy embeddings found for '{which}' in /homes/lm004/all_embeddings/music/{which}/"
    )
embedding_fn = random.choice(embedding_paths)

# Pass the path directly so numpy opens and closes the file itself
# (the previous open(...) handle was never closed).
# Average over the first axis to get a single vector for the clip.
embedding = np.load(embedding_fn).mean(axis=0)

# The file name (up to the first dot) is used as the YouTube video id.
youtube_id = embedding_fn.split("/")[-1].split(".")[0]

print(f"https://www.youtube.com/watch?v={youtube_id}")


##### predict

In [None]:
# Class-label lookup: every classification head from the config plus the
# target classes. Built first so it can drive the decoding below.
cls_dict = config["cls_dict"]
cls_dict["target"] = targets_list

# Forward pass on a batch containing the single embedding; no gradients are
# needed for inference.
with torch.no_grad():
    y_mid_pred, y_emo_pred, y_cls_pred = model(
        torch.from_numpy(embedding[np.newaxis, :]).float()
    )

y_emo_pred = y_emo_pred.numpy()
y_mid_pred = y_mid_pred.numpy()
# Index of the highest-scoring class per head. Taking element [0] of the
# batch before int() avoids converting a 1-D array to a scalar, which is
# deprecated in recent NumPy versions.
y_cls_pred = {k: int(torch.argmax(y_cls_pred[k], dim=1)[0]) for k in cls_dict}

print("Predicted target:", cls_dict["target"][y_cls_pred["target"]])

for k in cls_dict:
    if k != "target":
        print(f"{k}: {cls_dict[k][y_cls_pred[k]]}")

# print level for each emotion
# (row 0 is the only item in the batch; pass scalars, not 1-element arrays)
for k, value in zip(emotions_and_mid_level_df.columns, y_emo_pred[0]):
    print(f"{k}: {value_to_level(value, quantiles[k])}")

# print level for each mid-level feature
# (mid-level columns follow the first n_emotions columns of the table)
for k, value in zip(emotions_and_mid_level_df.columns[n_emotions:], y_mid_pred[0]):
    print(f"{k}: {value_to_level(value, quantiles[k])}")


