In [1]:
import os
from moviepy import VideoFileClip
from tqdm import tqdm
import numpy as np
import torch

import pandas as pd
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import umap
import plotly.express as px

# from retinaface import RetinaFace
from facenet_pytorch import MTCNN
from transformers import AutoFeatureExtractor, AutoModelForImageClassification, ViTModel, ViTImageProcessor
import torchvision.transforms as transforms

2025-01-09 17:34:04.673878: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-09 17:34:05.156787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-01-09 17:34:05.156811: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2025-01-09 17:34:06.550095: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2025-

In [2]:
# Folder holding the input video clips (relative to the working directory).
DATA_DIR = 'data/video_data'

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Names of every entry in DATA_DIR. os.listdir returns them in arbitrary
# order, and later cells rely on this order to pair labels with embeddings.
files = os.listdir(DATA_DIR)

In [3]:
# Leave this one clip out of all further processing.
# NOTE(review): the reason is not recorded here; presumably the file cannot be
# decoded. Confirm and document.
files = list(filter(lambda name: name != '1076_MTI_SAD_XX.flv', files))

In [4]:
# Face detector shared by detect_face() below.
# NOTE(review): the per-argument notes follow the facenet_pytorch MTCNN
# documentation; confirm them against the installed version.
detector = MTCNN(
    image_size=160,  # side length of crops produced by MTCNN's own cropping; presumably unused by .detect() (confirm)
    margin=0,  # extra pixels around the box for MTCNN's own cropping; presumably unused by .detect() (confirm)
    min_face_size=200,  # smallest face size searched for; presumably set high because the face fills the frame (TODO confirm)
    thresholds=[0.6, 0.7, 0.7],  # one detection threshold per cascade stage
    factor=0.709,  # image-pyramid scale factor
    post_process=True,  # normalisation applied by MTCNN's own cropping; presumably unused by .detect() (confirm)
    keep_all=False,  # return a single face rather than all detected faces
    device=DEVICE
)

In [5]:
# def detect_face(frame):
#     detections = RetinaFace.detect_faces(frame)
#     if isinstance(detections, dict):
#         data = detections['face_1']
#         x, y, w, h = data['facial_area']
#         face = frame[y:h, x:w]
#         return face


def detect_face(frame):
    """Crop the first face that the module-level MTCNN ``detector`` finds in ``frame``.

    Args:
        frame: Image array indexed as ``frame[row, column, ...]``
            (presumably an H x W x 3 RGB video frame; confirm against caller).

    Returns:
        The cropped region of ``frame``, or ``None`` when no face is detected
        or the detected box does not overlap the frame.
    """
    boxes, _ = detector.detect(frame)
    if boxes is None or len(boxes) == 0:
        return None

    # A box holds two corners (x1, y1, x2, y2), not (x, y, width, height).
    x1, y1, x2, y2 = (int(coord) for coord in boxes[0])

    # The detector can report corners outside the image. A negative start index
    # would be read by NumPy as an offset from the end and give an empty or
    # wrong crop, so clamp the box to the frame before slicing.
    frame_height, frame_width = frame.shape[:2]
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, frame_width), min(y2, frame_height)
    if x2 <= x1 or y2 <= y1:
        return None

    return frame[y1:y2, x1:x2]


def extract_frames(file_path):
    """Collect the face crop of every video frame in which a face is found.

    Frames for which ``detect_face`` returns ``None`` are skipped. Any
    exception is reported on stdout and swallowed; in that case the function
    returns ``None`` instead of a list.

    Args:
        file_path: Path of the video file to open with ``VideoFileClip``.

    Returns:
        A list of face crops in frame order, or ``None`` if processing failed.
    """
    try:
        with VideoFileClip(file_path) as clip:
            silent_clip = clip.without_audio()
            detections = map(detect_face, silent_clip.iter_frames())
            # Materialise the list while the clip is still open.
            return [crop for crop in detections if crop is not None]
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None



-------

# CNN

In [6]:
from torchvision.models import resnet18, ResNet18_Weights


# Pretrained ResNet-18 that FeaturesExtractorCNN below takes its layers from.
weights = ResNet18_Weights.DEFAULT
# .eval() puts the network in inference mode and returns the module itself.
resnet = resnet18(weights=weights).eval()


class FeaturesExtractorCNN(torch.nn.Module):
    """Feature extractor built from the module-level ``resnet`` minus its last child.

    The layers are taken from the existing ``resnet`` instance, so the
    extractor reuses that network's modules rather than copying them.
    """

    def __init__(self):
        super().__init__()
        backbone_layers = list(resnet.children())
        # Keep every child module except the final one.
        self.model = torch.nn.Sequential(*backbone_layers[:-1])

    def forward(self, x):
        """Run ``x`` through the truncated network and flatten all dims after the first."""
        features = self.model(x)
        return features.flatten(start_dim=1)


def get_video_embedding_cnn(file_path, batch_size=64):
    """Compute one embedding for a video by averaging per-frame CNN features.

    Face crops from ``extract_frames`` are resized to 224x224, normalised,
    passed through ``FeaturesExtractorCNN`` in chunks of ``batch_size`` frames,
    and the resulting feature vectors are averaged over all frames.

    Args:
        file_path: Path of the video file.
        batch_size: Number of frames per forward pass. Bounds memory use on
            long clips; the default keeps existing call sites working.

    Returns:
        A 1-D CPU tensor holding the mean feature vector, or ``None`` when the
        video could not be read, no face was found in any frame, or any other
        error occurred (the error is printed, not raised).
    """
    try:
        frames = extract_frames(file_path)
        if frames is None:
            # extract_frames has already printed the reason for the failure.
            return None
        if len(frames) == 0:
            print(f"Error processing {file_path}: no faces detected")
            return None

        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        model = FeaturesExtractorCNN().to(DEVICE)
        model.eval()

        # Process the clip chunk by chunk so that memory use does not grow
        # with the number of frames in the video.
        chunk_embeddings = []
        with torch.no_grad():
            for start in range(0, len(frames), batch_size):
                chunk = frames[start:start + batch_size]
                inputs = torch.stack([transform(frame) for frame in chunk]).to(DEVICE)
                chunk_embeddings.append(model(inputs).cpu())

        return torch.cat(chunk_embeddings).mean(dim=0)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [7]:
# One embedding per successfully processed video.
embeddings_cnn = []
# File names index-aligned with embeddings_cnn, and the ones that yielded nothing.
embedded_files = []
failed_files = []

for file in tqdm(files):
    video_path = os.path.join(DATA_DIR, file)
    embedding = get_video_embedding_cnn(video_path)
    if embedding is None:
        failed_files.append(file)
        continue
    embeddings_cnn.append(embedding)
    embedded_files.append(file)

# Failed videos are skipped, so embeddings_cnn can be shorter than files.
# Say so explicitly instead of letting labels drift out of alignment later.
if failed_files:
    print(
        f"{len(failed_files)} of {len(files)} videos produced no embedding; "
        "labels derived from `files` will not line up with `embeddings_cnn` - "
        "use `embedded_files` instead."
    )

100%|██████████| 7441/7441 [3:48:58<00:00,  1.85s/it]


In [8]:
# Create the output folder first: without it np.save raises and the embeddings
# computed by the long-running loop above would not be persisted.
os.makedirs('embeddings', exist_ok=True)
np.save('embeddings/video_embeddings_cnn_v2.npy', np.array(embeddings_cnn))

In [10]:
def plot_embeddings(embeddings, color_by_data, files, algorithm='tsne', color_by='Emocje'):
    """Project embeddings to 2-D and show them as an interactive scatter plot.

    Args:
        embeddings: Sequence of equally sized embedding vectors (one per point).
        color_by_data: One label per embedding, used to colour the points.
        files: One file name per embedding, shown when hovering over a point.
        algorithm: Projection method: ``'tsne'``, ``'pca'`` or ``'umap'``.
        color_by: Legend title for the colour labels.

    Raises:
        ValueError: If ``algorithm`` is not supported, or if ``color_by_data``
            or ``files`` does not have one entry per embedding.
    """
    supported = ('tsne', 'pca', 'umap')
    if algorithm not in supported:
        raise ValueError(f"Unknown algorithm {algorithm!r}; expected one of {supported}")

    points = np.asarray(embeddings)
    n_points = len(points)
    # Check alignment before the (potentially slow) projection is fitted.
    if len(color_by_data) != n_points or len(files) != n_points:
        raise ValueError(
            f"Length mismatch: {n_points} embeddings, "
            f"{len(color_by_data)} labels, {len(files)} files"
        )

    if algorithm == 'tsne':
        reducer = TSNE(n_components=2, random_state=42)
    elif algorithm == 'pca':
        reducer = PCA(n_components=2)
    else:
        reducer = umap.UMAP(n_components=2, random_state=42)
    results = reducer.fit_transform(points)

    xy_df = pd.DataFrame(results, columns=['x', 'y'])
    xy_df['emo'] = color_by_data
    xy_df['file'] = files

    fig = px.scatter(xy_df, x='x', y='y', color='emo',
                     title=f'Wizualizacja osadzeń emocji przy użyciu {algorithm.upper()}',
                     labels={'emo': color_by},
                     hover_name='file',
                     color_discrete_sequence=px.colors.qualitative.Vivid)

    fig.update_layout(
        width=1600,
        height=1000
    )

    fig.update_traces(marker=dict(size=10),
                      selector=dict(mode='markers'))

    fig.show()

In [11]:
# The label is the third underscore-separated field of each file name,
# e.g. 'SAD' in '1076_MTI_SAD_XX.flv'.
emos = [name.split('_')[2] for name in files]

In [12]:
# Reload the embeddings written by the np.save cell above. This rebinds
# embeddings_cnn from the in-memory list to the array stored on disk.
# NOTE(review): the row order is whatever it was when the file was written;
# confirm it still matches `files` / `emos` before trusting the plot labels.
embeddings_cnn = np.load('embeddings/video_embeddings_cnn_v2.npy')

In [13]:
# t-SNE projection of the CNN embeddings, coloured by emotion label.
plot_embeddings(embeddings_cnn, emos, files, algorithm='tsne')

In [15]:
# PCA projection of the CNN embeddings, coloured by emotion label.
plot_embeddings(embeddings_cnn, emos, files, algorithm='pca')

In [16]:
# UMAP projection of the CNN embeddings, coloured by emotion label.
plot_embeddings(embeddings_cnn, emos, files, algorithm='umap')


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.

