In [1]:
import numpy as np
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import os
import pytorch_lightning as pl
import pandas as pd
import torch
import torch.nn as nn
import torchmetrics

from config import *
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
from torch.optim import Adam
from torch.utils.data import  Dataset
from tqdm import tqdm

from sklearn.metrics import(
    accuracy_score, 
    confusion_matrix, 
    ConfusionMatrixDisplay,
)

In [2]:
# MediaPipe predictions read from CSV; passed to create_media_pipe_dataset further down.
# NOTE(review): the CSV is named "mediapipe-train" while the file list is
# "task1_testing" -- confirm both describe the same set of videos.
media_pipe_preds = pd.read_csv("/mnt/durable/training_data/mediapipe-train.csv")
# Path of the text file list that extract_filename_label_frame parses.
test_file_list = "/mnt/durable/training_data/filelists/task1_testing.txt"

In [3]:
class MediaPipeDataSet(Dataset):
    """Dataset that pairs each sequence with the label at the same position."""

    def __init__(self, sequences, labels):
        """Keep references to the two parallel containers (no copy, no validation)."""
        self.sequences, self.labels = sequences, labels

    def __len__(self):
        """Return the number of sequences; the value is also stored on ``self.length``."""
        count = len(self.sequences)
        self.length = count
        return count

    def __getitem__(self, idx):
        """Return the ``(sequence, label)`` pair found at position ``idx``."""
        return self.sequences[idx], self.labels[idx]


def extract_filename_label_frame(file_list):
    """Expand a labelled file list into one row per (video, timestamp).

    Every non-blank line of ``file_list`` is expected to contain at least four
    whitespace-separated fields: ``<path> <label> <start> <end>``.  For each
    line, one row is produced for ``start``, ``start + 1``, ... for as long as
    the value does not exceed ``end`` (``end`` itself is included when reached).

    Args:
        file_list: Path of the text file to read.

    Returns:
        pandas.DataFrame with the columns ``video_id`` (the last path
        component, truncated at its first "."), ``timestamp`` (float) and
        ``label`` (the label field as a string, exactly as written).

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If a start/end field cannot be parsed as a float.
    """
    labels = []
    video_ids = []
    frames = []

    # The context manager closes the handle even if parsing raises.
    with open(file_list, "r") as handle:
        for line in handle:
            # split() without arguments tolerates repeated spaces and strips
            # the trailing newline from the last field.
            components = line.split()
            if not components:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no entry and would otherwise raise IndexError below.
                continue

            # File name without directories, then without anything after the first "."
            video_name = components[0].split("/")[-1]
            video_id = video_name.split(".")[0]

            label = components[1]

            start = float(components[2])
            end = float(components[3])

            # One row per unit step, inclusive of `end`.
            while start <= end:
                labels.append(label)
                video_ids.append(video_id)
                frames.append(start)
                start += 1

    return pd.DataFrame(
        {
            "video_id": video_ids,
            "timestamp": frames,
            "label": labels,
        }
    )


def get_sequences_and_labels(media_pipe_info, relevant_columns, sequence_length=None):
    """Cut a frame-level table into consecutive fixed-length windows.

    Rows are taken in their current order and grouped into non-overlapping
    windows of ``sequence_length`` rows.  Trailing rows that do not fill a
    complete window are dropped.  Each window is labelled with the ``label``
    value of its last row.

    NOTE(review): windows are cut purely by row position, so a window can
    contain rows from more than one ``video_id`` when the input holds several
    videos -- confirm this is intended.

    Args:
        media_pipe_info: DataFrame with a ``label`` column and the columns
            named in ``relevant_columns``.
        relevant_columns: Column names whose values make up each window.
        sequence_length: Rows per window.  Defaults to the ``SEQUENCE_LENGTH``
            constant when omitted.

    Returns:
        Tuple ``(labels, sequences)`` -- note the order -- where ``labels`` is
        a list with one label per window and ``sequences`` is a list of numpy
        arrays, one per window, each with ``sequence_length`` rows.
    """
    if sequence_length is None:
        sequence_length = SEQUENCE_LENGTH

    sequences = []
    labels = []
    n_rows = media_pipe_info.shape[0]

    # range() excludes its stop value, so stop at n_rows + 1: otherwise the
    # final complete window is lost whenever n_rows is an exact multiple of
    # sequence_length.
    for stop in tqdm(range(sequence_length, n_rows + 1, sequence_length)):
        window = media_pipe_info.iloc[stop - sequence_length:stop]

        # The window's label is the label of its last row.
        labels.append(window.label.iloc[-1])

        # Keep only the feature columns, as a plain numpy array.
        sequences.append(window[relevant_columns].to_numpy())

    return labels, sequences


def create_media_pipe_dataset(media_pipe_output, file_list):
    """Build a ``MediaPipeDataSet`` from MediaPipe output and a labelled file list.

    Args:
        media_pipe_output: DataFrame of MediaPipe results.  It is merged with
            the per-frame labels and then sorted by ``video_id`` and
            ``timestamp``.
        file_list: Path of the file list given to
            ``extract_filename_label_frame``.

    Returns:
        MediaPipeDataSet holding the extracted sequences and their labels.
    """
    # Per-frame labels parsed from the file list.
    frame_labels = extract_filename_label_frame(file_list)

    # Left join keeps every MediaPipe row; with no explicit key, pandas joins
    # on whichever columns the two frames have in common.
    merged = media_pipe_output.merge(frame_labels, how="left")
    media_pipe_info = merged.sort_values(["video_id", "timestamp"])

    # Feature columns are the ones whose name mentions a keypoint or a box.
    feature_tags = ("keypoint", "box")
    relevant_columns = [
        name
        for name in media_pipe_info.columns
        if any(tag in name for tag in feature_tags)
    ]

    print("Extracting labels and sequences")
    labels, sequences = get_sequences_and_labels(media_pipe_info, relevant_columns)
    return MediaPipeDataSet(sequences, labels)

In [4]:
# Build the dataset from the loaded predictions and the file list.  The result
# is not bound to a name, so this cell only displays the object's repr.
create_media_pipe_dataset(media_pipe_preds, test_file_list)

Extracting labels and sequences


100%|██████████| 4183/4183 [00:01<00:00, 2966.57it/s]


<__main__.MediaPipeDataSet at 0x7f1a8ce8d750>