In [13]:
import csv
import numpy as np
import decord
import torch
import os, time
import pandas as pd
import random

from gluoncv.torch.utils.model_utils import download
from gluoncv.torch.data.transforms.videotransforms import video_transforms, volume_transforms
from gluoncv.torch.engine.config import get_cfg_defaults
from gluoncv.torch.model_zoo import get_model
from tqdm.notebook import tqdm

map_path = 'action_map.csv'

action_dict = {}

with open(map_path, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        id_value = int(row['id'])
        name_value = row['name']
        
        action_dict[id_value] = name_value

In [14]:
def process_frames(file_path, model, curr_class, N, strategy):
    vr = decord.VideoReader(file_path)
    new_rows = []
    INTERVAL = 40
    for time_int in range(0, len(vr) - INTERVAL + 1, INTERVAL):
        if strategy == "Random":
            l_bound, r_bound = time_int + N, (time_int + INTERVAL) - (N + 1)
            rand_start = random.randint(l_bound, r_bound)
            frame_id_list = range(rand_start-N, rand_start+N+1)
        elif strategy == "MostRecent":
            recent_start = (time_int + INTERVAL) - (N + 1)
            frame_id_list = range(recent_start-N, recent_start+N+1)
        elif strategy == "LeastRecent":
            lr_start = time_int + N
            frame_id_list = range(lr_start-N, lr_start+N+1)
        else:
            raise ValueError("Sampling strategy not yet supported")
        video_data = vr.get_batch(frame_id_list).asnumpy()
        crop_size = 224
        short_side_size = 256
        transform_fn = video_transforms.Compose([video_transforms.Resize(short_side_size, interpolation='bilinear'),
                                                video_transforms.CenterCrop(size=(crop_size, crop_size)),
                                                volume_transforms.ClipToTensor(),
                                                video_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        clip_input = transform_fn(video_data)
        with torch.no_grad():
            pred = model(torch.unsqueeze(clip_input, dim=0)).numpy()
        probs = torch.nn.functional.softmax(torch.tensor(pred), dim=1).numpy()
        top_class = np.argmax(probs)
        confidence_interval = np.max(probs) - np.min(probs)
        new_row = {"file_name": file_path.split("/")[-1], "class_id": curr_class, "class_name": action_dict[curr_class], "pred_class_id": top_class, "pred_class_name": action_dict[top_class], "window_size": N*2+1, "confidence": confidence_interval, "is_pred_correct": True if top_class == curr_class else False, "time_interval": time_int, "sampling_strategy": strategy}
        new_rows.append(new_row)
    return new_rows

In [60]:
def sample_frames(file_path, model, curr_class, mode, frame_rate, buffer_size):
    vr = decord.VideoReader(file_path)
    frames = set()
    new_rows = []
    
    t = 0
    r = l = 0
    while r + frame_rate < len(vr):
        t += 1
        r += frame_rate
        if (r - l) > buffer_size:
            l = r - buffer_size
        
        if mode == "LeastRecent":
            frames.add(l)
            l += 1
        elif mode == "MostRecent":
            frames.add(r)
        elif mode == "Random":
            frame = random.sample(range(l, r+1), 1)
            while frame[0] in frames:
                frame = random.sample(range(l, r+1), 1)
            frames.add(frame[0])
        if len(frames) >= 5:
            video_data = vr.get_batch(list(frames)).asnumpy()
            crop_size = 224
            short_side_size = 256
            transform_fn = video_transforms.Compose([video_transforms.Resize(short_side_size, interpolation='bilinear'),
                                                    video_transforms.CenterCrop(size=(crop_size, crop_size)),
                                                    volume_transforms.ClipToTensor(),
                                                    video_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
            clip_input = transform_fn(video_data)
            with torch.no_grad():
                pred = model(torch.unsqueeze(clip_input, dim=0)).numpy()
            probs = torch.nn.functional.softmax(torch.tensor(pred), dim=1).numpy()
            top_class = np.argmax(probs)
            confidence_interval = np.max(probs) - np.min(probs)
            new_row = {"file_name": file_path.split("/")[-1], "class_id": curr_class, "class_name": action_dict[curr_class], "pred_class_id": top_class, "pred_class_name": action_dict[top_class], "confidence": confidence_interval, "is_pred_correct": True if top_class == curr_class else False, "time_interval": t, "sampling_strategy": mode}
            new_rows.append(new_row)

    return new_rows
    

In [63]:
from moviepy.editor import VideoFileClip, concatenate_videoclips

def sample_frames_no_action(file_path, model, curr_class, mode, frame_rate, buffer_size):
    no_action = VideoFileClip("no_action.mp4")
    clip = VideoFileClip(file_path)
    no_action = no_action.resize(clip.size)
    no_action_clip = concatenate_videoclips([no_action,clip,no_action])
    no_action_clip.write_videofile("temp.mp4")

    vr = decord.VideoReader("temp.mp4")
    frames = set()
    new_rows = []
    
    t = 0
    r = l = 0
    while r + frame_rate < len(vr):
        t += 1
        r += frame_rate
        if (r - l) > buffer_size:
            l = r - buffer_size
        
        if mode == "LeastRecent":
            frames.add(l)
            l += 1
        elif mode == "MostRecent":
            frames.add(r)
        elif mode == "Random":
            frame = random.sample(range(l, r+1), 1)
            while frame[0] in frames:
                frame = random.sample(range(l, r+1), 1)
            frames.add(frame[0])
        if len(frames) >= 5:
            video_data = vr.get_batch(list(frames)).asnumpy()
            crop_size = 224
            short_side_size = 256
            transform_fn = video_transforms.Compose([video_transforms.Resize(short_side_size, interpolation='bilinear'),
                                                    video_transforms.CenterCrop(size=(crop_size, crop_size)),
                                                    volume_transforms.ClipToTensor(),
                                                    video_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
            clip_input = transform_fn(video_data)
            with torch.no_grad():
                pred = model(torch.unsqueeze(clip_input, dim=0)).numpy()
            probs = torch.nn.functional.softmax(torch.tensor(pred), dim=1).numpy()
            top_class = np.argmax(probs)
            confidence_interval = np.max(probs) - np.min(probs)
            new_row = {"file_name": file_path.split("/")[-1], "class_id": curr_class, "class_name": action_dict[curr_class], "pred_class_id": top_class, "pred_class_name": action_dict[top_class], "confidence": confidence_interval, "is_pred_correct": True if top_class == curr_class else False, "time_interval": t, "sampling_strategy": mode}
            new_rows.append(new_row)

    return new_rows

In [66]:
frame_rate = 25
buffer_size = 500 # 125
num_classes = 20
num_videos = 5

# Get 5 Random videos from collection
dataset_path = 'datasets\\kinetics400_5per\\train'

subfolders = sorted([subfolder for subfolder in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, subfolder))])
random.seed(2)
random_subfolders = random.sample(subfolders, num_classes)
subfolder = random_subfolders[0]

config_file = './i3d_resnet50_v1_kinetics400.yaml'
cfg = get_cfg_defaults()
cfg.merge_from_file(config_file)
model = get_model(cfg)
model.eval()

columns = ["file_name", "class_id", "class_name", "pred_class_id", "pred_class_name", "confidence", "is_pred_correct", "time_interval", "sampling_strategy"]

my_data = []
no_action_data = []

sampling_strategies = ['Random', 'MostRecent', 'LeastRecent']

try:
    # Iterate over videos
    for index, subfolder in enumerate(random_subfolders):
        subfolder_path = os.path.join(dataset_path, subfolder)
        files = os.listdir(subfolder_path)
        clean_files = []
        for filename in files:
            if filename.endswith(".mp4"):
                clean_files.append(filename)
        random_files = random.sample(clean_files, num_videos)
        random_file = random_files[0]
        for random_file in random_files:
            file_path = os.path.join(subfolder_path, random_file)
            class_id = -1
            for key, value in action_dict.items():
                if value == subfolder:
                    class_id = key
                    break
            
            for strategy in sampling_strategies:
                print(f'{file_path}\t{class_id}\t{strategy}')
                new_rows = sample_frames(file_path, model, class_id, strategy, frame_rate, buffer_size) # process_frames(file_path, model, class_id, N, strategy)
                # no_action_row = sample_frames_no_action(file_path, model, class_id, strategy, frame_rate, buffer_size)
                for row in new_rows:
                    my_data.append(row)
                    # no_action_data.append(no_action_row)
                
except Exception as e:
    print(e)

finally:
    # Specify the file path
    out_path = 'sampling_strategies_updated.csv'
    
    # Check if the file exists
    if os.path.exists(out_path):
        # Delete the file
        os.remove(out_path)
    df = pd.DataFrame(my_data, columns=columns)
    df.to_csv(out_path, mode='w', index=False)

    # # No action output
    # out_path = 'sampling_strategies_no_action.csv'
    # if os.path.exists(out_path):
    #     os.remove(out_path)
    # df = pd.DataFrame(no_action_data, columns=columns)
    # df.to_csv(out_path, mode='w', index=False)


datasets\kinetics400_5per\train\bobsledding\mb2rno2ylUU.mp4	28	Random
datasets\kinetics400_5per\train\bobsledding\mb2rno2ylUU.mp4	28	MostRecent
datasets\kinetics400_5per\train\bobsledding\mb2rno2ylUU.mp4	28	LeastRecent
datasets\kinetics400_5per\train\bobsledding\VXrhBFb9yAw.mp4	28	Random
datasets\kinetics400_5per\train\bobsledding\VXrhBFb9yAw.mp4	28	MostRecent
datasets\kinetics400_5per\train\bobsledding\VXrhBFb9yAw.mp4	28	LeastRecent
datasets\kinetics400_5per\train\bobsledding\ryDGSnjwF6g.mp4	28	Random
datasets\kinetics400_5per\train\bobsledding\ryDGSnjwF6g.mp4	28	MostRecent
datasets\kinetics400_5per\train\bobsledding\ryDGSnjwF6g.mp4	28	LeastRecent
datasets\kinetics400_5per\train\bobsledding\T_5tNh23Rcg.mp4	28	Random
datasets\kinetics400_5per\train\bobsledding\T_5tNh23Rcg.mp4	28	MostRecent
datasets\kinetics400_5per\train\bobsledding\T_5tNh23Rcg.mp4	28	LeastRecent
datasets\kinetics400_5per\train\bobsledding\IBbdl5BnZOU.mp4	28	Random
datasets\kinetics400_5per\train\bobsledding\IBbdl5BnZO