https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

A custom Dataset class must implement three methods: `__init__`, `__len__`, and `__getitem__`.

https://github.com/bomri/SlowFast/blob/master/slowfast/datasets/loader.py

https://github.com/bomri/SlowFast/blob/master/slowfast/datasets/ava_dataset.py

https://github.com/HHTseng/video-classification/blob/master/ResNetCRNN_varylength/UCF101_ResNetCRNN_varlen.py
https://www.ai-contentlab.com/2023/01/video-classification-is-important-task.html

https://discuss.pytorch.org/t/how-upload-sequence-of-image-on-video-classification/24865/13

Оптический поток
https://docs.opencv.org/2.4/modules/video/doc/motion_analysis_and_object_tracking.html

Skeleton
https://www.fireblazeaischool.in/blogs/human-pose-estimation-using-opencv/

# Data Loader

Добавить нормализацию!!!

In [5]:
# Labels (animal names) used to select rows of the annotation table by its 'text' column.
labels_list = [
    'пингвин',
    'жираф',
    'лягушка',
    'бегемот',
    'козел',
    'лиса',
    'динозавр',
    'кролик',
    'собака',
    'обезьяна',
    'корова',
    'свинья',
    'медуза',
    'курица',
    'павлин',
    'дельфин',
    'слон',
    'медведь',
    'лебедь',
    'орел',
    'бык',
    'змея',
    'птица',
    'лось',
    'пчела',
    'лев',
    'тигр',
    'мышь',
    'паук',
    'бабочка',
]

 # Курс Седжвика по алгоритмам

In [1]:
import os
import cv2
import torch
torch.cuda.empty_cache()
from torch import nn
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from torch.utils.data import Dataset

In [2]:
def crop_center_square(frame):
    """Return the largest centered square region of ``frame``.

    The first two axes of ``frame`` are treated as (rows, columns); the side
    of the square equals the smaller of the two. Any further axes are left
    untouched.
    """
    height, width = frame.shape[:2]
    side = min(height, width)
    top = height // 2 - side // 2
    left = width // 2 - side // 2
    rows = slice(top, top + side)
    cols = slice(left, left + side)
    return frame[rows, cols]

def load_video(path, begin, end, base_name, resize=(10, 10)):
    """Dump frames ``begin``..``end`` (inclusive) of a video as grayscale PNG files.

    Every extracted frame is center-cropped to a square, resized to ``resize``,
    converted from BGR to grayscale and written to ``f'{base_name}_{i}.png'``,
    where ``i`` starts at 0 for the first extracted frame.

    Args:
        path: Video file handed to ``cv2.VideoCapture``.
        begin: Index of the first frame to extract.
        end: Index of the last frame to extract (inclusive).
        base_name: Output path prefix; ``_{i}.png`` is appended per frame.
        resize: Target size handed to ``cv2.resize``.

    Returns:
        None. Extraction stops early once the capture yields no more frames.
        A warning is issued when the video cannot be opened or when some
        frames could not be written.
    """
    import warnings  # local import: keeps the function usable without touching the import cell

    cap = cv2.VideoCapture(path)
    i = 0
    failed_writes = 0
    try:
        if not cap.isOpened():
            # Previously this case silently produced no frames at all.
            warnings.warn(f'could not open video: {path}')
            return
        # Seek once; consecutive read() calls then advance frame by frame,
        # so re-seeking before every single read is unnecessary work.
        cap.set(cv2.CAP_PROP_POS_FRAMES, begin)
        while begin + i <= end:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert to grayscale
            frame_name = f'{base_name}_{i}.png'
            # imwrite reports failure through its return value; count it
            # instead of dropping the frame silently.
            if not cv2.imwrite(frame_name, frame):
                failed_writes += 1
            i += 1
    finally:
        cap.release()
    if failed_writes:
        warnings.warn(f'{failed_writes} frame(s) of {path} could not be written to {base_name}_*.png')


In [3]:
# --- Data locations ---
video_dir = "/home/jupyter/mnt/s3/rsl-videos/slovo/slovo"
annotations_file = "/home/jupyter/mnt/s3/rsl-videos/slovo/slovo_annotations/SLOVO_DATAFRAME.tsv"
dataset_dir = "/home/jupyter/mnt/s3/rsl-videos/slovo/dataset"

# --- Hyperparameters ---
IMG_SIZE = 224
BATCH_SIZE = 6
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

In [4]:
# Read the tab-separated annotation table and preview its first rows.
video_labels = pd.read_csv(filepath_or_buffer=annotations_file, sep='\t')
video_labels.head(n=5)

Unnamed: 0,attachment_id,text,user_id,height,width,length,begin,end
0,44e8d2a0-7e01-450b-90b0-beb7400d2c1e,Ё,185bd3a81d9d618518d10abebf0d17a8,640,360,156.0,36,112
1,df5b08f0-41d1-4572-889c-8b893e71069b,А,185bd3a81d9d618518d10abebf0d17a8,640,360,150.0,36,76
2,17f53df4-c467-4aff-9f48-20687b63d49a,Р,185bd3a81d9d618518d10abebf0d17a8,640,360,133.0,40,97
3,e3add916-c708-4339-ad98-7e2740be29e9,Е,185bd3a81d9d618518d10abebf0d17a8,640,360,144.0,43,107
4,bd7272ed-1850-48f1-a2a8-c8fed523dc37,Ч,185bd3a81d9d618518d10abebf0d17a8,640,360,96.0,20,70


In [12]:
# Keep only the rows whose label is one of the animals in labels_list.
# NOTE(review): the original applied a bare `.head()` here, which caps the
# table at 5 rows — this looks like a debugging shortcut. The cap is kept as
# the default so behaviour is unchanged; set ANIMAL_ROW_LIMIT = None to keep
# every matching video.
ANIMAL_ROW_LIMIT = 5
is_animal = video_labels['text'].isin(labels_list)
video_labels = video_labels[is_animal]
if ANIMAL_ROW_LIMIT is not None:
    video_labels = video_labels.head(ANIMAL_ROW_LIMIT)

In [21]:
# Show the annotation row(s) of one specific video, selected by attachment id.
is_target_video = video_labels['attachment_id'] == 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25'
video_labels[is_target_video]

Unnamed: 0,attachment_id,text,user_id,height,width,length,begin,end
453,b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25,козел,db573f94204e56e0cf3fc2ea000e5bdc,640,360,150.0,35,82


In [6]:
# Count the annotation rows per label and keep the counts as {label: count}.
rows_per_label = video_labels.groupby('text').size()
labels_cnt = rows_per_label.to_dict()
print(labels_cnt)
# 75 - train, 15 - val, 10 - test (intended split, presumably percent per label)

In [31]:
# Assign every video to train/val/test per label and dump its frames to
# <dataset_dir>/<split>/<label>/<attachment_id>/<attachment_id>_<i>.png.
#
# Per label the first ~75% of the videos go to train, the next ~15% to val and
# the remainder to test. The original validation check compared val_counter
# with a fraction of itself (never true), so each label received at most one
# validation video; it now compares against the label's total count.
TRAIN_FRACTION = 0.75
VAL_FRACTION = 0.15

test_counter = dict()
val_counter = dict()
train_counter = dict()

for idx, row in video_labels.iterrows():
    print(idx, row)
    attachment_id = row['attachment_id']
    filename = os.path.join(video_dir, attachment_id + ".mp4")
    label = row['text']
    begin = row['begin']
    end = row['end']

    # A label's first video always lands in train, and the first video that
    # overflows train always lands in val, because 0 < fraction * count.
    label_total = labels_cnt[label]
    if train_counter.get(label, 0) < label_total * TRAIN_FRACTION:
        dataset_type, split_counter = 'train', train_counter
    elif val_counter.get(label, 0) < label_total * VAL_FRACTION:
        dataset_type, split_counter = 'val', val_counter
    else:
        dataset_type, split_counter = 'test', test_counter
    split_counter[label] = split_counter.get(label, 0) + 1

    dirname = os.path.join(dataset_dir, dataset_type, label, attachment_id)
    os.makedirs(dirname, exist_ok=True)
    base_name = os.path.join(dirname, attachment_id)
    # NOTE(review): frames are exported at 10x10 pixels although IMG_SIZE is
    # defined in the config cell — confirm whether this tiny size is intended.
    load_video(path=filename,
               begin=begin,
               end=end,
               base_name=base_name,
               resize=(10, 10)
               )


453 attachment_id    b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25
text                                            козел
user_id              db573f94204e56e0cf3fc2ea000e5bdc
height                                            640
width                                             360
length                                          150.0
begin                                              35
end                                                82
Name: 453, dtype: object


In [35]:
# List the entries stored under the train split for the 'козел' label.
train_label_dir = dataset_dir+'/train/козел'
os.listdir(train_label_dir)

['b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_0.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_1.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_10.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_11.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_12.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_13.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_14.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_15.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_16.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_17.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_18.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_19.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_2.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_20.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_21.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_22.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_23.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_24.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_25.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_26.png',
 'b402fe3d-8d74-4ad6-9c8f-e7ee8cc35d25_27.p