# Video to numpy

## 1. Importing libraries

In [2]:
import os
import json 
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm

## 2. Code

In [3]:
# Metadata table; it is passed as `df` to read_video_to_numpy in the cells below.
df_sports = pd.read_csv(filepath_or_buffer="../sports_metadata.csv")

In [6]:
def video2tensor(df_tmp,root_dir,cat,split,num_frames,tensors_list,meta_data):
    """Sample ``num_frames`` frames from every video listed in ``df_tmp``.

    For each row the file ``../{root_dir}/{cat}/{split}/{video_name}`` is opened
    with OpenCV.  Frames are counted from 1 and every ``interval``-th frame is
    kept, where ``interval`` is the reported frame count floor-divided by
    ``num_frames`` (clamped to at least 1).  Kept frames are converted from BGR
    to RGB, stacked, and transposed from (frame, height, width, channel) to
    (channel, frame, height, width).

    Videos that cannot be opened, or that yield fewer than ``num_frames``
    frames, are reported on stdout and skipped, so every appended array has the
    same number of frames and the list can be stacked by the caller.

    Args:
        df_tmp: DataFrame with at least the columns ``video_name`` and ``action``.
        root_dir: Dataset root directory (resolved relative to ``..``).
        cat: Category sub-directory of ``root_dir``.
        split: Split sub-directory of the category directory.
        num_frames: Number of frames to keep per video.
        tensors_list: List the per-video arrays are appended to (mutated in place).
        meta_data: Dict holding lists under the keys ``video_name``, ``label``
            and ``frame_idx``; one entry per kept video is appended (mutated in
            place).

    Returns:
        The tuple ``(meta_data, tensors_list)`` - the same objects that were
        passed in.
    """
    for _, row in tqdm(df_tmp.iterrows(), total=df_tmp.shape[0]):
        video_name = row['video_name']
        label = row['action'].replace(' ','_').lower()
        video_path = f"../{root_dir}/{cat}/{split}/{video_name}"

        frames = []
        frames_idx_list = []

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                # Skip this video instead of aborting the whole batch: the caller
                # unpacks a (meta_data, tensors_list) tuple, so returning None
                # here would crash it.
                print("Błąd: Nie można otworzyć wideo.", video_path)
                continue

            # cap.get returns a float; clamp to 1 so that videos shorter than
            # num_frames do not produce a zero interval (modulo by zero).
            total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            interval = max(1, int(total_frames // num_frames))

            frame_count = 1  # 1-based index of the frame about to be examined
            while len(frames) < num_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                if frame_count % interval == 0:
                    # Convert only the frames that are actually kept.
                    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    frames_idx_list.append(frame_count)

                frame_count += 1
        finally:
            # Always free the decoder, also on `continue` or on an exception.
            cap.release()

        if len(frames) < num_frames:
            # An incomplete sample would give a ragged (or empty) array that
            # cannot be transposed/stacked with the others.
            print(f"Skipping {video_name}: got {len(frames)} of {num_frames} frames")
            continue

        # (frame, height, width, channel) -> (channel, frame, height, width)
        tensor = np.transpose(np.array(frames), (3, 0, 1, 2))

        tensors_list.append(tensor)

        meta_data['video_name'].append(video_name)
        meta_data['label'].append(label)
        meta_data['frame_idx'].append(frames_idx_list)

    return meta_data, tensors_list

In [7]:
def _export_videos(df_part, root_dir, out_dir, cat, split, num_frames, stem):
    """Convert the videos of ``df_part`` and write ``{stem}.npy`` plus ``{stem}_metadata.json``."""
    meta_data = {
        'video_name': [],
        'label': [],
        'frame_idx': []
    }

    meta_data, tensors_list = video2tensor(df_part,root_dir,cat,split,num_frames,[],meta_data)

    tensors = np.array(tensors_list)
    # The stacked copy is all that is needed from here on; drop the list early.
    del tensors_list
    print(tensors.shape)

    np.save(f"../{out_dir}/{stem}.npy", tensors)
    with open(f"../{out_dir}/{stem}_metadata.json", "w") as f:
        json.dump(meta_data, f)


def read_video_to_numpy(df: pd.DataFrame, root_dir: str, out_dir: str, num_frames: int = 32, diving=False, chunk_size: int = 5009):
    """Export sampled video frames per category and split as ``.npy`` + JSON metadata.

    Rows of ``df`` are selected by the columns ``category`` and ``new_split``
    and handed to ``video2tensor``.  For every (category, split) pair the
    stacked array is written to
    ``../{out_dir}/frame{num_frames}_{cat}_{split}.npy`` and the metadata dict
    to the matching ``..._metadata.json`` file.

    When ``diving`` is true and ``num_frames`` is greater than 8, the diving
    train split is written in consecutive chunks of at most ``chunk_size``
    rows (files suffixed ``_0``, ``_1``, ...), so that the whole split never
    has to be held in one array.

    Args:
        df: Metadata table with the columns ``category`` and ``new_split`` (plus
            whatever ``video2tensor`` reads from it).
        root_dir: Dataset root directory (resolved relative to ``..``).
        out_dir: Output directory (resolved relative to ``..``); created if
            it does not exist.
        num_frames: Number of frames sampled per video.
        diving: If true, process only the ``diving`` category; otherwise the
            four other sports.
        chunk_size: Maximum number of videos per chunk file for the chunked
            diving train export.

    Returns:
        None.  Results are written to disk; shapes and progress are printed.
    """
    splits = ['train','val','test']

    # np.save / open do not create missing directories.
    os.makedirs(f"../{out_dir}", exist_ok=True)

    if diving and num_frames > 8:
        print(f"Category: diving")
        for split in splits:
            df_tmp = df[(df['category'] == 'diving') & (df['new_split'] == split)].reset_index(drop=True)

            if split != 'train':
                _export_videos(df_tmp, root_dir, out_dir, 'diving', split, num_frames,
                               f"frame{num_frames}_diving_{split}")
                continue

            # Only the train split is chunked, bounding the size of each array.
            for idx, start in enumerate(range(0, len(df_tmp), chunk_size)):
                df_ = df_tmp.iloc[start:start + chunk_size].reset_index(drop=True)
                _export_videos(df_, root_dir, out_dir, 'diving', split, num_frames,
                               f"frame{num_frames}_diving_{split}_{idx}")
        return

    if diving:
        sports = ['diving']
    else:
        sports = ['aerobic_gymnastics','basketball','football','volleyball']

    for cat in sports:
        print(f"Category: {cat}")
        for split in splits:
            print(f"Split: {split}")

            df_tmp = df[(df['category'] == cat) & (df['new_split'] == split)].reset_index(drop=True)

            _export_videos(df_tmp, root_dir, out_dir, cat, split, num_frames,
                           f"frame{num_frames}_{cat}_{split}")
            print()

### 2.1. Selection of 32 frames

In [20]:
# Export the non-diving sports with 32 frames per video (diving keeps its default, False).
# read_video_to_numpy contains no return statement, so `ts` ends up as None.
ts = read_video_to_numpy(
    df=df_sports,
    root_dir="datasets",
    out_dir="datasets_numpy",
    num_frames=32,
)

Category: aerobic_gymnastics
Split: train


100%|██████████| 4192/4192 [00:31<00:00, 133.51it/s]


(4192, 3, 32, 224, 224)

Split: val


100%|██████████| 707/707 [00:05<00:00, 120.21it/s]


(707, 3, 32, 224, 224)

Split: test


100%|██████████| 707/707 [00:07<00:00, 91.75it/s] 


(707, 3, 32, 224, 224)

Category: basketball
Split: train


100%|██████████| 4524/4524 [00:52<00:00, 86.54it/s]


(4524, 3, 32, 224, 224)

Split: val


100%|██████████| 854/854 [00:10<00:00, 83.85it/s]


(854, 3, 32, 224, 224)

Split: test


100%|██████████| 855/855 [00:10<00:00, 85.36it/s]


(855, 3, 32, 224, 224)

Category: football
Split: train


100%|██████████| 6083/6083 [01:07<00:00, 89.76it/s]


(6083, 3, 32, 224, 224)

Split: val


100%|██████████| 1070/1070 [00:12<00:00, 87.75it/s]


(1070, 3, 32, 224, 224)

Split: test


100%|██████████| 1071/1071 [00:11<00:00, 90.02it/s]


(1071, 3, 32, 224, 224)

Category: volleyball
Split: train


100%|██████████| 3546/3546 [00:39<00:00, 89.82it/s]


(3546, 3, 32, 224, 224)

Split: val


100%|██████████| 645/645 [00:07<00:00, 88.11it/s]


(645, 3, 32, 224, 224)

Split: test


100%|██████████| 646/646 [00:07<00:00, 88.79it/s]


(646, 3, 32, 224, 224)

Category: diving
Split: train


100%|██████████| 15027/15027 [04:31<00:00, 55.37it/s]


MemoryError: Unable to allocate 67.4 GiB for an array with shape (15027, 3, 32, 224, 224) and data type uint8

In [10]:
# Export diving with 32 frames per video; with num_frames > 8 the train split
# is written as several chunk files.
read_video_to_numpy(
    df=df_sports,
    root_dir="datasets",
    out_dir="datasets_numpy",
    num_frames=32,
    diving=True,
)

Category: diving


100%|██████████| 5009/5009 [01:17<00:00, 64.82it/s]


(5009, 3, 32, 224, 224)


100%|██████████| 5009/5009 [01:04<00:00, 77.23it/s]


(5009, 3, 32, 224, 224)


100%|██████████| 5009/5009 [01:25<00:00, 58.52it/s]


(5009, 3, 32, 224, 224)


100%|██████████| 985/985 [00:16<00:00, 61.15it/s]


(985, 3, 32, 224, 224)


100%|██████████| 985/985 [00:15<00:00, 62.86it/s]


(985, 3, 32, 224, 224)


In [27]:
# Load one exported metadata JSON file for inspection.
# NOTE(review): read_video_to_numpy writes "frame{num_frames}_{cat}_{split}_metadata.json";
# this un-prefixed file name presumably comes from an earlier naming scheme - confirm it exists.
tmp = pd.read_json("../datasets_numpy/basketball_train_metadata.json")

### 2.2. Selection of 16 frames

In [9]:
# Export the non-diving sports with 16 frames per video.
read_video_to_numpy(
    df=df_sports,
    root_dir="datasets",
    out_dir="datasets_numpy",
    num_frames=16,
    diving=False,
)

Category: aerobic_gymnastics
Split: train


100%|██████████| 4192/4192 [00:46<00:00, 90.20it/s]


(4192, 3, 16, 224, 224)

Split: val


100%|██████████| 707/707 [00:07<00:00, 93.68it/s]


(707, 3, 16, 224, 224)

Split: test


100%|██████████| 707/707 [00:07<00:00, 94.85it/s]


(707, 3, 16, 224, 224)

Category: basketball
Split: train


100%|██████████| 4524/4524 [00:48<00:00, 93.96it/s] 


(4524, 3, 16, 224, 224)

Split: val


100%|██████████| 854/854 [00:09<00:00, 89.18it/s]


(854, 3, 16, 224, 224)

Split: test


100%|██████████| 855/855 [00:09<00:00, 93.52it/s]


(855, 3, 16, 224, 224)

Category: football
Split: train


100%|██████████| 6083/6083 [01:06<00:00, 91.24it/s] 


(6083, 3, 16, 224, 224)

Split: val


100%|██████████| 1070/1070 [00:11<00:00, 90.58it/s]


(1070, 3, 16, 224, 224)

Split: test


100%|██████████| 1071/1071 [00:11<00:00, 91.58it/s]


(1071, 3, 16, 224, 224)

Category: volleyball
Split: train


100%|██████████| 3546/3546 [00:39<00:00, 89.93it/s]


(3546, 3, 16, 224, 224)

Split: val


100%|██████████| 645/645 [00:06<00:00, 94.44it/s]


(645, 3, 16, 224, 224)

Split: test


100%|██████████| 646/646 [00:06<00:00, 94.03it/s]


(646, 3, 16, 224, 224)



[array([[[[ 40,  40,  41, ...,  43,  43,  43],
          [ 40,  40,  41, ...,  43,  43,  43],
          [ 42,  42,  43, ...,  43,  43,  43],
          ...,
          [240, 240, 240, ..., 244, 244, 244],
          [240, 240, 240, ..., 244, 244, 244],
          [240, 240, 240, ..., 244, 244, 244]],
 
         [[ 40,  40,  41, ...,  38,  38,  38],
          [ 40,  40,  41, ...,  38,  38,  38],
          [ 41,  41,  43, ...,  40,  41,  41],
          ...,
          [240, 240, 240, ..., 244, 244, 244],
          [240, 240, 240, ..., 244, 244, 244],
          [240, 240, 240, ..., 244, 244, 244]],
 
         [[ 44,  44,  44, ...,  45,  43,  43],
          [ 44,  44,  44, ...,  45,  43,  43],
          [ 44,  44,  44, ...,  45,  41,  41],
          ...,
          [242, 242, 242, ..., 244, 244, 244],
          [242, 242, 242, ..., 244, 244, 244],
          [242, 242, 242, ..., 244, 244, 244]],
 
         ...,
 
         [[104, 124, 162, ..., 106, 153, 174],
          [118, 134, 158, ..., 108, 1

In [32]:
# Export diving with 16 frames per video; with num_frames > 8 the train split
# is written as several chunk files.
read_video_to_numpy(
    df=df_sports,
    root_dir="datasets",
    out_dir="datasets_numpy",
    num_frames=16,
    diving=True,
)

Category: diving


100%|██████████| 5009/5009 [01:04<00:00, 77.98it/s] 


(5009, 3, 16, 224, 224)


100%|██████████| 5009/5009 [01:03<00:00, 78.43it/s] 


(5009, 3, 16, 224, 224)


100%|██████████| 5009/5009 [01:03<00:00, 78.69it/s]


(5009, 3, 16, 224, 224)


100%|██████████| 985/985 [00:14<00:00, 65.90it/s]


(985, 3, 16, 224, 224)


100%|██████████| 985/985 [00:15<00:00, 64.85it/s]


(985, 3, 16, 224, 224)


### 2.3. Selection of 8 frames

In [10]:
# Export the non-diving sports with 8 frames per video.
read_video_to_numpy(
    df=df_sports,
    root_dir="datasets",
    out_dir="datasets_numpy",
    num_frames=8,
    diving=False,
)

Category: aerobic_gymnastics
Split: train


100%|██████████| 4192/4192 [00:29<00:00, 142.96it/s]


(4192, 3, 8, 224, 224)

Split: val


100%|██████████| 707/707 [00:04<00:00, 141.70it/s]


(707, 3, 8, 224, 224)

Split: test


100%|██████████| 707/707 [00:04<00:00, 146.09it/s]


(707, 3, 8, 224, 224)

Category: basketball
Split: train


100%|██████████| 4524/4524 [00:28<00:00, 157.20it/s]


(4524, 3, 8, 224, 224)

Split: val


100%|██████████| 854/854 [00:05<00:00, 156.43it/s]


(854, 3, 8, 224, 224)

Split: test


100%|██████████| 855/855 [00:05<00:00, 155.30it/s]


(855, 3, 8, 224, 224)

Category: football
Split: train


100%|██████████| 6083/6083 [00:36<00:00, 168.35it/s]


(6083, 3, 8, 224, 224)

Split: val


100%|██████████| 1070/1070 [00:06<00:00, 169.85it/s]


(1070, 3, 8, 224, 224)

Split: test


100%|██████████| 1071/1071 [00:06<00:00, 166.81it/s]


(1071, 3, 8, 224, 224)

Category: volleyball
Split: train


100%|██████████| 3546/3546 [00:21<00:00, 164.48it/s]


(3546, 3, 8, 224, 224)

Split: val


100%|██████████| 645/645 [00:03<00:00, 165.65it/s]


(645, 3, 8, 224, 224)

Split: test


100%|██████████| 646/646 [00:03<00:00, 164.49it/s]


(646, 3, 8, 224, 224)



[array([[[[ 40,  40,  41, ...,  38,  38,  38],
          [ 40,  40,  41, ...,  38,  38,  38],
          [ 41,  41,  43, ...,  40,  41,  41],
          ...,
          [240, 240, 240, ..., 244, 244, 244],
          [240, 240, 240, ..., 244, 244, 244],
          [240, 240, 240, ..., 244, 244, 244]],
 
         [[ 48,  48,  48, ...,  48,  44,  44],
          [ 48,  48,  48, ...,  48,  44,  44],
          [ 48,  48,  48, ...,  48,  44,  44],
          ...,
          [242, 242, 242, ..., 242, 240, 240],
          [242, 242, 242, ..., 240, 240, 240],
          [242, 242, 242, ..., 241, 241, 241]],
 
         [[ 51,  51,  51, ...,  42,  42,  39],
          [ 51,  51,  51, ...,  46,  49,  48],
          [ 47,  47,  47, ...,  47,  48,  46],
          ...,
          [131, 152, 197, ..., 246, 246, 246],
          [138, 132, 157, ..., 245, 245, 245],
          [159, 144, 148, ..., 245, 245, 245]],
 
         ...,
 
         [[ 51,  51,  49, ...,  75,  64,  62],
          [ 51,  51,  49, ...,  79,  

In [12]:
# Export diving with 8 frames per video. The chunked path requires num_frames > 8,
# so this call goes through the generic per-category loop and writes one file per split.
read_video_to_numpy(
    df=df_sports,
    root_dir="datasets",
    out_dir="datasets_numpy",
    num_frames=8,
    diving=True,
)

Category: diving
Split: train


100%|██████████| 15027/15027 [03:28<00:00, 72.04it/s]


(15027, 3, 8, 224, 224)

Split: val


100%|██████████| 985/985 [00:13<00:00, 73.82it/s]


(985, 3, 8, 224, 224)

Split: test


100%|██████████| 985/985 [00:13<00:00, 74.51it/s]


(985, 3, 8, 224, 224)

