In [1]:
import os
import skvideo.io
import cv2
import shutil
import numpy as np
from PIL import Image
import sys
sys.path.append("../../../../CNN_total/")

def get_parent_dir_name(path, level=1):

    path_spliter = os.path.sep
    abs_path = os.path.abspath(path)

    return abs_path.split(path_spliter)[-(1 + level)]

def get_frame_idx_list(total_frame, target_frame=32):
    divide_num = target_frame - 1
    target_frame_idx_list = [round(total_frame / divide_num * idx) for idx in range(target_frame)]
    target_frame_idx_list[-1] = target_frame_idx_list[-1] - 1
    return target_frame_idx_list


In [2]:
from glob import glob

common_raw_data_folder = "../data/0-2. Sampled/"
common_sample_folder = "../data/1. Sampling png"


video_class_list = sorted(os.listdir(common_raw_data_folder))
video_class_dict = {idx: class_str for idx, class_str in enumerate(video_class_list)}
video_class_instance_num_list = [len(glob(f"{common_raw_data_folder}/{video_class}/*"))
                                for video_class in video_class_list]

video_path_list = glob(f"{common_raw_data_folder}/*/*")

print(f"video_class_num: {len(video_class_dict)}")
print(f"min_class_instance: {np.min(video_class_instance_num_list)}")
print(f"max_class_instance: {np.max(video_class_instance_num_list)}")
print(f"video_num: {len(video_path_list)}")

video_class_num: 11
min_class_instance: 100
max_class_instance: 163
video_num: 1402


In [4]:
from glob import glob
from tqdm import tqdm
sampling_folder = "../data/1. Sampling png"

video_path_list = glob(f"{common_raw_data_folder}/*/*")
target_frame = 16
for idx, video_path in tqdm(enumerate(video_path_list)):
    
    class_str = get_parent_dir_name(video_path, level=1)
    video_basename = get_parent_dir_name(video_path, level=0).replace(".avi", "")
    video_array = skvideo.io.vread(video_path)
    
    video_array = video_array[:target_frame]
    current_frame = len(video_array)
    if current_frame == target_frame:
        pass
    else:
        inter_ratio = target_frame / current_frame  
        video_array = zoom(video_array, (inter_ratio, 1, 1, 1))
    # video_shape: [frame // 5, 240, 320, 3]
    total_frame = video_array.shape[0]
    sample_folder = f"{common_sample_folder}/{class_str}/{video_basename}"
    os.makedirs(sample_folder, exist_ok=True)
        
    for image_idx, image_array in enumerate(video_array):
        image_path = f"{sample_folder}/{image_idx:03}.png"
        Image.fromarray(image_array).save(image_path)

1402it [09:51,  2.37it/s]


In [11]:
from scipy.ndimage import zoom
test_array = np.copy(video_array)
result = zoom(test_array, (ratio, 1, 1, 1))

In [10]:
ratio = 128 / 102

In [9]:
test_array.shape

(102, 240, 320, 3)

In [12]:
result.shape

(128, 240, 320, 3)

In [3]:
from glob import glob
from tqdm import tqdm
sampling_folder = "../data/1. Sampling png"

video_path_list = glob(f"{common_raw_data_folder}/*/*")
target_frame = 32
stride = 6
for idx, video_path in tqdm(enumerate(video_path_list)):
    
    class_str = get_parent_dir_name(video_path, level=1)
    video_basename = get_parent_dir_name(video_path, level=0).replace(".avi", "")
    video_array = skvideo.io.vread(video_path)
    
    total_frame = video_array.shape[0] 
    frame_idx_list = get_frame_idx_list(total_frame, target_frame)
    video_array = video_array[frame_idx_list]
    
    # video_shape: [frame // 5, 240, 320, 3]
    total_frame = video_array.shape[0]
    sample_folder = f"{common_sample_folder}/{class_str}/{video_basename}"
    os.makedirs(sample_folder, exist_ok=True)
        
    for image_idx, image_array in enumerate(video_array):
        image_path = f"{sample_folder}/{image_idx:03}.png"
        Image.fromarray(image_array).save(image_path)

1402it [17:10,  1.36it/s]


In [5]:
from glob import glob
from tqdm import tqdm
sampling_folder = "../data/1. Sampling png"

video_path_list = glob(f"{common_raw_data_folder}/*/*")
target_fps = 5

frame_per_sample = 8
stride = 6
for idx, video_path in tqdm(enumerate(video_path_list)):
    
    class_str = get_parent_dir_name(video_path, level=1)
    video_basename = get_parent_dir_name(video_path, level=0).replace(".avi", "")
    video_array = skvideo.io.vread(video_path)
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_interval = int(round(fps / 5))
    video_array = video_array[0::frame_interval]    
    # video_shape: [frame // 5, 240, 320, 3]
    total_frame = video_array.shape[0]
    sample_folder = f"{common_sample_folder}/{class_str}/{video_basename}"
    
    for frame_idx in range(0, total_frame - frame_per_sample, stride):
        sample_array = video_array[frame_idx:frame_idx + frame_per_sample]
        
        sample_idx_folder = f"{sample_folder}/{frame_idx:03}"
        os.makedirs(sample_idx_folder, exist_ok=True)
        
        for image_idx, image_array in enumerate(sample_array):
            image_path = f"{sample_idx_folder}/{image_idx:03}.png"
            Image.fromarray(image_array).save(image_path)

0it [00:06, ?it/s]


# Filter Unused

In [21]:
common_sample_folder = "../data/1. Sampling npy"
target_sample_list = os.listdir("../data/0-2. Sampled/")

sample_folder_list = glob(f"{common_sample_folder}/*")

for sample_folder in sample_folder_list:
    is_in = False
    for target_sample in target_sample_list:
        if target_sample in sample_folder:
            is_in=True
            break
    if is_in:
        pass
    else:
        shutil.rmtree(sample_folder)

In [5]:
sample_array.shape

(8, 240, 320, 3)

In [15]:
list(range(0, total_frame - frame_per_sample, stride))

[0,
 6,
 12,
 18,
 24,
 30,
 36,
 42,
 48,
 54,
 60,
 66,
 72,
 78,
 84,
 90,
 96,
 102,
 108,
 114,
 120,
 126,
 132,
 138,
 144,
 150]

In [None]:
video_array

In [19]:
video_array.shape

(164, 240, 320, 3)

In [14]:
photo_list[0].shape

(240, 320, 3)

In [12]:
video_array.shape

(164, 240, 320, 3)

In [4]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D
from src.model.vision_transformer import swin_layers, transformer_layers

In [4]:
input_size = (28, 28, 1) # The image size of the MNIST
patch_size = (2, 2) # Segment 28-by-28 frames into 2-by-2 sized patches, patch contents and positions are embedded
n_labels = 10 # MNIST labels

# Dropout parameters
mlp_drop_rate = 0.01 # Droupout after each MLP layer
attn_drop_rate = 0.01 # Dropout after Swin-Attention
proj_drop_rate = 0.01 # Dropout at the end of each Swin-Attention block, i.e., after linear projections
drop_path_rate = 0.01 # Drop-path within skip-connections

# Self-attention parameters 
# (Fixed for all the blocks in this configuration, but can vary per block in larger architectures)
num_heads = 8 # Number of attention heads
embed_dim = 64 # Number of embedded dimensions
num_mlp = 256 # Number of MLP nodes
qkv_bias = True # Convert embedded patches to query, key, and values with a learnable additive value
qk_scale = None # None: Re-scale query based on embed dimensions per attention head # Float for user specified scaling factor

# Shift-window parameters
window_size = 2 # Size of attention window (height = width)
shift_size = window_size // 2 # Size of shifting (shift_size < window_size)

num_patch_x = input_size[0]//patch_size[0]
num_patch_y = input_size[1]//patch_size[1]

In [6]:
# The input section
IN = Inputs(input_size)
X = IN

# Extract patches from the input tensor
X = transformer_layers.patch_extract(patch_size)(X)

# Embed patches to tokens
X = transformer_layers.patch_embedding(num_patch_x*num_patch_y, embed_dim)(X)

# -------------------- Swin transformers -------------------- #
# Stage 1: window-attention + Swin-attention + patch-merging

for i in range(2):
    
    if i % 2 == 0:
        shift_size_temp = 0
    else:
        shift_size_temp = shift_size

    X = swin_layers.SwinTransformerBlock(dim=embed_dim, num_patch=(num_patch_x, num_patch_y), num_heads=num_heads, 
                             window_size=window_size, shift_size=shift_size_temp, num_mlp=num_mlp, qkv_bias=qkv_bias, qk_scale=qk_scale,
                             mlp_drop=mlp_drop_rate, attn_drop=attn_drop_rate, proj_drop=proj_drop_rate, drop_path_prob=drop_path_rate, 
                             name='swin_block{}'.format(i))(X)
# Patch-merging
#    Pooling patch sequences. Half the number of patches (skip every two patches) and double the embedded dimensions
X = transformer_layers.patch_merging((num_patch_x, num_patch_y), embed_dim=embed_dim, name='down{}'.format(i))(X)

# ----------------------------------------------------------- #

# Convert embedded tokens (2D) to vectors (1D)
X = GlobalAveragePooling1D()(X)

# The output section
OUT = Dense(n_labels, activation='softmax')(X)
# Model configuration
model = keras.models.Model(inputs=[IN,], outputs=[OUT,])

NameError: name 'Inputs' is not defined