In [1]:
import cv2
import os
import os.path as osp
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd

In [2]:
def get_frames(path, fps=None):
    """Decode a video file into a list of frames.

    Args:
        path: Path of the video, handed to ``cv2.VideoCapture``.
        fps: Sampling rate in frames per second. ``None`` reads every frame
            sequentially. Otherwise the capture position is advanced by
            ``1/fps`` seconds before each read, so the first read happens
            after seeking to ``1/fps`` seconds rather than at position 0.

    Returns:
        The frames in the order they were read, exactly as returned by
        ``cap.read()``. The list is empty when the first read fails.

    Raises:
        ValueError: If ``fps`` is given but is not positive.
    """
    # A zero rate would divide by zero and a negative one would seek
    # backwards on every iteration, so reject both before touching the file.
    if fps is not None and fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")
    time_increment = None if fps is None else 1 / fps

    cap = cv2.VideoCapture(path)
    res = []
    sec = 0
    try:
        while True:
            if time_increment is not None:
                sec += time_increment
                cap.set(cv2.CAP_PROP_POS_MSEC, 1000 * sec)
            success, image = cap.read()
            if not success:
                break
            res.append(image)
    finally:
        # Always hand the decoder/file handle back, even if a read raises.
        cap.release()

    return res


def save_frames(
    frames,
    dir,
    prefix="frame"
):
    """Write every frame to ``dir`` as ``<prefix>_<index>.jpg``.

    Args:
        frames: Iterable of images accepted by ``cv2.imwrite``.
        dir: Target directory; it is created (including parents) when it
            does not exist yet. An empty string is passed through unchanged.
        prefix: File-name prefix placed before the running index.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a frame was not written.
    """
    if dir:
        os.makedirs(dir, exist_ok=True)

    for i, frame in enumerate(frames):
        target = osp.join(dir, f"{prefix}_{i}.jpg")
        # imwrite signals failure through its return value; surface it
        # instead of silently dropping the frame.
        if not cv2.imwrite(target, frame):
            raise OSError(f"could not write frame {i} to {target}")


def to_gray(frames):
    """Convert each frame with ``cv2.COLOR_BGR2GRAY``.

    Returns a new list holding one converted image per input frame, in the
    same order.
    """
    gray_frames = []
    for frame in frames:
        gray_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    return gray_frames


def to_scale(frames, scale=0.5):
    """Resize every frame by ``scale`` along both image axes.

    The target size is ``(int(width * scale), int(height * scale))`` where
    width is ``shape[1]`` and height is ``shape[0]``; resizing uses
    ``cv2.INTER_AREA``. Returns a new list in input order.
    """
    def _resize(frame):
        # cv2.resize expects the size as (width, height).
        target = (int(frame.shape[1] * scale), int(frame.shape[0] * scale))
        return cv2.resize(frame, target, interpolation=cv2.INTER_AREA)

    return [_resize(frame) for frame in frames]


def read_pickle(dir):
    """Load and return the object pickled in the file at ``dir``.

    Despite its name, ``dir`` is the path of the pickle file itself.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(dir, mode="rb") as pickle_file:
        return pickle.load(pickle_file)


def write_pickle(dir, data):
    """Pickle ``data`` into the file at ``dir`` using the highest protocol.

    ``dir`` is the path of the output file; it is opened in binary write
    mode, so an existing file is replaced.
    """
    with open(dir, mode="wb") as sink:
        pickle.dump(data, sink, pickle.HIGHEST_PROTOCOL)
        

def to_pickle_name(file):
    """Return ``file`` with its extension replaced by ``.pkl``.

    Only the final extension is swapped, so dots elsewhere in the name are
    kept (``"clip.v2.mp4"`` -> ``"clip.v2.pkl"``). Cutting at the first dot
    instead would map distinct videos such as ``clip.v1.mp4`` and
    ``clip.v2.mp4`` onto the same pickle name.
    """
    return osp.splitext(file)[0] + ".pkl"


def flatten(frames):
    """Return a list with ``frame.flatten()`` for every frame, in order."""
    flat_frames = []
    for frame in frames:
        flat_frames.append(frame.flatten())
    return flat_frames

In [3]:
def to_fps_gray_scale(
    base_dir,
    save_dir,
    files,
    fps,
    scale,
    overwrite=False,
):
    """Sample, grayscale, downscale and pickle each video in ``files``.

    For every file name the video at ``base_dir/file`` is read with
    ``get_frames(path, fps)``, converted with ``to_gray``, resized with
    ``to_scale(..., scale)`` and written with ``write_pickle`` to
    ``save_dir/<to_pickle_name(file)>``.

    Args:
        base_dir: Directory holding the source videos.
        save_dir: Directory receiving the pickles; created if missing.
        files: Sequence of video file names relative to ``base_dir``.
        fps: Sampling rate forwarded to ``get_frames``.
        scale: Resize factor forwarded to ``to_scale``.
        overwrite: When False (default), files whose pickle already exists
            are skipped.

    Progress is printed only after a file has actually been processed, and
    only once the percentage has advanced by at least 2 since the last print.
    """
    # Create the output folder up front so a missing directory is not first
    # discovered by write_pickle after a whole video has been decoded.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    count = len(files)
    prev_progress = 0
    for i, file in enumerate(files):
        save_file = osp.join(save_dir, to_pickle_name(file))
        if osp.exists(save_file) and not overwrite:
            continue

        path = osp.join(base_dir, file)
        res = get_frames(path, fps)
        gray = to_gray(res)
        resized_gray = to_scale(gray, scale)

        write_pickle(save_file, resized_gray)

        cur_progress = int((i+1)*100/count)
        if cur_progress >= prev_progress + 2:
            print(f"progress: {cur_progress}%")
            prev_progress = cur_progress
        

def get_files(dir, format="mp4"):
    """List the entries of ``dir`` whose extension equals ``format``.

    Args:
        dir: Directory to scan (not recursive).
        format: Extension to keep, with or without the leading dot.

    Returns:
        Matching entry names (not full paths), sorted so repeated runs see
        the files in the same order regardless of how the file system
        enumerates them.

    An entry must have a real extension to match: a file named just ``mp4``
    is not treated as a video.
    """
    suffix = "." + format.lstrip(".")
    return sorted(
        filename
        for filename in os.listdir(dir)
        if osp.splitext(filename)[1] == suffix
    )


def to_actions(f):
    """Return the underscore-separated tokens that precede the first ``y<digits>`` token.

    ``f`` is split on ``"_"``; tokens are kept up to (not including) the first
    one that starts with ``"y"`` and is followed only by numeric characters.
    When no such token exists, every token is returned.
    """
    tokens = f.split("_")
    cut = next(
        (
            idx
            for idx, token in enumerate(tokens)
            if token[:1] == "y" and token[1:].isnumeric()
        ),
        len(tokens),
    )
    return tokens[:cut]


def to_df(
    pickle_dir,
    pickle_files,
    save_path=None,
):
    """Build a DataFrame with one row per pickle file.

    Each file is loaded with ``read_pickle`` and passed through ``flatten``;
    the labels come from ``to_actions`` applied to the file name.

    Args:
        pickle_dir: Directory containing the pickles.
        pickle_files: File names inside ``pickle_dir``.
        save_path: Where to pickle the resulting DataFrame. When omitted, a
            module-level ``save_path`` variable is used if one exists (the
            behaviour earlier callers rely on); if neither is available the
            frame is returned without being saved.

    Returns:
        DataFrame with the columns ``embedding`` (list of flattened frames),
        ``label`` (list of action tokens) and ``count`` (number of labels).
    """
    rows = []
    for f in pickle_files:
        frames = flatten(read_pickle(osp.join(pickle_dir, f)))
        actions = to_actions(f)
        rows.append((frames, actions, len(actions)))

    # Hand the rows to pandas directly: routing these ragged tuples through
    # a NumPy array fails on NumPy releases that refuse to infer object
    # arrays from inhomogeneous sequences.
    df = pd.DataFrame(rows, columns=["embedding", "label", "count"])

    if save_path is None:
        # Backward compatibility with callers that define save_path globally.
        save_path = globals().get("save_path")
    if save_path is not None:
        df.to_pickle(save_path)

    return df

In [4]:
# Root folder of the data sets. It defaults to the location used so far and
# can be redirected on another machine through the AR_DATA_ROOT environment
# variable instead of editing the notebook.
data_root = os.environ.get(
    "AR_DATA_ROOT", "C:/Users/aphri/Documents/t0002/pycharm/data"
)

# Folder that is scanned for the source videos.
base_dir = f"{data_root}/action_dataset0003"
# Folder that receives one pickle per processed video.
save_dir = f"{data_root}/ar_fps10_gray_scale3/pickle"

In [None]:
# Collect the video file names in base_dir (get_files defaults to "mp4").
files = get_files(base_dir)
    
# Sample every video at 10 fps, convert it to grayscale, shrink it to 30 %
# of its size and pickle the result into save_dir. Videos whose pickle
# already exists are skipped because overwrite keeps its default of False.
to_fps_gray_scale(
    base_dir=base_dir,
    save_dir=save_dir,
    files=files,
    fps=10,
    scale=0.3,
)

progress: 2%
progress: 4%
progress: 6%
progress: 8%
progress: 10%
progress: 12%
progress: 14%
progress: 16%
progress: 18%
progress: 20%
progress: 22%
progress: 24%
progress: 26%
progress: 28%
progress: 30%
progress: 32%
progress: 34%
progress: 36%
progress: 38%
progress: 40%
progress: 42%
progress: 44%
progress: 46%
progress: 48%
progress: 50%


In [None]:
# The combined DataFrame lives next to the pickle folder. Deriving the path
# from save_dir keeps both locations in sync instead of repeating the
# absolute root a second time.
save_path = osp.dirname(save_dir) + "/df.pkl"
pickle_files = get_files(save_dir, "pkl")

# One row per pickled video: flattened frames, action labels, label count.
df = to_df(
    save_dir,
    pickle_files
)

In [None]:
# Preview the first rows of the DataFrame built by to_df.
df.head()

In [None]:
# Preview the last rows of the DataFrame built by to_df.
df.tail()

In [None]:
# Persist the DataFrame to save_path.
df.to_pickle(save_path)

In [None]:
# Load the DataFrame back from save_path into a separate variable.
df2 = pd.read_pickle(save_path)

In [None]:
# Preview the first rows of the reloaded DataFrame.
df2.head()

In [None]:
# Preview the last rows of the reloaded DataFrame.
df2.tail()

In [None]:
# Shape of the first element stored in the "embedding" entry at index 0 of
# the reloaded DataFrame.
df2["embedding"][0][0].shape