In [None]:
!pip install -r requirements.txt
!pip install opencv-python
!pip install mediapipe

## Main

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

from lib.config import *

import vector_quantize_pytorch as vq
from lib.encoder.vqvae import VQVAE_POSE
from lib.utils.dataset import get_dataset
from lib.train.autoencoder import AutoTrainer
from lib.data.dataset import PoseDistanceDataset
from lib.encoder.cnn import CNN3dEncoder, CNN3dDecoder

In [2]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

MODEL_ENCODER = CNN3dEncoder(
    model_name=GLOBAL_CONFIG.MODEL_ENCODER_NAME,
    conv_layers=GLOBAL_CONFIG.MODEL_ENCODER_CONVOLUTIONAL_LAYERS,
    linear_layers=GLOBAL_CONFIG.MODEL_ENCODER_LINEAR_LAYERS,
    out_channels=GLOBAL_CONFIG.MODEL_ENCODER_OUT_CHANNEL,
    input_size=GLOBAL_CONFIG.INPUT_DIM,
    output_size=GLOBAL_CONFIG.MODEL_VQ_EMBED_DIM,
    channel_size=GLOBAL_CONFIG.INPUT_CHANNELS,
    depth_size=GLOBAL_CONFIG.FRAME_WINDOW,
    log=False,
)

MODEL_DECODER = CNN3dDecoder(
    model_name=GLOBAL_CONFIG.MODEL_DECODER_NAME,
    linear_layers=GLOBAL_CONFIG.MODEL_DECODER_LINEAR_LAYERS,
    conv_transpose_layers=GLOBAL_CONFIG.MODEL_DECODER_CONVOLUTIONAL_LAYERS,
    in_channels=GLOBAL_CONFIG.MODEL_ENCODER_OUT_CHANNEL,
    linear_input=GLOBAL_CONFIG.MODEL_VQ_EMBED_DIM,
    input_size=MODEL_ENCODER.output_size, # This is for reshaping into encoder before linear layers
    output_size=(GLOBAL_CONFIG.INPUT_CHANNELS, GLOBAL_CONFIG.FRAME_WINDOW, GLOBAL_CONFIG.INPUT_DIM[3], GLOBAL_CONFIG.INPUT_DIM[4]),
    log=False
)

VQVAE = vq.ResidualVQ(
    dim=GLOBAL_CONFIG.MODEL_VQ_EMBED_DIM,
    codebook_size=GLOBAL_CONFIG.MODEL_VQ_VOCAB,
    num_quantizers=GLOBAL_CONFIG.MODEL_VQ_CODEBOOK,
    codebook_dim=GLOBAL_CONFIG.MODEL_VQ_EMBED_DIM,
)

MODEL_VQVAE = VQVAE_POSE(
    encoder=MODEL_ENCODER,
    decoder=MODEL_DECODER,
    vq_vae=VQVAE,
)

encoder_conv_0 {'in_channels': 2, 'out_channels': 10, 'kernel_size': (1, 10, 10), 'stride': (1, 1, 1), 'padding': (1, 1, 1), 'batch_norm': True, 'activation': 'gelu', 'pooling': 'max', 'pooling_kernel_size': (1, 2, 2), 'pooling_stride': 1}
encoder_conv_1 {'in_channels': 10, 'out_channels': 5, 'kernel_size': (1, 10, 10), 'stride': (1, 1, 1), 'padding': (1, 1, 1), 'batch_norm': True, 'activation': 'gelu', 'pooling': 'max', 'pooling_kernel_size': (1, 2, 2), 'pooling_stride': 1}
encoder_conv_2 {'in_channels': 5, 'out_channels': 5, 'kernel_size': (1, 10, 10), 'stride': (1, 1, 1), 'padding': (1, 1, 1), 'batch_norm': True, 'activation': 'gelu', 'pooling': 'max', 'pooling_kernel_size': (1, 2, 2), 'pooling_stride': 1}


encoder_linear_0 {'in_features': 403155, 'out_features': 512, 'activation': 'gelu', 'dropout': 0.5}
encoder_linear_1 {'in_features': 512, 'out_features': 768, 'activation': 'gelu', 'dropout': 0.5}
decoder_linear_out_0: {'in_features': 768, 'out_features': 512, 'activation': 'gelu', 'dropout': 0.5}
decoder_linear_out_1: {'in_features': 512, 'out_features': 403155, 'activation': 'gelu', 'dropout': 0.5}
decoder_conv_out_0: {'in_channels': 5, 'out_channels': 3, 'kernel_size': (3, 5, 5), 'stride': (1, 1, 1), 'padding': (1, 1, 1), 'output_padding': (1, 1, 1), 'activation': 'gelu', 'pooling': 'avg', 'pooling_kernel_size': (2, 2, 2), 'pooling_stride': 1}
decoder_conv_out_1: {'in_channels': 3, 'out_channels': 3, 'kernel_size': (4, 5, 5), 'stride': (1, 2, 2), 'padding': (1, 1, 1), 'output_padding': (1, 1, 1), 'activation': 'gelu', 'pooling_kernel_size': (2, 2, 2), 'pooling_stride': 1}
decoder_conv_out_2: {'in_channels': 3, 'out_channels': 2, 'kernel_size': (3, 15, 15), 'stride': (1, 2, 2), 'padd

In [3]:
train_dataset, eval_dataset = get_dataset(
    DATASET_PATH='dataset/adjacency/',
    DATASET_EXTENSION='.npy',
    DATASET_ENCODING='utf-8',
    DATA_DISTRIBUTION='80-20',
    DATASET_CONFIG={
        'window': GLOBAL_CONFIG.FRAME_WINDOW,
        'depth': GLOBAL_CONFIG.INPUT_CHANNELS
    },
    RANDOM_STATE=42
)

Data size: 10


100%|██████████| 8/8 [00:00<00:00, 110.16it/s]
100%|██████████| 2/2 [00:00<00:00, 147.23it/s]


In [4]:
EXPERIMENT_PATH = 'experiments'

In [5]:
import os
from datetime import datetime
EXPERIMENT_NAME = str(datetime.now())
os.mkdir(f'{EXPERIMENT_PATH}/{EXPERIMENT_NAME}')
os.mkdir(f'{EXPERIMENT_PATH}/{EXPERIMENT_NAME}/model')
os.mkdir(f'{EXPERIMENT_PATH}/{EXPERIMENT_NAME}/logs')
EXPERIMENT_PATH = EXPERIMENT_PATH + '/' + EXPERIMENT_NAME

In [7]:
trainer = AutoTrainer(
    model=MODEL_VQVAE,
    train_dataset=train_dataset, 
    eval_dataset=eval_dataset,
    batch_size=GLOBAL_CONFIG.BATCH_SIZE,
    epochs=GLOBAL_CONFIG.NUM_EPOCHS,
    learning_rate=GLOBAL_CONFIG.LEARNING_RATE,
    step_size=GLOBAL_CONFIG.STEP_SIZE,
    gamma=GLOBAL_CONFIG.GAMMA,
    device='cpu',
    start_epoch=0,
    num_codebooks=GLOBAL_CONFIG.MODEL_VQ_CODEBOOK,
    model_path=f'{EXPERIMENT_PATH}/model/model-0.pt',
    log_dir=f'{EXPERIMENT_PATH}/logs/logs.json'
)

trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mkarahan-sahin[0m ([33mboun-pilab[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0




: 

In [None]:
# Infer model out
from lib.utils.infer import *

df = get_quantization(MODEL_VQVAE, eval_dataset)

dump_quantization(
    df, 
    num_quantizers=GLOBAL_CONFIG.MODEL_VQ_CODEBOOK, 
    video_path='dataset/corpus', 
    quantization_path='analyze/quantization'
)

### 1. Pose Inference

In [2]:
SAMPLE = 'dataset/corpus/ABARTMAK_0.mp4'
SAMPLE_POSE = get_pose_estimation(SAMPLE)














INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
import pandas as pd

def get_pose_array(SAMPLE_POSE):
    """Converts the pose data into a numpy array
    """

    POSE_RAW = pd.DataFrame(SAMPLE_POSE['pose'])
    RIGHT_HAND_RAW = pd.DataFrame(SAMPLE_POSE['right'])
    LEFT_HAND_RAW = pd.DataFrame(SAMPLE_POSE['left'])

    POSE_DF = {}

    for col in POSE_RAW.columns:
        POSE_DF[ 'POSE_' + col + '_X'] = POSE_RAW[col].apply(lambda x: x[0])
        POSE_DF[ 'POSE_' + col + '_Y'] = POSE_RAW[col].apply(lambda x: x[1])
        POSE_DF[ 'POSE_' + col + '_Z'] = POSE_RAW[col].apply(lambda x: x[2])
        # POSE_DF[col + '_viz'] = POSE_RAW[col].apply(lambda x: x[3])

    for col in RIGHT_HAND_RAW.columns:
        POSE_DF[ 'RIGHT_' + col + '_X' ] = RIGHT_HAND_RAW[col].apply(lambda x: x[0])
        POSE_DF[ 'RIGHT_' + col + '_Y' ] = RIGHT_HAND_RAW[col].apply(lambda x: x[1])
        POSE_DF[ 'RIGHT_' + col + '_Z' ] = RIGHT_HAND_RAW[col].apply(lambda x: x[2])
        # POSE_DF['RIGHT_' + col + '_viz'] = RIGHT_HAND_RAW[col].apply(lambda x: x[3])

    for col in LEFT_HAND_RAW.columns:
        POSE_DF[ 'LEFT_' + col + '_X' ] = LEFT_HAND_RAW[col].apply(lambda x: x[0])
        POSE_DF[ 'LEFT_' + col + '_Y' ] = LEFT_HAND_RAW[col].apply(lambda x: x[1])
        POSE_DF[ 'LEFT_' + col + '_Z' ] = LEFT_HAND_RAW[col].apply(lambda x: x[2])
        # POSE_DF['LEFT_' + col + '_viz'] = LEFT_HAND_RAW[col].apply(lambda x: x[3])

    POSE_DF = pd.DataFrame(POSE_DF)

    return POSE_DF

In [4]:
POSE_DF = get_pose_array(SAMPLE_POSE[0])

In [6]:
POSE_DF.columns.to_list()

['POSE_NOSE_X',
 'POSE_NOSE_Y',
 'POSE_NOSE_Z',
 'POSE_LEFT_EYE_INNER_X',
 'POSE_LEFT_EYE_INNER_Y',
 'POSE_LEFT_EYE_INNER_Z',
 'POSE_LEFT_EYE_X',
 'POSE_LEFT_EYE_Y',
 'POSE_LEFT_EYE_Z',
 'POSE_LEFT_EYE_OUTER_X',
 'POSE_LEFT_EYE_OUTER_Y',
 'POSE_LEFT_EYE_OUTER_Z',
 'POSE_RIGHT_EYE_INNER_X',
 'POSE_RIGHT_EYE_INNER_Y',
 'POSE_RIGHT_EYE_INNER_Z',
 'POSE_RIGHT_EYE_X',
 'POSE_RIGHT_EYE_Y',
 'POSE_RIGHT_EYE_Z',
 'POSE_RIGHT_EYE_OUTER_X',
 'POSE_RIGHT_EYE_OUTER_Y',
 'POSE_RIGHT_EYE_OUTER_Z',
 'POSE_LEFT_EAR_X',
 'POSE_LEFT_EAR_Y',
 'POSE_LEFT_EAR_Z',
 'POSE_RIGHT_EAR_X',
 'POSE_RIGHT_EAR_Y',
 'POSE_RIGHT_EAR_Z',
 'POSE_MOUTH_LEFT_X',
 'POSE_MOUTH_LEFT_Y',
 'POSE_MOUTH_LEFT_Z',
 'POSE_MOUTH_RIGHT_X',
 'POSE_MOUTH_RIGHT_Y',
 'POSE_MOUTH_RIGHT_Z',
 'POSE_LEFT_SHOULDER_X',
 'POSE_LEFT_SHOULDER_Y',
 'POSE_LEFT_SHOULDER_Z',
 'POSE_RIGHT_SHOULDER_X',
 'POSE_RIGHT_SHOULDER_Y',
 'POSE_RIGHT_SHOULDER_Z',
 'POSE_LEFT_ELBOW_X',
 'POSE_LEFT_ELBOW_Y',
 'POSE_LEFT_ELBOW_Z',
 'POSE_RIGHT_ELBOW_X',
 'POSE_RIGH

In [11]:
POSE_DF = POSE_DF.replace(np.nan,0)

In [29]:
def get_matrices(POSE_DF):
    """Converts the pose data into a numpy array of distance matrices
    """
    x_cols = [col for col in POSE_DF.columns if col.endswith('_X')]
    y_cols = [col for col in POSE_DF.columns if col.endswith('_Y')]
    z_cols = [col for col in POSE_DF.columns if col.endswith('_Z')]

    frames = []
    for i in range(1, POSE_DF.shape[0]):
        x_row = POSE_DF[x_cols].iloc[i].to_numpy()
        y_row = POSE_DF[y_cols].iloc[i].to_numpy()
        z_row = POSE_DF[z_cols].iloc[i].to_numpy()

        def get_difference_matrix(row):
            m, n = np.meshgrid(row, row)
            out = m-n
            return out

        x_diff = get_difference_matrix(x_row)
        y_diff = get_difference_matrix(y_row)
        z_diff = get_difference_matrix(z_row)

        frame = np.stack([x_diff, y_diff, z_diff], axis=2)
        frames.append(frame)

    frames = np.stack(frames, axis=0)
    return frames

In [35]:
OUT_PATH = 'dataset/adjacency'
POSE_PATH = 'dataset/pose'

for file in tqdm(glob.glob('dataset/pose/*.npy')):
    if os.path.exists(os.path.join(OUT_PATH, os.path.basename(file).replace('.mp4', '.npy'))):
        # print('Skipping', file)
        continue
    with open(file, 'rb') as f:
        array = np.load(f, allow_pickle=True)
        # replace nan with 0 
        array = np.nan_to_num(array)
    pose_df = pd.DataFrame(array, columns=POSE_DF.columns)
    pose_df = pose_df.replace(np.nan,0)
    MATRICES = get_matrices(pose_df)
    # print(MATRICES.shape)
    np.save(os.path.join(OUT_PATH, os.path.basename(file).replace('.mp4', '.npy')), MATRICES)


  0%|          | 0/3395 [00:00<?, ?it/s]

In [37]:
print(pd.DataFrame(x_diff, columns=[col for col in POSE_DF.columns if col.endswith('_X')], index=[col for col in POSE_DF.columns if col.endswith('_X')]).to_markdown())

|                           |   POSE_NOSE_X |   POSE_LEFT_EYE_INNER_X |   POSE_LEFT_EYE_X |   POSE_LEFT_EYE_OUTER_X |   POSE_RIGHT_EYE_INNER_X |   POSE_RIGHT_EYE_X |   POSE_RIGHT_EYE_OUTER_X |   POSE_LEFT_EAR_X |   POSE_RIGHT_EAR_X |   POSE_MOUTH_LEFT_X |   POSE_MOUTH_RIGHT_X |   POSE_LEFT_SHOULDER_X |   POSE_RIGHT_SHOULDER_X |   POSE_LEFT_ELBOW_X |   POSE_RIGHT_ELBOW_X |   POSE_LEFT_WRIST_X |   POSE_RIGHT_WRIST_X |   POSE_LEFT_PINKY_X |   POSE_RIGHT_PINKY_X |   POSE_LEFT_INDEX_X |   POSE_RIGHT_INDEX_X |   POSE_LEFT_THUMB_X |   POSE_RIGHT_THUMB_X |   POSE_LEFT_HIP_X |   POSE_RIGHT_HIP_X |   POSE_LEFT_KNEE_X |   POSE_RIGHT_KNEE_X |   POSE_LEFT_ANKLE_X |   POSE_RIGHT_ANKLE_X |   POSE_LEFT_HEEL_X |   POSE_RIGHT_HEEL_X |   POSE_LEFT_FOOT_INDEX_X |   POSE_RIGHT_FOOT_INDEX_X |   RIGHT_WRIST_X |   RIGHT_THUMB_CMC_X |   RIGHT_THUMB_MCP_X |   RIGHT_THUMB_IP_X |   RIGHT_THUMB_TIP_X |   RIGHT_INDEX_FINGER_MCP_X |   RIGHT_INDEX_FINGER_PIP_X |   RIGHT_INDEX_FINGER_DIP_X |   RIGHT_INDEX_FINGER_TIP_X

In [None]:
import glob
import numpy as np
from tqdm.notebook import tqdm
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    ARRAY_PATH = 'dataset/pose/'
    for datapath in tqdm(glob.glob('dataset/corpus/*.mp4')):
        print(datapath)
        pose = get_pose_estimation(datapath)
        pose_array = get_pose_array(pose)
        print(pose_array.shape, datapath)
        dname = datapath.split('/')[-1].replace('.mp4', '.npy')
        with open(ARRAY_PATH+'/'+dname, 'wb') as f:
            np.save(f, pose_array)

### 2. Graph Autoencoder Training

In [2]:
DATA_PATH = 'dataset/pose/'
data = glob.glob(DATA_PATH + '*.npy')
X_train, X_val = train_test_split(data, test_size=0.2, random_state=42)

In [3]:
train_dataset = PoseDataset(X_train)
train_dataloader = DataLoader(
    train_dataset, 
    batch_size=GLOBAL_CONFIG.BATCH_SIZE, 
    shuffle=True,
)

100%|██████████| 2716/2716 [00:02<00:00, 941.46it/s]


In [4]:
val_dataset = PoseDataset(X_val)
val_dataloader = DataLoader(
    val_dataset, 
    batch_size=GLOBAL_CONFIG.BATCH_SIZE, 
    shuffle=True,
)

100%|██████████| 679/679 [00:00<00:00, 880.56it/s]


In [5]:
torch.cuda.is_available()

  return torch._C._cuda_getDeviceCount() > 0


False

In [6]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

MODEL_ENCODER = FFNEncoder(
    input_dim=GLOBAL_CONFIG.MODEL_ENCODER_INPUT_DIM,
    hidden_dim=GLOBAL_CONFIG.MODEL_ENCODER_HIDDEN_DIM,
    output_dim=GLOBAL_CONFIG.MODEL_ENCODER_OUTPUT_DIM,
)

MODEL_DECODER = FFNDecoder(
    input_dim=GLOBAL_CONFIG.MODEL_DECODER_INPUT_DIM,
    hidden_dim=GLOBAL_CONFIG.MODEL_DECODER_HIDDEN_DIM,
    output_dim=GLOBAL_CONFIG.MODEL_ENCODER_INPUT_DIM,
)

MODEL_QUANT = ResidualVQ(
    dim = GLOBAL_CONFIG.MODEL_VQ_EMBED_DIM,
    stochastic_sample_codes=True,
    num_quantizers=1,      # specify number of quantizers
    codebook_size=GLOBAL_CONFIG.MODEL_VQ_NUM_EMBS,    # codebook size           
    kmeans_init=True,   # set to True
    kmeans_iters=100     # number of kmeans iterations to calculate the centroids for the codebook on init
)

MODEL_VQVAE = VQVAE(
    encoder=MODEL_ENCODER,
    decoder=MODEL_DECODER,
    vq=MODEL_QUANT,
)

trainer = AutoencoderTrainer(
    model=MODEL_VQVAE,
    learning_rate=GLOBAL_CONFIG.LEARNING_RATE,
    train_dataloader=train_dataloader, 
    val_dataloader=val_dataloader,
    num_epochs=GLOBAL_CONFIG.NUM_EPOCHS,
    device='cpu',
)

In [None]:
trainer.train()

In [8]:
from tqdm.notebook import tqdm

MODEL_VQVAE.eval()

dfs = []
for train_sample in tqdm(train_dataloader):
    with torch.no_grad():
        quantized, indices, commitment_loss = MODEL_VQVAE(train_sample['array'].float())
        dfs.append(pd.DataFrame({
            'videos': train_sample['token'],
            'labels': indices.detach().cpu().numpy().reshape(-1),
            'frame': train_sample['frame'].detach().cpu().numpy().reshape(-1)
        }))

  0%|          | 0/18911 [00:00<?, ?it/s]

In [9]:
df = pd.concat(dfs)

In [10]:
df

Unnamed: 0,videos,labels,frame
0,TICARET_0.npy,663,8
1,EL_0.npy,663,1
2,KAN_0.npy,375,31
3,EL O╠êPMEK_1.npy,663,26
4,SALDIRMAK_0.npy,282,15
...,...,...,...
0,S╠ğOK _0.npy,96,40
1,SOHBET ETMEK_0.npy,882,6
2,YAS╠ğAMAK_0.npy,96,47
3,GU╠êC╠ğLU╠ê _1.npy,282,7


In [11]:
df.labels.value_counts()

labels
629    46223
96     43641
882    23308
663    21702
282    11906
375     4364
0        141
Name: count, dtype: int64

In [14]:
import cv2
for rec in tqdm(df[df['labels'] == 375].to_dict(orient='records')[:100]):
    # save frame video to disk
    video = rec['videos'].split('.')[0]
    video_path = f"dataset/corpus/{video}.mp4"
    frame_idx = rec['frame']
    label = rec['labels']
    
    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    
    if not os.path.exists(f'analyze/quantization/{label}'): os.mkdir(f'analyze/quantization/{label}')

    for i in range(frame_idx):
        ret, frame = cap.read()
        if i == frame_idx-1:
            cv2.imwrite(f'analyze/quantization/{label}/{video}_{frame_idx}.jpg', frame)   

  0%|          | 0/100 [00:00<?, ?it/s]

###  3. 3D-CNN Training 

In [15]:
DATA_PATH = 'dataset/adjacency/'
data = glob.glob(DATA_PATH + '*.npy')[:100]
X_train, X_val = train_test_split(data, test_size=0.2, random_state=42)

In [16]:
train_dataset = PoseDistanceDataset(X_train)
train_dataloader = DataLoader(
    train_dataset, 
    batch_size=GLOBAL_CONFIG.BATCH_SIZE, 
    shuffle=True,
    collate_fn=PoseDistanceDataset.collate_fn
)

100%|██████████| 80/80 [00:00<00:00, 249.82it/s]


In [17]:
val_dataset = PoseDistanceDataset(X_val)
val_dataloader = DataLoader(
    val_dataset, 
    batch_size=GLOBAL_CONFIG.BATCH_SIZE, 
    shuffle=True,
    collate_fn=PoseDistanceDataset.collate_fn
)

100%|██████████| 20/20 [00:00<00:00, 187.17it/s]


In [18]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

MODEL_ENCODER = CNNEncoder(
    input_channels=3,
)

MODEL_DECODER = CNNDecoder(
    output_channels=3,
)

MODEL_QUANT = ResidualVQ(
    dim = GLOBAL_CONFIG.MODEL_VQ_EMBED_DIM,
    stochastic_sample_codes=True,
    num_quantizers=1,      # specify number of quantizers
    codebook_size=GLOBAL_CONFIG.MODEL_VQ_NUM_EMBS,    # codebook size           
    kmeans_init=True,   # set to True
    kmeans_iters=10     # number of kmeans iterations to calculate the centroids for the codebook on init
)

MODEL_VQVAE = VQVAE(
    encoder=MODEL_ENCODER,
    decoder=MODEL_DECODER,
    vq=MODEL_QUANT,
)

trainer = AutoencoderTrainer(
    model=MODEL_VQVAE,
    learning_rate=GLOBAL_CONFIG.LEARNING_RATE,
    train_dataloader=train_dataloader, 
    val_dataloader=val_dataloader,
    num_epochs=GLOBAL_CONFIG.NUM_EPOCHS,
    device='cpu',
)

In [None]:
trainer.train()

In [10]:
from tqdm.notebook import tqdm

MODEL_VQVAE.eval()

train_dataloader = DataLoader(
    train_dataset, 
    batch_size=10, 
    shuffle=True,
    collate_fn=train_dataset.collate_fn   
)

dfs = []
for train_sample in tqdm(train_dataloader):
    with torch.no_grad():
        quantized, indices, commitment_loss = MODEL_VQVAE(train_sample['array'].float())

        quant = {
            'videos': train_sample['tokens'],
            'start_idx': train_sample['start_idx'],
            'end_idx': train_sample['end_idx']
        }
        

        for index in range(indices.shape[1]):
            quant[f'Code_{index}'] = indices[:, index].cpu().numpy()


        dfs.append(pd.DataFrame(quant))

  0%|          | 0/1929 [00:00<?, ?it/s]

torch.Size([10, 5])


ValueError: All arrays must be of the same length

In [22]:
df = pd.concat(dfs)

In [1]:
import json
LOG_IDX=45
with open(f'analyze/quantization/experimental_logs/logs-{LOG_IDX}.json', 'r') as f:
    corpus = json.load(f)

In [2]:
cls = corpus['train']['commit-loss']
import numpy as np
for i in range(len(cls)):
    if cls[str(i)]:
        print(i,np.sum(cls[str(i)]))

4 0.5961245219223201
5 0.5159384731668979
6 0.4879024289548397
7 0.4947575144469738
8 0.543659548740834
9 0.4580083201872185
10 0.44548981124535203
11 0.45211545820347965
12 0.4493164427112788
13 0.445126699982211
14 0.4435033902991563
15 0.4355329726822674
16 0.44574258592911065
17 0.4758904278278351
18 0.4386360328644514
19 0.46825177944265306
20 0.47044387902133167
21 0.4662994877435267
22 0.46805936587043107
23 0.599162164144218
24 0.5104892868548632
25 0.48613795591518283
26 0.49259176501072943
27 0.4915984633844346
28 0.48166503314860165
29 0.520871419692412
30 0.48008685489185154
31 0.4684961619786918
32 0.48307271650992334
33 0.48976475978270173
34 0.4895846757572144
35 0.4980121129192412
36 0.5442749639041722
37 0.4971227194182575
38 0.49992787395603955
39 0.5123899674508721
40 0.5095423641614616
41 0.5381570560857654
42 0.5067722711246461
43 0.5273929920513183
44 0.5057559658307582
45 0.5271707361098379


In [3]:
val = corpus['validation']
df = {}
for keys in ['vocab', 'start_idx', 'end_idx', 'quantization']:
    print(keys)
    print(val[keys][f'{LOG_IDX}'])
    if keys == 'quantization':
        for code in val[keys][f'{LOG_IDX}']:
            df[code] = val[keys][f'{LOG_IDX}'][code]
    else:
        df[keys] = val[keys][f'{LOG_IDX}']

vocab
['DO╠êNMEK_1', 'CAHIL _1', 'MESELA_1', 'TU╠êRBAN_0', 'TU╠êRBAN_1', 'UFAK_2', 'C╠ğARE _0', 'GEC╠ğMIS╠ğ OLSUN_2', 'I╠çNSAN_0', 'VIDEO_2', 'VARMAK_0', 'ENGELLI_0', 'KONUS╠ğMAK_0', 'KANUN_1', 'GEC╠ğMIS╠ğ OLSUN_2', 'HERS╠ğEY _2', 'C╠ğATI_0', 'NAKIS╠ğ_2', 'YARIS╠ğMAK_0', 'SALI _1', 'GU╠êC╠ğLU╠ê _2', 'KUMAS╠ğ_1', 'YAG╠åMUR_0', 'KU╠êC╠ğU╠êLTMEK_0', 'KADAYIF_1', 'OTOBU╠êS_2', 'OLUMLU_0', 'CANI ACIMAK_0', 'MU╠êDU╠êR _2', 'ACEMI _1', 'KARIS╠ğTIRMAK_0', 'OLUMLU_0', 'TERO╠êRIST_0', 'CAHIL _1', 'MASAJ_0', 'OKS╠ğAMAK_0', 'BEBEK_0', 'DIKIS╠ğ DIKMEK_0', 'EVRAK_0', 'BAG╠åLAMAK _0', 'EVRAK_0', 'C╠ğIFTLIK_1', 'KAPALI_2', 'DAG╠åITMAK_2', 'HAYAL ETMEK_0', 'O╠êZGU╠êRLU╠êK_0', 'TAYIN_0', 'OTOMATIK_1', 'YARDIM_0', 'BAS╠ğARISIZ_0', 'I╠çNEK _0', 'TOPRAK_0', 'SU╠êZMEK_0', 'BIC╠ğAK_0', 'I╠çNEK _0', 'KARIS╠ğTIRMAK_0', 'UFAK_2', 'SU╠êZMEK_0', 'MESELA_1', 'OTOMATIK_1', 'KALABALIK_1', 'MAVI_0', 'ALINMAK_1', 'TAKMAK_0', 'HAYAT _0', 'DIKIS╠ğ_1', 'FOTOKOPI_0', 'KONUS╠ğMAK_0', 'BAS╠ğI DO╠êNMEK_0', 'DO╠êNMEK_1', 'CES

In [4]:
import pandas as pd
df = pd.DataFrame(df)
df.start_idx = df.start_idx.astype(int)
df.end_idx = df.end_idx.astype(int)

In [5]:
df.Code_0.value_counts()

Code_0
446     290
31      207
42      196
328     195
1345    191
       ... 
50        2
25        2
41        1
19        1
4         1
Name: count, Length: 64, dtype: int64

In [9]:
df.Code_1.value_counts().head(10)

Code_1
1460    120
1054     90
1362     56
1276     52
1104     37
643      34
1161     34
658      33
82       32
606      30
Name: count, dtype: int64

In [10]:
import cv2
import pandas as pd
from tqdm.notebook import tqdm
from moviepy.editor import VideoFileClip

CODEBOOK = 'Code_1'
CODE_ID = 606           

for rec in tqdm(df[df[CODEBOOK] == CODE_ID].to_dict(orient='records')):
    # save frame video to disk
    video = rec['vocab']
    video_path = f"dataset/corpus/{video}.mp4"
    start_idx = rec['start_idx']
    end_idx = rec['end_idx']
    label = str(rec['Code_1']) + '-' + str(rec['Code_0'])

    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    
    import os
    if not os.path.exists(f'analyze/quantization/{label}'):
        os.mkdir(f'analyze/quantization/{label}')

    FRAMES = []
    for i in range(end_idx+1):
        ret, frame = cap.read()
        if i >= start_idx and i < end_idx:
            FRAMES.append(frame)

    # write frames to video
    out = cv2.VideoWriter(f'analyze/quantization/{label}/{video}_{start_idx}_{end_idx}.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, (frame.shape[1], frame.shape[0]))
    for frame in FRAMES:
        out.write(frame)

    out.release()      

    videoClip = VideoFileClip(f"analyze/quantization/{label}/{video}_{start_idx}_{end_idx}.avi")
    videoClip.write_gif(f"analyze/quantization/{label}/{video}_{start_idx}_{end_idx}.gif")

    os.remove(f"analyze/quantization/{label}/{video}_{start_idx}_{end_idx}.avi")

  0%|          | 0/30 [00:00<?, ?it/s]

MoviePy - Building file analyze/quantization/606-9/FOTOKOPI_0_25_50.gif with imageio.




MoviePy - Building file analyze/quantization/606-1252/BELGE_1_37_62.gif with imageio.




MoviePy - Building file analyze/quantization/606-11/MUHTAR_0_29_54.gif with imageio.




MoviePy - Building file analyze/quantization/606-38/YU╠êKLEMEK_1_26_51.gif with imageio.




MoviePy - Building file analyze/quantization/606-48/AYAKKABI _0_35_60.gif with imageio.




MoviePy - Building file analyze/quantization/606-53/AYAKKABI _0_41_66.gif with imageio.




MoviePy - Building file analyze/quantization/606-1252/BELGE_1_38_63.gif with imageio.




MoviePy - Building file analyze/quantization/606-11/YU╠êZME_1_34_59.gif with imageio.




MoviePy - Building file analyze/quantization/606-1222/HESAP _1_47_72.gif with imageio.




MoviePy - Building file analyze/quantization/606-20/EKONOMIK_1_31_56.gif with imageio.




MoviePy - Building file analyze/quantization/606-20/YOG╠åURMAK_0_28_53.gif with imageio.




MoviePy - Building file analyze/quantization/606-9/AYAKKABI _0_25_50.gif with imageio.




MoviePy - Building file analyze/quantization/606-1252/BELGE_1_35_60.gif with imageio.




MoviePy - Building file analyze/quantization/606-9/AYAKKABI _0_27_52.gif with imageio.




MoviePy - Building file analyze/quantization/606-36/EZILMEK_2_31_56.gif with imageio.




MoviePy - Building file analyze/quantization/606-1252/BELGE_1_39_64.gif with imageio.




MoviePy - Building file analyze/quantization/606-53/S╠ğIFA_0_42_67.gif with imageio.




MoviePy - Building file analyze/quantization/606-9/AYAKKABI _0_26_51.gif with imageio.




MoviePy - Building file analyze/quantization/606-36/OLAY _1_27_52.gif with imageio.




MoviePy - Building file analyze/quantization/606-1252/BELGE_1_36_61.gif with imageio.




MoviePy - Building file analyze/quantization/606-36/DERI_2_44_69.gif with imageio.




MoviePy - Building file analyze/quantization/606-11/YARIS╠ğMAK_0_28_53.gif with imageio.




MoviePy - Building file analyze/quantization/606-12/TAKMAK_0_30_55.gif with imageio.




MoviePy - Building file analyze/quantization/606-1252/YAG╠åMUR_0_26_51.gif with imageio.




MoviePy - Building file analyze/quantization/606-20/YOG╠åURMAK_0_27_52.gif with imageio.




MoviePy - Building file analyze/quantization/606-55/AYAKKABI _0_37_62.gif with imageio.




MoviePy - Building file analyze/quantization/606-20/EKONOMIK_1_30_55.gif with imageio.




MoviePy - Building file analyze/quantization/606-20/UFAK_2_31_56.gif with imageio.




MoviePy - Building file analyze/quantization/606-36/TITREMEK_1_25_50.gif with imageio.




MoviePy - Building file analyze/quantization/606-48/OKS╠ğAMAK_0_40_65.gif with imageio.


