In [18]:
from dataset import get_training_data, get_validation_data, get_inference_data
from spatial_transforms import (Compose, Normalize, Resize, CenterCrop,
                                CornerCrop, MultiScaleCornerCrop,
                                RandomResizedCrop, RandomHorizontalFlip,
                                ToTensor, ScaleValue, ColorJitter,
                                PickFirstChannels)
from temporal_transforms import (LoopPadding, TemporalRandomCrop,
                                 TemporalCenterCrop, TemporalEvenCrop,
                                 SlidingWindow, TemporalSubsampling)

from utils import Logger, worker_init_fn, get_lr
from mean import get_mean_std

import os
import numpy as np
import torch.multiprocessing as mp
import matplotlib.pyplot as plt

from pathlib import Path
from PIL import Image
from tqdm import tqdm

import torch
import random
import json
import cv2

In [100]:
# ---------------------------------------------------------------------------
# Dataset / transform configuration and dataset construction.
# ---------------------------------------------------------------------------
dataset = 'workoutform'
value_scale = 1

# NOTE(review): '0.5' is passed as the dataset name here rather than `dataset`;
# presumably it selects a generic mean/std preset -- confirm in mean.py.
mean, std = get_mean_std(value_scale, dataset='0.5')

# Built for convenience; neither is part of the pipelines below, so the
# tensors produced by the datasets are NOT mean/std normalised.
normalize = Normalize(mean, std)
scale = ScaleValue(value_scale)

# Training pipeline: includes the random flip augmentation.
spatial_transform = Compose([Resize(size=(320, 320), interpolation=Image.BILINEAR),
    RandomHorizontalFlip(p=0.5),
    ToTensor(),
    ScaleValue(1.)
])
# Evaluation pipeline: identical preprocessing but without random
# augmentation, so validation / test samples are deterministic.
eval_spatial_transform = Compose([Resize(size=(320, 320), interpolation=Image.BILINEAR),
    ToTensor(),
    ScaleValue(1.)
])
temporal_transform = Compose([TemporalCenterCrop(16)])

video_path = Path('data/workoutform_videos/jpg/')

# Fail early with a readable message if the frame directory is missing,
# then check every entry (os.path.exists is False for dangling symlinks).
assert video_path.is_dir(), f'{video_path} does not exist'
for video_dir in video_path.iterdir():
    assert os.path.exists(video_dir), f'{video_dir} does not exist'
annotation_path = Path('data/workoutform.json')
input_type = 'rgb'
file_type = 'jpg'
train_data = get_training_data(video_path,
                               annotation_path,
                               dataset, 
                               input_type, 
                               file_type,
                               spatial_transform,
                               temporal_transform)

val_data, collate_fn = get_validation_data(video_path, 
                                  annotation_path,          
                                  dataset, 
                                  input_type, 
                                  file_type,
                                  eval_spatial_transform,
                                  temporal_transform)

# The second return value of get_inference_data is discarded here.
test_data, _ = get_inference_data(video_path, 
                                  annotation_path,     
                                  dataset, 
                                  input_type, 
                                  file_type,
                                  'test',
                                  eval_spatial_transform,
                                  temporal_transform)

train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=128,
                                           shuffle=True,
                                           num_workers=0,
                                           pin_memory=True,
                                           worker_init_fn=worker_init_fn)

# val_loader = torch.utils.data.DataLoader(val_data,
#                                         batch_size=128,
#                                              shuffle=False,
#                                              num_workers=0,
#                                              pin_memory=True,
#                                              sampler=None,
#                                              worker_init_fn=worker_init_fn)


# test_loader = torch.utils.data.DataLoader(test_data,
#                                          batch_size=128,
#                                          shuffle=False,
#                                          num_workers=0,
#                                          pin_memory=True,
#                                          worker_init_fn=worker_init_fn)

dataset loading [0/1136]
dataset loading [227/1136]
dataset loading [454/1136]
dataset loading [681/1136]
dataset loading [908/1136]
dataset loading [1135/1136]
dataset loading [0/243]
dataset loading [48/243]
dataset loading [96/243]
dataset loading [144/243]
dataset loading [192/243]
dataset loading [240/243]
dataset loading [0/244]
dataset loading [48/244]
dataset loading [96/244]
dataset loading [144/244]
dataset loading [192/244]
dataset loading [240/244]


In [101]:
# Report the dataset length, then pull exactly one batch to inspect its shapes.
print(f'Training dataset size: {len(train_data)}')
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    xb, yb = first_batch
    print(xb.shape, yb.shape)

Training dataset size: 1136
torch.Size([128, 3, 16, 320, 320]) torch.Size([128])


In [102]:
def scale_to_255(image):
    """Min-max rescale an array to the 0-255 range and return it as uint8.

    Args:
        image: Array-like of numeric values (any shape, must be non-empty).

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with the same shape as ``image``.
        The minimum input value maps to 0 and the maximum to 255; values in
        between are truncated (not rounded) to integers. A constant input
        (max == min) has no contrast to stretch and yields all zeros.

    Raises:
        ValueError: If ``image`` is empty (raised by ``np.min``).
    """
    # Work in floating point so the arithmetic neither overflows nor truncates.
    image = np.asarray(image).astype(float)

    min_val = np.min(image)
    max_val = np.max(image)
    value_range = max_val - min_val

    # Guard against division by zero for constant images.
    if value_range == 0:
        return np.zeros(image.shape, dtype=np.uint8)

    # Divide first so the maximum becomes exactly 1.0 and hence exactly 255.
    scaled = (image - min_val) / value_range * 255.0

    return scaled.astype(np.uint8)

# Fetch only the first (clip, label) pair and keep the clip as a NumPy array.
first_sample = next(iter(train_data), None)
if first_sample is not None:
    x, y = first_sample
    frames = x.numpy()


In [103]:
# Show the shape of the sampled clip (recorded output: (3, 16, 320, 320)).
frames.shape

(3, 16, 320, 320)

In [104]:
a = frames
# Move axis 0 to the end (axes 1, 2, 3, 0) and keep the first slice of the
# new leading axis, i.e. a single frame with the former axis 0 last.
b = a.transpose(1, 2, 3, 0)[0]

# Maximum over the first two axes of that frame -> one value per last-axis entry.
b.max(axis=(0, 1))

array([1.        , 1.        , 0.98039216], dtype=float32)

In [105]:
# Hard-coded three-element statistics applied along the last axis of `b`.
# NOTE: this rebinds the notebook-level names `mean` and `std`.
mean = np.array([0.41715325, 0.39402192, 0.35711448])
std = np.array([0.25924891, 0.24782488, 0.23383827])

# Standardise in two explicit steps: centre, then scale.
centered = b - mean
c = centered / std

c

array([[[ 2.1271997 ,  2.22365132, -0.77251635],
        [ 2.1271997 ,  2.22365132, -0.80605721],
        [ 2.1271997 ,  2.23947527, -0.88990936],
        ...,
        [ 0.02459477,  0.21400961, -0.42033725],
        [ 0.05484807,  0.24565751, -0.38679639],
        [ 0.06997473,  0.26148146, -0.37002596]],

       [[ 2.1271997 ,  2.22365132, -0.78928678],
        [ 2.1271997 ,  2.23947527, -0.83959807],
        [ 2.1271997 ,  2.23947527, -0.92345022],
        ...,
        [-0.33844491, -0.16576518, -0.77251635],
        [-0.30819161, -0.13411728, -0.75574592],
        [-0.29306495, -0.11829333, -0.73897549]],

       [[ 2.1271997 ,  2.22365132, -0.83959807],
        [ 2.1271997 ,  2.23947527, -0.90667979],
        [ 2.1271997 ,  2.23947527, -0.99053195],
        ...,
        [-0.686358  , -0.54553996, -1.10792499],
        [-0.67123135, -0.52971601, -1.09115456],
        [-0.65610469, -0.51389207, -1.07438413]],

       ...,

       [[ 2.14232635,  2.35024292, -1.39302232],
        [ 2

In [None]:
# Take the first clip of the batch and move axis 0 last so each entry of
# `frames` is one image: the recorded batch shape is (128, 3, 16, 320, 320),
# so a clip is (3, 16, 320, 320) -> (16, 320, 320, 3). A reshape cannot do
# this: it would reinterpret memory instead of reordering axes.
frames = xb.numpy()[0].transpose(1, 2, 3, 0)

n_cols = 4
n_rows = -(-len(frames) // n_cols)  # ceiling division: never drop trailing frames
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 40), squeeze=False)

# Plot each frame; any unused trailing axes are simply hidden.
for frame, ax in enumerate(axes.flat):
    ax.axis('off')
    if frame >= len(frames):
        continue
    ax.set_title(f'Frame: {frame}')
    # NOTE(review): no BGR->RGB swap. The frames go through the PIL-based
    # transforms with input_type 'rgb', so they are presumably already RGB --
    # confirm against the image loader.
    ax.imshow(frames[frame])

# Display the plot
plt.show()

In [None]:
# Display the label tensor of the most recently fetched batch.
yb

In [None]:
# `val_loader` is otherwise only defined in commented-out code in the
# data-loading cell, so build it here to avoid a NameError on a fresh run.
# The settings mirror that commented-out configuration.
# NOTE(review): get_validation_data also returned `collate_fn`; confirm
# whether it needs to be passed to the DataLoader.
val_loader = torch.utils.data.DataLoader(val_data,
                                         batch_size=128,
                                         shuffle=False,
                                         num_workers=0,
                                         pin_memory=True,
                                         sampler=None,
                                         worker_init_fn=worker_init_fn)

print(f'Validation dataset size: {len(val_data)}')
# Walk the whole validation set once, printing the shape of every batch.
for xb, yb in val_loader:
    print(xb.shape, yb.shape)

In [None]:
# `test_loader` is otherwise only defined in commented-out code in the
# data-loading cell, so build it here to avoid a NameError on a fresh run.
# The settings mirror that commented-out configuration.
# NOTE(review): the second return value of get_inference_data was discarded
# when `test_data` was created; confirm no custom collate function is needed.
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=128,
                                          shuffle=False,
                                          num_workers=0,
                                          pin_memory=True,
                                          worker_init_fn=worker_init_fn)

print(f'Test dataset size: {len(test_data)}')
# Walk the whole test set once, printing the shape of every batch.
for xb, yb in test_loader:
    print(xb.shape, yb.shape)

In [None]:
class SupaFakeDataset(torch.utils.data.Dataset):
    """Synthetic map-style dataset yielding random clips and random labels.

    Useful for exercising a DataLoader without touching real data.

    Args:
        size: Number of samples the dataset reports and serves.
        shape: Shape of each generated sample array. Defaults to
            ``(3, 8, 112, 112)``.
        num_classes: Labels are drawn uniformly from
            ``0 .. num_classes - 1``. Defaults to 52.
    """

    def __init__(self, size, shape=(3, 8, 112, 112), num_classes=52):
        self.size = size
        self.shape = tuple(shape)
        self.num_classes = num_classes

    def __getitem__(self, index):
        """Return a fresh random ``(x, y)`` pair for a valid ``index``.

        ``x`` is a float64 array of ``self.shape`` with values in [0, 1) and
        ``y`` is a Python int label. The content does not depend on ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[-size, size)``. Without
                this, plain iteration (``for x, y in dataset``) would never
                stop, because Python's sequence protocol relies on
                ``IndexError`` to detect the end.
        """
        if not -self.size <= index < self.size:
            raise IndexError(f'index {index} out of range for dataset of size {self.size}')
        x = np.random.random(self.shape)
        y = random.randint(0, self.num_classes - 1)  # randint is inclusive on both ends
        return x, y

    def __len__(self):
        """Return the configured number of samples."""
        return self.size

In [None]:
# Swap the real training set for the synthetic one (rebinds `train_data`).
train_data = SupaFakeDataset(10000)

# Inspect the first sample only.
x, y = train_data[0]
print(x.shape, y)

In [None]:
# Shell: report size and usage of the filesystem backing /dev/shm (human-readable).
!df -h /dev/shm

In [None]:
# Shell: print the system limits for System V shared memory.
!ipcs -lm

In [None]:
# Report the tensor sharing strategy torch.multiprocessing currently uses.
sharing_strategy = mp.get_sharing_strategy()
print(f'Sharing strategy: "{sharing_strategy}"')

# Small batches with four worker processes, built from the current `train_data`.
loader_options = dict(batch_size=4,
                      shuffle=True,
                      num_workers=4,
                      pin_memory=True)
train_loader = torch.utils.data.DataLoader(train_data, **loader_options)

In [None]:
# Consume the full loader once, printing the shapes of every batch.
for batch in train_loader:
    xb, yb = batch
    print(xb.shape, yb.shape)

In [5]:
from datasets.videodataset import get_database

In [6]:
# Load the annotation JSON from `data_path` itself. Previously this cell
# defined `data_path` but opened the unrelated global `annotation_path`,
# which only exists if another cell has been run first.
data_path = Path('data/workoutform.json')
with data_path.open('r') as f:
    data = json.load(f)

# Collect the 'testing' subset; the formatter places each video directly
# under the root path (the label argument is ignored).
video_ids, video_paths, annotations = get_database(
    data,
    'testing',
    Path('data'),
    lambda root_path, label, video_id: root_path / video_id)

In [7]:
# Number of video ids returned for the 'testing' subset (recorded output: 244).
len(video_ids)

244

In [20]:
def get_class_weights(data_path):
    """Compute normalised inverse-frequency class weights from an annotation file.

    The file is expected to be JSON with a top-level ``'database'`` mapping
    whose values each contain ``['annotations']['label']``.

    Args:
        data_path: ``str`` or ``Path`` of the annotation JSON file.

    Returns:
        dict mapping each label to its weight. A label's weight is
        proportional to ``1 / frequency`` and the weights sum to 1, so rarer
        classes get larger weights. Labels appear in first-seen order.
        Returns an empty dict when the database has no entries.
    """
    # Read from the path that was passed in, not from a notebook-level global.
    with Path(data_path).open('r') as f:
        data = json.load(f)

    counts = {}
    for entry in data['database'].values():
        label = entry['annotations']['label']
        counts[label] = counts.get(label, 0) + 1

    total = sum(counts.values())
    if total == 0:
        return {}

    # 1 / (count / total) == total / count; then normalise to sum to 1.
    inverse = {label: total / count for label, count in counts.items()}
    norm = sum(inverse.values())
    return {label: value / norm for label, value in inverse.items()}
        
    

# Tally how many database entries carry each of the three known labels.
counts = { 'kie': 0, 'kfe': 0, 'correct': 0 }
for entry in data['database'].values():
    counts[entry['annotations']['label']] += 1
print(counts)

# Overall number of labelled entries.
total = sum(counts.values())

# Relative frequency of every label.
for label_name, label_count in counts.items():
    print(f'{label_name}:\t {label_count/total}')
    


{'kie': 232, 'kfe': 917, 'correct': 474}
kie:	 0.14294516327788045
kfe:	 0.5650030807147258
correct:	 0.2920517560073937


In [22]:
# Total number of entries in the annotation database (recorded output: 1623).
len(data['database'].items())

1623

In [30]:
# Step 1: relative class frequencies, in the iteration order of `counts`.
class_counts = np.fromiter(counts.values(), dtype=float)
weights = class_counts / total
print(weights)

# Step 2: invert, so that rare classes receive large values.
weights = 1.0 / weights
print(weights)

# Step 3: normalise so the weights sum to one.
weights = weights / weights.sum()
print(weights)
print(weights.sum())

[0.14294516 0.56500308 0.29205176]
[6.99568966 1.76990185 3.42405063]
[0.57390443 0.1451972  0.28089837]
1.0


# Compute mean and std

In [6]:
from PIL import Image
from glob import glob
from concurrent.futures import ProcessPoolExecutor

def process_images(paths):
    """Load every image in ``paths`` as a 240x240 RGB array.

    Args:
        paths: Iterable of image file paths.

    Returns:
        list of ``np.ndarray``, one per path and in the same order. All
        arrays share the shape (240, 240, 3), so they can be stacked.
    """
    images = []
    for path in tqdm(paths):
        # The context manager closes the file handle as soon as the pixels are
        # copied, instead of keeping it open until garbage collection.
        with Image.open(path) as img:
            # Force three channels so grayscale/RGBA files cannot break
            # stacking or per-channel statistics.
            image = np.array(img.convert('RGB').resize((240, 240)))
        images.append(image)

    return images

# Collect the frame files. Without recursive=True, '**' behaves like '*',
# so this matches .jpg files exactly one directory below jpg/.
jpg_pattern = 'data/workoutform_videos/jpg/**/*.jpg'
all_paths = glob(jpg_pattern)

# Load everything into memory, then combine into a single array.
result = process_images(all_paths)
images_np = np.stack(result, axis=0)

 54%|█████▍    | 102511/189547 [07:42<06:32, 221.54it/s]


KeyboardInterrupt: 

In [None]:
# Divide every pixel value by 255; the result is a new floating-point array.
images_scaled = np.true_divide(images_np, 255)

In [None]:
print('start calculating mean and std')
# Reduce over the first three axes, leaving one statistic per last-axis entry.
reduce_axes = (0, 1, 2)
mean = images_scaled.mean(axis=reduce_axes)  # Mean for each channel
print("Mean:", mean)
std = images_scaled.std(axis=reduce_axes)    # Std for each channel
print("Std:", std)

In [None]:
# Scratch cell: prints a fixed string.
print('hello')

In [106]:
import torch
from torch.nn import CrossEntropyLoss
# Per-class rescaling weights for the loss. They must be floating point:
# an integer tensor does not match the dtype of the float logits and makes
# the loss fail when it is called.
class_weights = torch.tensor([1., 2., 3.], dtype=torch.float32)
criterion = CrossEntropyLoss(weight=class_weights)