In [None]:
# Mount Google Drive in this Colab runtime; later cells read data via paths under drive/MyDrive.
from google.colab import drive

# force_remount=True asks Colab to mount again even if /content/drive is already mounted.
drive.mount('/content/drive', force_remount=True)
# List the working directory to confirm that the `drive` folder is visible.
%ls

Mounted at /content/drive
[0m[01;34mdrive[0m/  [01;34msample_data[0m/


In [None]:
# AUGMENTATIONS
import random
import math
import torch

# Any argument named `keypoints` must be a 1D tensor of flattened (x, y, uncertainty) triples, e.g. torch.tensor(keypoint_json['pose_keypoints_2d']).

def keypoint_to_coord(keypoints):
  """Split a flat keypoint tensor into x, y and uncertainty components.

  The input is viewed as rows of three values. Column 0 is returned as x,
  column 1 as y and column 2 as the uncertainty, each as a 1D tensor with
  one entry per row.
  """
  triples = keypoints.reshape(-1, 3)
  # unbind along dim 1 yields exactly the three columns, in order.
  x, y, uncertainty = triples.unbind(dim=1)
  return x, y, uncertainty

def coord_to_keypoint(x, y, uncertainty):
  """Interleave x, y and uncertainty back into one flat keypoint tensor.

  This is the inverse of keypoint_to_coord: the three 1D inputs become the
  columns of a matrix that is then flattened row by row, giving
  [x0, y0, u0, x1, y1, u1, ...].
  """
  columns = (x, y, uncertainty)
  return torch.flatten(torch.stack(columns, dim=1))

# Rotate around center by a random angle
def rotation(keypoints, max_angle=0.15):
  """Rotate all keypoints about the point (0.5, 0.5) by a random angle.

  Args:
    keypoints: 1D tensor of flattened (x, y, uncertainty) triples.
    max_angle: the angle is drawn uniformly from [-max_angle, max_angle].
      It is passed to math.cos / math.sin, i.e. it is in radians.

  Returns:
    A new 1D tensor in the same layout with rotated x/y values; the
    uncertainty values are passed through unchanged.
  """
  angle = random.uniform(-max_angle, max_angle)
  cos_a = math.cos(angle)
  sin_a = math.sin(angle)
  x, y, uncertainties = keypoint_to_coord(keypoints)

  # Offsets from the rotation centre.
  dx = x - 0.5
  dy = y - 0.5

  # Standard 2D rotation: [x'; y'] = [[cos, -sin], [sin, cos]] @ [dx; dy].
  # The previous y' used (dy * cos - dx * sin), which is not a rotation and
  # distorted the pose instead of turning it.
  x_rot = dx * cos_a - dy * sin_a + 0.5
  y_rot = dx * sin_a + dy * cos_a + 0.5
  return coord_to_keypoint(x_rot, y_rot, uncertainties)

# Randomly shift and rescale the x coordinates (horizontal squeeze)
def squeeze(keypoints):
  """Randomly shift and rescale the x coordinates of the keypoints.

  Two random margins, each between 0 and 15% of the horizontal extent
  (max(x) - min(x)), are drawn. x is shifted by the left margin and divided
  by the extent minus both margins. y and the uncertainty values are
  unchanged.

  Args:
    keypoints: 1D tensor of flattened (x, y, uncertainty) triples.

  Returns:
    A new 1D tensor in the same layout. If the input is empty or all x
    values are identical (zero extent), the keypoints are returned without
    modification, because the rescale would otherwise divide by zero and
    fill the result with NaN/inf.
  """
  x, y, uncertainties = keypoint_to_coord(keypoints)

  # torch.max / torch.min raise on empty tensors, so check the size first.
  if x.numel() == 0:
    return coord_to_keypoint(x, y, uncertainties)

  width = torch.max(x) - torch.min(x)
  if width == 0:
    return coord_to_keypoint(x, y, uncertainties)

  left_squeeze = random.uniform(0, 0.15) * width
  right_squeeze = random.uniform(0, 0.15) * width
  # The denominator is at least 0.7 * width, so it is non-zero here.
  x = (x - left_squeeze) / (width - left_squeeze - right_squeeze)
  return coord_to_keypoint(x, y, uncertainties)

def projection(keypoints):
  """Apply a random projective warp (homography) to the keypoint coordinates.

  The homography is the product H = R @ A @ P of
    R: a rotation by a random angle in [-0.15, 0.15] (radians, as consumed by
       math.cos / math.sin) with a translation (Tx, Ty) in [-0.15, 0.15],
    A: a shear with factors Sx, Sy in [-0.1, 0.1],
    P: a perspective term with p1, p2 in [-0.0001, 0.0001].
  Points are lifted to homogeneous coordinates, multiplied by H and divided
  by the resulting third coordinate.

  Random numbers come from the standard `random` module, like the other
  augmentations in this cell. The previous version called `np.random`, but
  numpy is never imported in this notebook, so it raised NameError.

  Args:
    keypoints: 1D tensor of flattened (x, y, uncertainty) triples.

  Returns:
    A new 1D tensor in the same layout with warped x/y values; the
    uncertainty values are passed through unchanged.
  """
  x, y, uncertainties = keypoint_to_coord(keypoints)

  # Rotation + translation.
  rot_angle = random.uniform(-0.15, 0.15)
  Tx = random.uniform(-0.15, 0.15)
  Ty = random.uniform(-0.15, 0.15)
  R = torch.tensor([
      [math.cos(rot_angle), -math.sin(rot_angle), Tx],
      [math.sin(rot_angle), math.cos(rot_angle), Ty],
      [0, 0, 1]
  ])

  # Shear.
  Sx = random.uniform(-0.1, 0.1)
  Sy = random.uniform(-0.1, 0.1)
  A = torch.tensor([
      [1, Sy, 0],
      [Sx, 1, 0],
      [0, 0, 1]
  ])

  # Perspective distortion; kept tiny so the homogeneous divisor stays near 1.
  p1 = random.uniform(-0.0001, 0.0001)
  p2 = random.uniform(-0.0001, 0.0001)
  P = torch.tensor([
      [1, 0, 0],
      [0, 1, 0],
      [p1, p2, 1]
  ])

  H = R @ A @ P

  # Homogeneous coordinates: one column (x, y, 1) per keypoint.
  coords = torch.stack((x, y, torch.ones(x.shape[0])))
  new_coords = H @ coords
  # Back to Cartesian coordinates by dividing by the third row.
  new_coords = new_coords[:2, :] / new_coords[2, :]

  x = new_coords[0, :]
  y = new_coords[1, :]
  keypoints_new = coord_to_keypoint(x, y, uncertainties)
  return keypoints_new


def augment(keypoints):
  """Apply one randomly chosen augmentation to the keypoints.

  One of rotation, squeeze or projection is picked with equal probability
  and applied once.

  The choice uses the standard `random` module. The previous version called
  `np.random.randint`, but numpy is never imported in this notebook, so it
  raised NameError.

  Args:
    keypoints: 1D tensor of flattened (x, y, uncertainty) triples.

  Returns:
    The augmented keypoints as returned by the selected augmentation.
  """
  chosen_augmentation = random.choice((rotation, squeeze, projection))
  return chosen_augmentation(keypoints)

In [None]:

import json
import pandas as pd


# NORMALIZATION CODE =============================================
def normalize_helper(keypoints):
  """Standardise the x and y coordinates of one keypoint group.

  x and y are independently shifted to zero mean and scaled to unit standard
  deviation (torch.std); the uncertainty values are carried through
  unchanged apart from being cast to float so all three columns share a
  dtype when they are stacked back together.

  Degenerate groups are handled explicitly instead of producing NaN:
    * empty input is returned as an empty tensor,
    * when the standard deviation is zero or undefined (all values equal,
      e.g. an all-zero group, or a single keypoint) the values are only
      centred, which yields zeros.

  Args:
    keypoints: 1D tensor of flattened (x, y, uncertainty) triples.

  Returns:
    A 1D float tensor in the same layout with normalised x/y.
  """
  def standardize(values):
    values = values.float()
    if values.numel() == 0:
      return values
    centered = values - torch.mean(values)
    spread = torch.std(values)
    # std is 0 for constant input and NaN for a single value; dividing by
    # either would poison every downstream computation with NaN/inf.
    if not torch.isfinite(spread) or spread == 0:
      return centered
    return centered / spread

  x, y, uncertainties = keypoint_to_coord(keypoints)
  x = standardize(x)
  y = standardize(y)

  return coord_to_keypoint(x, y, uncertainties.float())


def normalize_keypoints(kp_json):
  """Normalise the four keypoint groups of one person entry.

  Reads the left-hand, right-hand, face and pose keypoint lists from
  `kp_json`, runs each through normalize_helper, and returns a new dict that
  maps the same four keys to plain Python lists. Other keys of `kp_json` are
  not copied.
  """
  group_keys = (
      'hand_left_keypoints_2d',
      'hand_right_keypoints_2d',
      'face_keypoints_2d',
      'pose_keypoints_2d',
  )

  # Convert every group first, then normalise, keeping the key order above.
  raw_groups = {key: torch.tensor(kp_json[key]) for key in group_keys}
  return {
      key: normalize_helper(group).tolist()
      for key, group in raw_groups.items()
  }


# LOOP THROUGH ALL FILES AND NORMALIZE FOR PRE-PROCESSING

def normalize_all(annotations_file='drive/MyDrive/CS231A/asllvd_signs_2023_02_16.csv',
                  pose_dir='drive/MyDrive/CS231A/ASLLVD/output_json/',
                  output_dir='drive/MyDrive/CS231A/ASLLVD/normalized_json',
                  start_row=9500):
  """Normalise every keypoint JSON file and write the result to `output_dir`.

  For each row of the annotations CSV from position `start_row` onwards, the
  folder `<pose_dir>/<row index>` is looked up. Rows without such a folder
  are skipped. Every file in the folder is parsed as JSON, the first entry
  of its 'people' list is normalised with normalize_keypoints, and the
  result is written under the same file name to `<output_dir>/<row index>`.

  Args:
    annotations_file: path of the annotations CSV.
    pose_dir: directory holding one sub-folder of keypoint JSON files per row.
    output_dir: directory that receives the normalised JSON files.
    start_row: positional offset of the first CSV row to process. The default
      of 9500 keeps the previous hard-coded behaviour; pass 0 to process all
      rows.

  Raises:
    IndexError: if a JSON file has an empty 'people' list.
  """
  # Imported here because this cell only imports json and pandas; `os` is
  # otherwise first imported by a later cell, so calling this function right
  # after defining it would fail with NameError.
  import os

  df = pd.read_csv(annotations_file)
  df = df.iloc[start_row:]

  # Folders are named after the DataFrame index label of each row.
  for idx in df.index:
    pose_path = os.path.join(pose_dir, f'{idx}')
    output_path = os.path.join(output_dir, f'{idx}')

    if not os.path.isdir(pose_path):
      continue
    os.makedirs(output_path, exist_ok=True)

    for filename in os.listdir(pose_path):
      json_path = os.path.join(pose_path, filename)
      # Context manager so the handle is closed; the loop touches thousands
      # of files and previously leaked one descriptor per file.
      with open(json_path) as f:
        kp_json = json.load(f)['people'][0]
      kp_json = normalize_keypoints(kp_json)

      with open(os.path.join(output_path, filename), 'w') as output_f:
        json.dump(kp_json, output_f, ensure_ascii=False, indent=4)


In [None]:
# normalize_all()


In [None]:
# DATALOADER 

import torch
from torch.utils.data import Dataset, DataLoader
import json
import os
import pandas as pd


# TODO: Add keep_uncertainty flag
# TODO: Add explicit stratification
class ASLLVDataset(Dataset):
  """Dataset of per-frame keypoint sequences with one gloss label per sample.

  Each row of `df` is one sample. Its frames are the JSON files inside
  `<pose_dir>/<index label of the row>`; every file contributes the pose,
  face, left-hand and right-hand keypoints of the first detected person,
  concatenated in that order into a single 1D tensor.

  Args:
    df: annotations DataFrame. Must contain the 'main entry gloss label'
      column. Its index labels are used as folder names, so subsets and
      shuffled splits keep pointing at the right folders.
    pose_dir: directory holding one sub-folder of keypoint JSON files per row.
    transform: when truthy and `train` is True, every frame is passed through
      `augment`. If it is also callable, it is applied to the stacked
      (frames, features) tensor before it is returned.
    keep_uncertainty: if False, the third value of every keypoint triple is
      dropped, leaving only x and y.
    normalization: stored for callers; not used by this class.
    train: enables the per-frame augmentation (together with `transform`).
  """

  LABEL_COLUMN = 'main entry gloss label'
  # Concatenation order of the keypoint groups inside one frame vector.
  KEYPOINT_KEYS = (
      'pose_keypoints_2d',
      'face_keypoints_2d',
      'hand_left_keypoints_2d',
      'hand_right_keypoints_2d',
  )

  def __init__(self, df, pose_dir,
               transform=None,
               keep_uncertainty=True,
               normalization=True,
               train=True):
    self.df = df
    self.pose_dir = pose_dir
    self.transform = transform
    self.keep_uncertainty = keep_uncertainty
    # These two were accepted but never stored, so __getitem__ failed with
    # AttributeError when it read self.train.
    self.normalization = normalization
    self.train = train

  def __len__(self):
    return len(self.df)

  def _load_frame(self, json_path):
    """Read one keypoint JSON file and return its flat keypoint tensor."""
    with open(json_path) as f:
      kp_json = json.load(f)['people'][0]
    # Explicit float dtype: a group serialised as integer zeros would
    # otherwise become an integer tensor and not match the float groups.
    groups = [torch.tensor(kp_json[key], dtype=torch.float32)
              for key in self.KEYPOINT_KEYS]
    return torch.cat(groups)

  def __getitem__(self, idx):
    """Return (full_pose, label) for the sample at position `idx`.

    full_pose is a 2D tensor with one row per frame. label is the value of
    the 'main entry gloss label' column for that sample.
    """
    # `idx` is a position in [0, len(self)); folders are named after the
    # DataFrame index label, which differs from the position as soon as the
    # frame has been filtered or split.
    sample_id = self.df.index[idx]
    pose_path = os.path.join(self.pose_dir, f'{sample_id}')

    frame_keypoints = []
    # os.listdir returns entries in arbitrary order; sort so frames are
    # stacked in a deterministic (file name) order.
    for filename in sorted(os.listdir(pose_path)):
      keypoint_tensor = self._load_frame(os.path.join(pose_path, filename))

      if self.train and self.transform:
        # Previously passed the undefined name `keypoints` (NameError).
        keypoint_tensor = augment(keypoint_tensor)

      if not self.keep_uncertainty:
        keypoint_tensor = keypoint_tensor.reshape(-1, 3)[:, :2].reshape(-1)

      frame_keypoints.append(keypoint_tensor)
    full_pose = torch.stack(frame_keypoints)

    label = self.df[self.LABEL_COLUMN].iloc[idx]

    # `transform` doubles as an on/off flag for augmentation, so only call it
    # when it really is a callable.
    if self.transform and callable(self.transform):
      full_pose = self.transform(full_pose)

    return full_pose, label

def collate_fn(data):
  """Regroup a batch of (pose, label) pairs into (poses, labels).

  Both results are tuples with one entry per sample. Poses are not stacked,
  so samples with different numbers of frames can share a batch.
  """
  columns = tuple(zip(*data))
  poses, labels = columns
  return (poses, labels)


In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split


batch_size = 1
shuffle = True
annotations_file = 'drive/MyDrive/CS231A/asllvd_signs_2023_02_16.csv'
pose_dir = 'drive/MyDrive/CS231A/ASLLVD/output_json/'
normal_pose_dir = 'drive/MyDrive/CS231A/ASLLVD/normalized_json/'

LABEL_COLUMN = 'main entry gloss label'
# Stratified splitting needs at least 2 examples of every class in the data
# being split, and the 15% validation/test parts are carved out of the 30%
# hold-out. With 7 examples a gloss is guaranteed at least 2 examples in that
# hold-out, so both stratified splits below succeed. This trades data for a
# fully stratified split: lower it (and drop `stratify` from the second
# split) to keep rarer glosses.
MIN_EXAMPLES_PER_GLOSS = 7
# Fixed seed so the train/val/test membership is reproducible across runs.
SPLIT_SEED = 42

df = pd.read_csv(annotations_file)

# Drop glosses that are too rare to be stratified; this is presumably the
# cause of the ValueError previously raised by this cell. Index labels are
# kept because they identify the pose folders on disk.
gloss_counts = df[LABEL_COLUMN].value_counts()
frequent_glosses = gloss_counts[gloss_counts >= MIN_EXAMPLES_PER_GLOSS].index
df = df[df[LABEL_COLUMN].isin(frequent_glosses)].copy()

# Fit the encoder after filtering so class ids are contiguous.
le = preprocessing.LabelEncoder()
le.fit(df[LABEL_COLUMN])
df['categorical_label'] = le.transform(df[LABEL_COLUMN])

# 70% train, 15% validation, 15% test.
train, val_test = train_test_split(
    df, test_size=0.3, stratify=df['categorical_label'], random_state=SPLIT_SEED)
val, test = train_test_split(
    val_test, test_size=0.5, stratify=val_test['categorical_label'], random_state=SPLIT_SEED)

train_ds = ASLLVDataset(train, pose_dir, keep_uncertainty=True, train=True)
# Evaluation datasets must never be augmented.
val_ds = ASLLVDataset(val, pose_dir, keep_uncertainty=True, train=False)
test_ds = ASLLVDataset(test, pose_dir, keep_uncertainty=True, train=False)

train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
# Evaluation order does not need to be randomised; keep it deterministic.
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)


ValueError: ignored

In [None]:
# Tiny dataset for smoke-testing the loader: label-based slice of the
# annotations up to and including index label 5.
df = pd.read_csv(annotations_file).loc[:5]

ds = ASLLVDataset(df=df, pose_dir=pose_dir, keep_uncertainty=True)
dl = DataLoader(dataset=ds, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)

In [None]:
# Inspect the first training batch only.
# collate_fn returns (poses, labels) as two tuples with one entry per sample;
# each pose is its own (#frames, keypoint_dim) tensor, so the batch has no
# single `.shape` (calling it on the tuple raised AttributeError).
for poses, labels in train_dl:
  print([tuple(pose.shape) for pose in poses])
  print(labels)
  break

In [None]:
# List the project folder on the mounted drive (the annotations CSV and ASLLVD data are read from here).
%ls drive/MyDrive/CS231A/