In [6]:
!pip install pillow==8.1.0
!pip install matplotlib==3.3.4
!pip install numpy==1.19.3
!pip install opencv-python==4.5.1.48
!pip install tqdm==4.56.0
!pip install requests==2.25.1|

!pip install mediapipe==0.8.3

/bin/bash: -c: line 1: syntax error: unexpected end of file


In [7]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [5]:
!cp -r gdrive/MyDrive/VisionLab2Project/data ./


## Mediapipe Pose

In [9]:
import os
import tqdm
from mediapipe.python.solutions import pose as mp_pose

import os
import torch
import os
import torch
import numpy as np
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import random
import cv2 as cv
from time import time
from glob import glob

random.seed(42)

# Set our device:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)
else:
    device = torch.device("cpu")

class PoseTranslate(Dataset):
    def __init__(self, basedir, imgset="train", subset_size=30):
        super(Dataset, self).__init__()

        self.pose_tracker = mp_pose.Pose(upper_body_only=False)
        self.basedir = basedir
        path = os.path.join(self.basedir, "subject_*", "body", "0000*")
        pose_list = glob(path)
        pose_list = sorted(pose_list)
        self.random_split = subset_size

        size = len(pose_list)
        trainset_size = int(size * 0.8)
        if imgset == "train":
            subject_list = pose_list[:trainset_size]
        elif imgset == "val":
            subject_list = pose_list[trainset_size:]

        self.subjects = self.load_poses(subject_list)

        print(f"Dataset loaded {len(self.subjects)} subjects")
        
    def read_batch_imgs(self, imgs_list):
        batch = []
        poses = torch.zeros((len(imgs_list), 33, 3))
        for i, imgpath in enumerate(imgs_list):
            loaded = cv.imread(imgpath, cv.IMREAD_COLOR)
            result = self.pose_tracker.process(image=loaded)
            pose_landmarks = result.pose_landmarks
            if pose_landmarks is not None:
                pose_landmarks = np.array([[lmk.x, lmk.y, lmk.z]
                                        for lmk in pose_landmarks.landmark], dtype=np.float32)
            else:
                pose_landmarks = np.zeros((33,3))
            pose_landmarks = torch.from_numpy(pose_landmarks)
            poses[i] = pose_landmarks
            batch.append(loaded)
        return batch, poses

    def load_poses(self, pose_list):
      loaded = []
      for pose_dir in pose_list:
        img_list = sorted(glob(os.path.join(pose_dir, "image", "*.jpg")))
        keypointspath = os.path.join(pose_dir, "reconstruction", "smpl_parameter.txt")
        keypoints = np.loadtxt(keypointspath)
        keypoints = keypoints[4:76]
        keypoints = keypoints.reshape(24,3)
        keypoints = torch.from_numpy(keypoints)
        indices = np.arange(len(img_list))
        randomsplit = np.sort(np.random.choice(indices, size=self.random_split, replace=False))
        img_split = [img_list[j] for j in randomsplit]
        batch, poses = self.read_batch_imgs(img_split)
        all_keypoints = torch.zeros((poses.shape[0], keypoints.shape[0], keypoints.shape[1]))
        all_keypoints[:] = keypoints

        d = {
          "imgs" : img_split,
          "batch" : batch,
          "keypoints" : all_keypoints,
          "poses" : poses
        }
        loaded.append(d)
      return loaded


    def __len__(self):
        return len(self.subjects)

    def __getitem__(self, index):
        d = self.subjects[index]
        return d

In [17]:
dataset = PoseTranslate(basedir="data", imgset="train", subset_size=10)
d = dataset[0]

Dataset loaded 48 subjects


In [10]:
import torch
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim

In [11]:
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
        nn.init.zeros_(m.bias.data)
    if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
        nn.init.zeros_(m.bias.data)

In [31]:
class PoseTranslator(nn.Module):
  def __init__(self):
    super(PoseTranslator, self).__init__()
    self.layers = nn.Sequential(
      nn.Flatten(),
      nn.Linear(in_features=99, out_features=1024),
      nn.BatchNorm1d(num_features=1024),
      nn.ReLU(),
      nn.Dropout(0.3),
      nn.Linear(in_features=1024, out_features=1024),
      nn.BatchNorm1d(num_features=1024),
      nn.ReLU(),
      nn.Dropout(0.3),
      nn.Linear(in_features=1024, out_features=1024),
      nn.BatchNorm1d(num_features=1024),
      nn.ReLU(),
      nn.Dropout(0.3),
      nn.Linear(in_features=1024, out_features=72),
    )
  
  def forward(self, input):
    out = self.layers(input)
    out = out.reshape(-1, 24, 3)
    return out

In [13]:
batch_size = 1
trainset = PoseTranslate(basedir="data", imgset="train", subset_size=30)
trainloader = DataLoader(trainset, batch_size = batch_size, shuffle=True)
valset = PoseTranslate(basedir="data", imgset="val", subset_size=30)
valloader = DataLoader(valset, batch_size = batch_size, shuffle=True)


Dataset loaded 48 subjects
Dataset loaded 12 subjects


In [32]:
epoch_losses = []
epoch_iou = []
learning_rates = []
val_losses = []

In [38]:
load = True
path = "translate_checkpoint.pt"

In [40]:
model = PoseTranslator().to(device)

#learning rate
lr = 25e-5
# learning_rate = 25e-6
weight_decay = 1e-4

gamm = 0.7
#number of training epochs
epoch_n = 50

# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=weight_decay)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay) # Initialize the optimizer as SGD
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9, verbose=False)

lrs = []

best_val = -1
if load:
  
  # model.load_state_dict(torch.load(path))
  checkpoint = torch.load(path)
  model.load_state_dict(checkpoint['model_state_dict'])
  optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  best_val = checkpoint['best_val']
  print(f'Model loaded with best val of {best_val}')
else:
  model.apply(weights_init)


for e in range(epoch_n):
  epoch_loss = 0
  model.train()
  pbar = tqdm(enumerate(trainloader))
  for i, data in pbar:
    
    optimizer.zero_grad()
    
    inputs = data['poses'].clone().squeeze_()
    label = data['keypoints'].clone().squeeze_()
    
    inputs = inputs.to(device).requires_grad_(True)
    label = label.to(device)

    pred = model(inputs)
    # pred = TF.resize(pred, (label.shape[-1]))
    loss = criterion(pred, label)
    loss.backward()
    epoch_loss += loss.item()
    optimizer.step()
    # print('batch %d --- Loss: %.4f' % (i, loss.item() / batch_size))
    batch_loss = round(loss.item() / batch_size, 4)
    pbar.set_description(f"t_loss: {batch_loss}")
  
  epoch_loss = epoch_loss / len(trainset)
  epoch_losses.append(epoch_loss)
  learning_rates.append(exp_lr_scheduler.get_last_lr())
  exp_lr_scheduler.step()

  # print('Epoch %d / %d --- Loss: %.4f' % (e + 1, epoch_n, epoch_loss))

  # torch.save(model.state_dict(), 'checkpoint.pt')
  model.eval()

  total = 0
  correct = 0
  total_loss = 0
  img_iou = []
  # print('Running validation')
  with torch.no_grad():
    pbar = tqdm(enumerate(valloader))
    for i, data in pbar:
      inputs = data['poses'].clone()
      inputs.squeeze_()
      inputs = inputs.to(device).requires_grad_(True)
      
      label = data['keypoints'].clone()
      label.squeeze_()
      label = label.to(device)
      
      pred = model(inputs)
      vloss = criterion(pred, label)  
      batchloss = round(vloss.item(), 4)
      total_loss += batchloss
      pbar.set_description(f"v_loss: {batchloss}")
    val_loss = total_loss / (len(valloader))
    val_losses.append(val_loss)
    
  
  # print(f"Val loss: {round(val_loss, 4)}")
  val_loss = round(val_loss, 2)
  if val_loss < best_val or best_val < 0:
    best_val = val_loss
    print(f"Saving new best model with best val {best_val}")
    torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_val' : best_val
            }, path)

print(f"finished with best val error of {best_val}")

t_loss: 0.7416: : 3it [00:00, 26.42it/s]

Model loaded with best val of 0.2


t_loss: 0.8297: : 48it [00:01, 25.98it/s]
v_loss: 0.1828: : 12it [00:00, 27.02it/s]
t_loss: 0.8717: : 48it [00:01, 27.06it/s]
v_loss: 0.1994: : 12it [00:00, 27.45it/s]
t_loss: 0.738: : 48it [00:02, 22.77it/s]
v_loss: 0.1678: : 12it [00:00, 26.97it/s]
t_loss: 0.7709: : 48it [00:01, 26.33it/s]
v_loss: 0.2076: : 12it [00:00, 30.92it/s]
t_loss: 0.6282: : 48it [00:01, 24.45it/s]
v_loss: 0.2005: : 12it [00:00, 26.87it/s]
t_loss: 0.7088: : 48it [00:02, 23.76it/s]
v_loss: 0.2017: : 12it [00:00, 27.49it/s]
t_loss: 0.7171: : 48it [00:01, 25.25it/s]
v_loss: 0.1957: : 12it [00:00, 28.49it/s]
t_loss: 0.797: : 48it [00:01, 24.16it/s]
v_loss: 0.2391: : 12it [00:00, 22.28it/s]
t_loss: 0.7576: : 48it [00:01, 24.35it/s]
v_loss: 0.2062: : 12it [00:00, 25.72it/s]
t_loss: 0.7059: : 48it [00:02, 21.45it/s]
v_loss: 0.1928: : 12it [00:00, 22.54it/s]
t_loss: 0.7481: : 48it [00:02, 20.57it/s]
v_loss: 0.2009: : 12it [00:00, 24.12it/s]
t_loss: 0.6959: : 48it [00:01, 25.39it/s]
v_loss: 0.2006: : 12it [00:00, 24.82

Saving new best model with best val 0.19


t_loss: 0.6434: : 48it [00:02, 22.33it/s]
v_loss: 0.1859: : 12it [00:00, 26.99it/s]
t_loss: 0.6798: : 48it [00:02, 21.83it/s]
v_loss: 0.1867: : 12it [00:00, 25.57it/s]
t_loss: 0.6572: : 48it [00:02, 17.35it/s]
v_loss: 0.1931: : 12it [00:00, 24.14it/s]
t_loss: 0.7816: : 48it [00:01, 26.01it/s]
v_loss: 0.2499: : 12it [00:00, 27.74it/s]
t_loss: 0.6344: : 48it [00:01, 27.44it/s]
v_loss: 0.1915: : 12it [00:00, 31.15it/s]

finished with best val error of 0.19





In [41]:
!cp translate_checkpoint.pt gdrive/MyDrive/VisionLab2Project/