# Setup

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import json
import numpy as np
import cv2
from google.cloud import storage
import os

# Point the Google Cloud client libraries at the service-account key file.
# setdefault keeps a path that is already exported in the environment, so the
# notebook also runs on machines where the key file lives somewhere else.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "pose-estimation-310415-41f7e0e35872.json",
)

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
# To force CPU execution, replace the expression with torch.device('cpu').
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device: %s" % device)

device: cuda


In [2]:
# Read the annotation lists for the training split and for the split that
# this notebook calls "test" (stored on disk as valid.json).
with open('annotations/train.json') as train_file:
    train_data = json.load(train_file)
with open('annotations/valid.json') as valid_file:
    test_data = json.load(valid_file)

# Handle to the Cloud Storage bucket that the images are downloaded from.
storage_client = storage.Client(project="pose_estimation")
bucket = storage_client.get_bucket('pose_estimation_datasets')

# Number of training samples iterated per epoch by the training loop
# (alternative value kept from the original notebook: 22246).
NUM_TRAIN = 4
# Sample count for the test split; not referenced in the cells visible here.
NUM_TEST = 2958

# Hyperparameters

In [3]:
# Number of images grouped into one optimisation step.
batch_size = 4
# Number of passes the training loop makes over the first NUM_TRAIN samples.
epochs = 30
# Step size handed to the Adam optimizer.
learning_rate = 1e-4

# Train Loop

In [4]:
import modules
import gc

from modules.unipose import UniPose
from modules.criterion.distribution_difference_loss import DistributionDifferenceLoss 
from modules.criterion.joint_max_mse_loss import JointMaxMSELoss

# Instantiate the network, then move it onto the selected device.
model = UniPose()
model = model.to(device)

# Training objective and the Adam optimizer that updates the model weights.
criterion = DistributionDifferenceLoss(device)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Second loss, only printed next to the training loss as a sanity check.
alt_criterion = JointMaxMSELoss()

In [None]:
# Size every image is resized to before batching. cv2.resize takes its target
# size as (width, height) while ndarray.shape is (height, width, ...), so both
# dimensions are named explicitly to keep the two orders from being mixed up.
TARGET_WIDTH = 960
TARGET_HEIGHT = 720


def _load_sample(index):
    """Download one training image and return it together with its joints.

    Args:
        index: Position of the sample in ``train_data``.

    Returns:
        Tuple ``(img, kpt)``. ``img`` is the decoded image resized to
        TARGET_HEIGHT x TARGET_WIDTH. ``kpt`` is a float32 array built from
        the sample's ``'joints'`` entry, with column 0 scaled by the width
        ratio and column 1 by the height ratio of the resize.

    Raises:
        ValueError: If the downloaded bytes cannot be decoded as an image.
    """
    sample = train_data[index]

    blob = bucket.blob('MPII/images/' + sample['image'])
    blob.content_type = 'image/jpeg'
    encoded = np.frombuffer(blob.download_as_string(), dtype=np.uint8)
    img = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals failure by returning None instead of raising.
        raise ValueError(f"could not decode image {sample['image']!r}")

    # Float storage keeps the fractional part of the rescaled coordinates;
    # an integer array would silently truncate them on assignment.
    kpt = np.asarray(sample['joints'], dtype=np.float32)

    height, width = img.shape[:2]
    if (height, width) != (TARGET_HEIGHT, TARGET_WIDTH):
        kpt[:, 0] *= TARGET_WIDTH / width
        kpt[:, 1] *= TARGET_HEIGHT / height
        img = cv2.resize(img, (TARGET_WIDTH, TARGET_HEIGHT))

    return img, kpt


# One list of per-batch losses for every epoch.
epoch_losses = []

# Never index past the annotations that were actually loaded.
num_train = min(NUM_TRAIN, len(train_data))

for epoch in range(epochs):
    epoch_loss = []

    for start_i in range(0, num_train, batch_size):
        # Clamp the end so a final, smaller batch stays inside the data.
        stop_i = min(start_i + batch_size, num_train)
        samples = [_load_sample(i) for i in range(start_i, stop_i)]
        imagelist = [img for img, _ in samples]
        kptlist = [kpt for _, kpt in samples]

        # Stack into single arrays first: building a tensor from a Python list
        # of ndarrays goes through a slow element-by-element conversion.
        torch_image = torch.from_numpy(np.stack(imagelist)).float()
        # Move the channel axis from last to second position.
        torch_image = torch_image.permute(0, 3, 1, 2).to(device)
        kpt_tensor = torch.from_numpy(np.stack(kptlist))

        # Train on batch
        optimizer.zero_grad()
        out = model(torch_image)
        batch_loss = criterion(out, kpt_tensor)
        batch_loss.backward()
        optimizer.step()

        batch_loss_value = batch_loss.item()
        epoch_loss.append(batch_loss_value)

        # The second loss is only reported, never back-propagated, so it is
        # evaluated without recording autograd history.
        with torch.no_grad():
            alt_loss = alt_criterion(out, kpt_tensor.to(device))

        print(f'Epoch: {epoch}, Batch: {start_i // batch_size}, Batch Distribution Difference Loss: {batch_loss_value}, JointMaxMSELoss (to see if model is working): {alt_loss}')

        # Release Python garbage and cached CUDA blocks between batches.
        gc.collect()
        torch.cuda.empty_cache()

    # Guard against an empty epoch (num_train == 0) instead of dividing by zero.
    mean_loss = sum(epoch_loss) / len(epoch_loss) if epoch_loss else float('nan')
    print(f'Epoch: {epoch}, Average Batch Loss: {mean_loss}')
    epoch_losses.append(epoch_loss)

Epoch: 0, Batch: 0, Batch Distribution Difference Loss: 0.004599871579557657, JointMaxMSELoss (to see if model is working): 406009.78125
Epoch: 0, Average Batch Loss: 0.004599871579557657
Epoch: 1, Batch: 0, Batch Distribution Difference Loss: 0.006378494668751955, JointMaxMSELoss (to see if model is working): 442364.5625
Epoch: 1, Average Batch Loss: 0.006378494668751955
Epoch: 2, Batch: 0, Batch Distribution Difference Loss: 0.005678445566445589, JointMaxMSELoss (to see if model is working): 542443.375
Epoch: 2, Average Batch Loss: 0.005678445566445589
