# Motion Tracker

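This notebook trains two CNNs on images of a person: a `BoundingBoxFinder` that predicts the boundaries and a `PositionFinder` that predicts the 2-D positions of the body parts. Let's see how this goes.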

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import os
from PIL import Image
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader
from model import MotionDataset, PositionFinder, BoundingBoxFinder
import helper

# Min/max statistics gathered over both CSV files. They are handed to
# MotionDataset below (presumably for normalising the values -- confirm in
# helper.get_minmax / MotionDataset).
minmax, minmax_z = helper.get_minmax('train/input.csv', 'test/input.csv')
print(minmax[0], minmax[1])

# Every image is resized to img_width x img_width before entering the network.
img_width = 256

# Per-channel mean / std used for normalisation (the values torchvision
# documents for its ImageNet-pretrained models).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Random rotation / random-resized-crop augmentation was tried for training and
# is currently disabled, so both pipelines are identical. They stay separate
# objects so augmentation can be re-enabled for training only.
# NOTE(review): geometric augmentation would also have to be applied to the
# coordinate labels, otherwise image and targets no longer match.
train_transforms = transforms.Compose([
    transforms.Resize((img_width, img_width)),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

test_transforms = transforms.Compose([
    transforms.Resize((img_width, img_width)),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

motions = MotionDataset('train/input.csv', 'train', train_transforms, minmax, minmax_z)
trainloader = DataLoader(motions, batch_size=16, shuffle=True)

# Pull a single batch as a sanity check. The third element is bound to
# `targets` (the name the training cells use) rather than `transform`, which
# would overwrite the `skimage.transform` module imported at the top.
img, details, targets, confidences, boundaries = next(iter(trainloader))

motions_test = MotionDataset('test/input.csv', 'test', test_transforms, minmax, minmax_z)
testloader = DataLoader(motions_test, batch_size=16, shuffle=True)

# Shape of the per-sample extra features fed to the network next to the image.
details.shape

[[271.3513 300.9344 275.8941 ...   0.     437.951  512.    ]
 [362.5138 315.8901 361.0373 ...   0.     512.     512.    ]
 [280.8587 313.8994 280.6007 ...   0.     512.     512.    ]
 ...
 [258.1431 179.7763 257.596  ... 140.9268 411.5411 509.9083]
 [257.6646 167.4352 255.3693 ... 158.8761 424.7657 512.    ]
 [254.0496 181.4941 254.3449 ... 145.4788 396.5332 498.7738]]
0.0 512.0


torch.Size([16, 2])

## Prepare neural network model


In [2]:
# Instantiate the two networks; each constructor receives the side length of
# the (square) resized input image. `bmodel` is trained against `boundaries`
# and `model` against the body-part coordinates in the training cells below.
bmodel = BoundingBoxFinder(img_width)
model = PositionFinder(img_width)
# Print the layer layout of the position network.
print(model)

PositionFinder(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (hidden1): Linear(in_features=131074, out_features=1024, bias=True)
  (output): Linear(in_features=1024, out_features=44, bias=True)
  (dropout): Dropout(p=0.4, inplace=False)
)


In [3]:
def my_loss(output, target):
    """Return the Euclidean (L2) distance between ``output`` and ``target``.

    The distance is taken over all elements of both tensors at once, so the
    result is a single scalar tensor for the whole batch:
    ``sqrt(sum((output - target) ** 2))``.

    For comparison: MSE uses ``(x - y) ** 2`` and L1 uses ``abs(x - y)``.
    Per-joint variants (a ``cdist`` diagonal mean, and a loop over (x, y)
    pairs) were also experimented with; the plain distance below is the one
    whose results seemed quite good.

    Args:
        output: predicted tensor.
        target: ground-truth tensor, broadcastable with ``output``.

    Returns:
        A 0-dimensional tensor holding the 2-norm of ``output - target``.
    """
    return torch.dist(output, target, p=2)

## Train our model

best loss (L1Loss Sum, 0.003, conv4)
0.001 - 0.003


```
(Epoch 22/25) Training loss: 6.749771996027863, Test loss: 10.508017539978027
(Epoch 23/25) Training loss: 6.6242278638523295, Test loss: 10.340709686279297
(Epoch 24/25) Training loss: 6.520543051754824, Test loss: 10.395054817199707
```

Input: the image (human centered) plus two extra features, height and isFacingForward.

Output: (x, y) coordinates for 22 body parts (44 values).
 


## 1. Boundaries detection


In [None]:
from torch import nn, optim

# Train on the GPU when one is present, otherwise fall back to the CPU instead
# of crashing on `.to('cuda')`. The position-training cell reuses `device`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Optimizers require the parameters to optimize and a learning rate.
optimizer = optim.SGD(bmodel.parameters(), lr=0.0007, momentum=0.7)
criterion = nn.L1Loss(reduction='sum')
bmodel.to(device)
train_losses, test_losses = [], []
epochs = 25

for e in range(epochs):
    # ---- training pass -------------------------------------------------
    running_loss = 0
    for images, details, targets, confidences, boundaries in trainloader:
        images, boundaries = images.to(device), boundaries.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        ps = bmodel.forward(images)
        loss = criterion(ps, boundaries)
        loss.backward()
        # Take an update step with the freshly computed gradients.
        optimizer.step()
        running_loss += loss.item()

    # ---- validation pass -----------------------------------------------
    # NOTE: validation uses my_loss (L2 distance) while training uses a summed
    # L1 loss, so the two printed numbers are not directly comparable.
    test_loss = 0
    bmodel.eval()  # disable dropout while evaluating
    # No gradients needed for validation; saves memory and computation.
    with torch.no_grad():
        for images, details, targets, confidences, boundaries in testloader:
            images, boundaries = images.to(device), boundaries.to(device)
            ps = bmodel.forward(images)
            # Stays a tensor on `device`; the plotting cell moves it to the CPU.
            test_loss += my_loss(ps, boundaries)
    bmodel.train()

    # Average the per-batch losses over the number of batches.
    test_loss = test_loss/len(testloader)
    train_loss = running_loss/len(trainloader)
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    print(f"(Epoch {e}/{epochs}) Training loss: {train_loss}, Test loss: {test_loss}")

# Persist the trained weights only (state_dict), not the whole module.
PATH = 'bmodel.m'
torch.save(bmodel.state_dict(), PATH)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


(Epoch 0/25) Training loss: 9.297963367219557, Test loss: 1.3056598901748657
(Epoch 1/25) Training loss: 5.771352807680766, Test loss: 1.1232725381851196
(Epoch 2/25) Training loss: 4.444771765878326, Test loss: 1.0578163862228394


In [None]:
from torch import nn, optim

# Optimizers require the parameters to optimize and a learning rate.
optimizer = optim.SGD(model.parameters(), lr=0.0007, momentum=0.7)
# Summed L1 loss over all predicted coordinates (MSELoss was tried as well).
criterion = nn.L1Loss(reduction='sum')
# Kept for the currently disabled confidence term, which would be added to the
# coordinate loss as bceloss(ps2, confidences) / bceloss_w.
bceloss = nn.BCELoss()
bceloss_w = 4

# Only query the GPU name when a GPU exists; the call raises otherwise.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

train_losses, test_losses = [], []
epochs = 25
# `device` is defined in the boundary-training cell above.
model.to(device)

for e in range(epochs):
    # ---- training pass -------------------------------------------------
    running_loss = 0
    for images, details, targets, confidences, boundaries in trainloader:
        # Move only what the forward pass and the loss need. (The original
        # line unpacked five values into four names and raised ValueError.)
        images, details, targets = images.to(device), details.to(device), targets.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        ps = model.forward(images, details)
        loss = criterion(ps, targets)
        loss.backward()
        # Take an update step with the freshly computed gradients.
        optimizer.step()
        running_loss += loss.item()

    # ---- validation pass -----------------------------------------------
    # NOTE: validation uses my_loss (L2 distance) while training uses a summed
    # L1 loss, so the two printed numbers are not directly comparable.
    test_loss = 0
    model.eval()  # disable dropout while evaluating
    # No gradients needed for validation; saves memory and computation.
    with torch.no_grad():
        for images, details, targets, confidences, boundaries in testloader:
            images, details, targets = images.to(device), details.to(device), targets.to(device)
            ps = model.forward(images, details)
            # Stays a tensor on `device`; the plotting cell moves it to the CPU.
            test_loss += my_loss(ps, targets)
    model.train()

    # Average the per-batch losses over the number of batches.
    test_loss = test_loss/len(testloader)
    train_loss = running_loss/len(trainloader)
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    print(f"(Epoch {e}/{epochs}) Training loss: {train_loss}, Test loss: {test_loss}")

# Persist the trained weights only (state_dict), not the whole module.
PATH = 'model.m'
torch.save(model.state_dict(), PATH)

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
test_losses = [test_loss.to('cpu') for test_loss in test_losses]
plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
plt.legend(frameon=False)

## Use our model

In [None]:
%matplotlib inline
# test_transform = transforms.Compose([transforms.Resize((img_width,img_width)),
#                                 transforms.ToTensor()])
import helper
# test_dataset = datasets.ImageFolder('train/', transform=test_transform)
# test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=True)
images, details, labels, confidences = next(iter(testloader))


model.to('cpu')

# img = images[0].view(1, 195075)
# img = torch.zeros(1,120000) + 222

positions = []
positions_expected = []
# Turn off gradients to speed up this part
with torch.no_grad():
    logps = model.forward(images, details)
    logps_denormalized = logps 
    labels_denormalized = labels 
    
    for body_index in range(22):
        xyz = []
        xyz_e = []
        for pos_index in range(2):
            xyz.append(logps_denormalized[0][body_index*2+pos_index])
            xyz_e.append(labels_denormalized[0][body_index*2+pos_index])
        positions.append(xyz)
        positions_expected.append(xyz_e)
#     print(list(model.parameters()))

positions = np.array(positions)
positions_expected = np.array(positions_expected)
fig = plt.figure()
ax = plt.axes()

# zdata = positions.T[1]
xdata = positions.T[0]
ydata = positions.T[1]
ax.set_xlim((0,1))
ax.set_ylim((0,1))
ax.scatter(xdata, ydata);

# zdata_e = positions_expected.T[1]
xdata_e = positions_expected.T[0]
ydata_e = positions_expected.T[1]
ax.scatter(xdata_e, ydata_e, marker='^')
helper.imshow(images[0], xdata=xdata, ydata=ydata)

for i, pos in enumerate(positions):
    label = None
    if i == 4:
        label = "Neck"
    elif i == 5:
        label = "Head"
    elif i == 9:
        label = "LeftHand"
    elif i == 12:
        label = "LeftFoot"
    elif i == 17:
        label = "RightHand"
    elif i == 20:
        label = "RightFoot"
    if label is not None:
        label = label + "{:.2f}".format(confidences[0][i].item())
        ax.text(pos[0], pos[1], label, None, color="green")

In [None]:
# Euclidean distance between the expected and the predicted position of every
# body part, in the same order as `positions`.
distances = [
    np.linalg.norm(expected - predicted)
    for predicted, expected in zip(positions, positions_expected)
]

print("Total distance", np.array(distances).sum())
plt.plot(distances, label='Distances')
plt.legend(frameon=False)

## Load the second test set

In [None]:
# A second test set read from test2/, using the same preprocessing pipeline
# and min/max statistics as the main test set.
motions_test2 = MotionDataset(
    'test2/input.csv', 'test2', test_transforms, minmax, minmax_z
)
testloader2 = DataLoader(motions_test2, batch_size=3, shuffle=True)