# 2D to 3D Lifting Network

## Imports and Setup

In [1]:
import os

import numpy as np
import pandas as pd
from skimage import io, transform
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import matplotlib.pyplot as plt
plt.ion()   # interactive mode

<matplotlib.pyplot._IonContext at 0x25cf8deabe0>

In [2]:
%matplotlib widget
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def set_axes_equal(ax):
    # workaround, as matplotlib's 3D plot has no option for equisised axes (10/2021)
    x_limits = ax.get_xlim3d()
    y_limits = ax.get_ylim3d()
    z_limits = ax.get_zlim3d()

    x_range = abs(x_limits[1] - x_limits[0])
    x_middle = np.mean(x_limits)
    y_range = abs(y_limits[1] - y_limits[0])
    y_middle = np.mean(y_limits)
    z_range = abs(z_limits[1] - z_limits[0])
    z_middle = np.mean(z_limits)

    plot_radius = 0.5*max([x_range, y_range, z_range])

    ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
    ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
    ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])


In [3]:
class BugDataset(Dataset):
    """Bug dataset."""

    def __init__(self, df, root_dir, transform=None):
        """
        Args:
            hdf_file (string): Path to the hdf file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.bugs_frame = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.bugs_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.bugs_frame.iloc[idx, 0])
        image = io.imread(img_name)
        df_columns = self.bugs_frame.columns.values.tolist()
        sample = {'image':image}
 
        for x in range(len(df_columns)):    
            sample[df_columns[x]] = self.bugs_frame.iloc[idx,x]

        if self.transform:
            sample = self.transform(sample)

        return sample
    
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""
    def __call__(self, sample):
        image = sample['image']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = image.transpose((2, 0, 1))
        sample_keys = list(sample.keys())
        sample_data = list(sample.values())

        dic ={'image': torch.from_numpy(image)}
        dic[sample_keys[1]] =  sample_data[1]
        for x in range(2,len(sample_keys)):
            dic[sample_keys[x]] = torch.FloatTensor(sample_data[x])
        return dic

class Centralize(object):
    def __init__(self, keypoint=3):
        self.center_keypoint = keypoint
    def __call__(self, sample):
        image = sample['image']
        dic ={'image': image}
        sample_keys = list(sample.keys())
        sample_data = list(sample.values())

        dic[sample_keys[1]] =  sample_data[1]
        for x in range(2,len(sample_keys)):
            if sample_keys[x]== 'key_points_2D':
                x_diff, y_diff = sample_data[x][self.center_keypoint-1][0], sample_data[x][self.center_keypoint-1][1]
                for i in range(len(sample_data[x])):
                    sample_data[x][i][0] = sample_data[x][i][0] - x_diff
                    sample_data[x][i][1] = sample_data[x][i][1] - y_diff
                dic[sample_keys[x]] = sample_data[x]
            elif sample_keys[x]== 'key_points_3D':
                x_diff, y_diff, z_diff = sample_data[x][self.center_keypoint-1][0], sample_data[x][self.center_keypoint-1][1], sample_data[x][self.center_keypoint-1][2]
                for i in range(len(sample_data[x])):
                    sample_data[x][i][0] = sample_data[x][i][0] - x_diff
                    sample_data[x][i][1] = sample_data[x][i][1] - y_diff
                    sample_data[x][i][2] = sample_data[x][i][2] - z_diff
                dic[sample_keys[x]] = sample_data[x]
            else:
                dic[sample_keys[x]] = sample_data[x]
        return dic


In [4]:
target_dir = "../data/single_sungaya/"
out_df = pd.read_hdf(os.path.join(target_dir, "Data_3D_Pose.hdf5"))
new_df = pd.DataFrame()
for col, x in out_df.iterrows():
    if x['visibility'][2]==1:
        new_df = new_df.append(x)
new_df.reset_index(drop=True, inplace=True)

new_df['key_points_2D'] = new_df['key_points_2D'].apply(np.array)
new_df['key_points_3D'] = new_df['key_points_3D'].apply(np.array)

## Normalise Dataframe

In [5]:
array_2d = np.array(new_df['key_points_2D'].to_numpy())
array_3d = np.array(new_df['key_points_3D'].to_numpy())

for x in range(len(array_2d)):
    array_2d[x] = np.array(array_2d[x])
    array_3d[x] = np.array(array_3d[x])

fixed_array_2d = np.empty((len(array_2d),124))
fixed_array_3d = np.empty((len(array_3d),186))
for x in range(len(fixed_array_2d)):
    z = array_2d[x].reshape(1,124)
    fixed_array_2d[x] = z
for x in range(len(fixed_array_3d)):
    z = array_3d[x].reshape(1,186)
    fixed_array_3d[x] = z

means_2d = np.mean(fixed_array_2d, axis=0).reshape((62,2))
means_3d = np.mean(fixed_array_3d, axis=0).reshape((62,3))
std_2d =  np.std(fixed_array_2d, axis=0).reshape((62,2))
std_3d = np.std(fixed_array_3d, axis=0).reshape((62,3))

def normal_2(x):
    return (x-means_2d)/std_2d
def normal_3(x):
    return (x-means_3d)/std_3d

new_df['key_points_2D'] = new_df['key_points_2D'].apply(normal_2)
new_df['key_points_3D'] = new_df['key_points_3D'].apply(normal_3)


In [6]:
sungaya_dataset = BugDataset(df=new_df,
                             root_dir=target_dir,transform=transforms.Compose([
                                Centralize(),
                                ToTensor()
                                   ]))

In [7]:
sample = sungaya_dataset[1]
# print(sample)
image = sample["file_name"]
plt.figure()
plt.imshow(io.imread(os.path.join(target_dir,image)))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
train_split = 0.7
valid_split = 0.1
train_size = int(train_split * len(sungaya_dataset))
valid_size = int(valid_split * len(sungaya_dataset))
test_size = len(sungaya_dataset) - (train_size+valid_size)
train_dataset,valid_dataset ,test_dataset = torch.utils.data.random_split(sungaya_dataset, [train_size, valid_size, test_size])

In [9]:
batch_size = 64

train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [10]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))    
class Net(nn.Module):
    
    def __init__(self,n_inputs,hidden,n_output):
        super(Net, self).__init__()
        self.flatten = nn.Flatten()
        self.seq1 = nn.Sequential(
            nn.Linear(n_inputs, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, n_inputs)
        )
        self.seq2 = nn.Sequential(
            nn.Linear(n_inputs, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, n_inputs)
        )
        self.out = nn.Linear(n_inputs, n_output)
    def forward(self, x):
        x = self.flatten(x)
        residual = x
        out = self.seq1(x)
        out+=residual
        out = self.seq2(out)
        return self.out(x)


Using cuda device


In [295]:
actual_dist=[4,4,4]
accdists = abs((actual_dist-means_3d)/std_3d)
accz_dists = torch.from_numpy(accdists.T[2])

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, data in enumerate(dataloader):
        X = data['key_points_2D']
        y = data['key_points_3D'][:,:,2]
        mask = data['visibility']
        X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)

        # ## Calculate mask
        # z = np.repeat(mask, repeats=124, axis=0)
        # print(z.shape)
        # z = z.reshape((64,62,2))
        # X = X*z

        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data in dataloader:
#             print(data['key_points_2D'])
            X = data['key_points_2D']
            y = data['key_points_3D'][:,:,2]
            # print(y)
            X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            
            correct += (abs(pred - y)<accz_dists.to(device)).type(torch.float).sum().item()
    test_loss /= num_batches
    print(f"Validation Error: \n Accuracy: {(correct / size):>4f}%, Avg loss: {test_loss:>8f} \n")

In [296]:
model = Net(2*62,512,62).to(device)

epochs = 5
learning_rate =5e-3
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(valid_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 1.862072  [    0/ 2286]
Validation Error: 
 Accuracy: 55.733129%, Avg loss: 1.882792 

Epoch 2
-------------------------------
loss: 1.771211  [    0/ 2286]
Validation Error: 
 Accuracy: 55.843558%, Avg loss: 1.823327 

Epoch 3
-------------------------------
loss: 1.702048  [    0/ 2286]
Validation Error: 
 Accuracy: 55.978528%, Avg loss: 1.776935 

Epoch 4
-------------------------------
loss: 1.647776  [    0/ 2286]
Validation Error: 
 Accuracy: 56.088957%, Avg loss: 1.739451 

Epoch 5
-------------------------------
loss: 1.603825  [    0/ 2286]
Validation Error: 
 Accuracy: 56.196319%, Avg loss: 1.708098 

Done!


In [298]:
model.eval()
with torch.no_grad():
    for data in test_dataloader:
        image = data['file_name']
        print(image)
        vis = data['visibility'].numpy()
        X = data['key_points_2D']
        y = data['key_points_3D']
        vis = data['visibility']
        # print(y)
        X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)
        pred = model(X)
        break


['9832_Img_synth.png', '4439_Img_synth.png', '5589_Img_synth.png', '9929_Img_synth.png', '3888_Img_synth.png', '3330_Img_synth.png', '660_Img_synth.png', '6153_Img_synth.png', '3999_Img_synth.png', '2243_Img_synth.png', '3280_Img_synth.png', '8195_Img_synth.png', '3887_Img_synth.png', '1815_Img_synth.png', '9520_Img_synth.png', '853_Img_synth.png', '2519_Img_synth.png', '5559_Img_synth.png', '3894_Img_synth.png', '2786_Img_synth.png', '4271_Img_synth.png', '9687_Img_synth.png', '4616_Img_synth.png', '6666_Img_synth.png', '8465_Img_synth.png', '5722_Img_synth.png', '1619_Img_synth.png', '5527_Img_synth.png', '9034_Img_synth.png', '3514_Img_synth.png', '9575_Img_synth.png', '7880_Img_synth.png', '9872_Img_synth.png', '7501_Img_synth.png', '2596_Img_synth.png', '9066_Img_synth.png', '3166_Img_synth.png', '8291_Img_synth.png', '305_Img_synth.png', '2393_Img_synth.png', '2341_Img_synth.png', '1875_Img_synth.png', '9458_Img_synth.png', '7841_Img_synth.png', '777_Img_synth.png', '268_Img_synt

In [314]:
def plot_stick_bug(ax, points, vis, simple=False):
    limb_ranges=[[0,7],[8,14],[15,21],[22,28],[29,35],[36,42],[43,49],[53,56],[59,62]]
    if len(points) < 62:
        return
    elif len(points) == 62:
        points = np.array(points).T
        if not simple:
            for (fr,end) in limb_ranges:
                # print(vis.numpy())
                # if vis[fr:] == 1 or vis[end-1] == 1:
                ax.plot(points[0][fr:end], points[1][fr:end], points[2][fr:end])
                ax.scatter(points[0][fr:end], points[1][fr:end], points[2][fr:end], marker='o',s=10)
            return ax

In [315]:
sample = 3
print(image[sample])
prediction = (pred[sample].cpu()).numpy()
actual = (y[sample].cpu()).numpy()


mean = means_3d[:,2]
std = std_3d[:,2]

unnormalised_est = (prediction*std)+mean
unnormalised_acc = (actual*std_3d)+means_3d
# print(unnormalised_est)

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

for i in range(len(unnormalised_acc)):
    # ax.scatter(unnormalised_acc[i,0], unnormalised_acc[i,1], unnormalised_acc[i,2], marker='o',s=10)
    ax.scatter(unnormalised_acc[i,0], unnormalised_acc[i,1], unnormalised_est[i], marker='x',s=10)
    # print("Differences Point:", i, np.abs(unnormalised_acc[i,2]-unnormalised_est[i]))

plot_stick_bug(ax, unnormalised_acc, vis[sample])

ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')

# use custom function to ensure equal axis proportions
set_axes_equal(ax)

# opens external plot
plt.show()

9929_Img_synth.png


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [294]:
# x = np.ones((64,62,2))
y = np.array([1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# z = np.repeat(y, repeats=128, axis=0).T
# print(z.shape)
# z = z.reshape((64,62,2))

z = np.repeat(y, repeats=124, axis=0)
print(z.shape)
z = z.reshape((64,62,2))
# X = X*z

z

(7688,)


ValueError: cannot reshape array of size 7688 into shape (64,62,2)

In [175]:
y = x*z
y[0]

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.