# 2D to 3D Lifting Network

## Imports and Setup

In [1]:
import os

import numpy as np
import pandas as pd
from skimage import io, transform
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import matplotlib.pyplot as plt
plt.ion()   # interactive mode
target_dir = "../data/single_sungaya/"
out_df = pd.read_hdf(os.path.join(target_dir, "Data_3D_Pose.hdf5"))

In [2]:
%matplotlib widget
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def set_axes_equal(ax):
    # workaround, as matplotlib's 3D plot has no option for equisised axes (10/2021)
    x_limits = ax.get_xlim3d()
    y_limits = ax.get_ylim3d()
    z_limits = ax.get_zlim3d()

    x_range = abs(x_limits[1] - x_limits[0])
    x_middle = np.mean(x_limits)
    y_range = abs(y_limits[1] - y_limits[0])
    y_middle = np.mean(y_limits)
    z_range = abs(z_limits[1] - z_limits[0])
    z_middle = np.mean(z_limits)

    plot_radius = 0.5*max([x_range, y_range, z_range])

    ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
    ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
    ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])


In [3]:
class BugDataset(Dataset):
    """Bug dataset."""

    def __init__(self, hdf_file, root_dir, transform=None):
        """
        Args:
            hdf_file (string): Path to the hdf file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.bugs_frame = pd.read_hdf(hdf_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.bugs_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.bugs_frame.iloc[idx, 0])
        image = io.imread(img_name)
        df_columns = self.bugs_frame.columns.values.tolist()
        sample = {'image':image}
 
        for x in range(len(df_columns)):    
            sample[df_columns[x]] = self.bugs_frame.iloc[idx,x]

        if self.transform:
            sample = self.transform(sample)

        return sample
    
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image = sample['image']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = image.transpose((2, 0, 1))
        sample_keys = list(sample.keys())
        sample_data = list(sample.values())

        dic ={'image': torch.from_numpy(image)}
        dic[sample_keys[1]] =  sample_data[1]
        for x in range(2,len(sample_keys)):
            dic[sample_keys[x]] = torch.FloatTensor(sample_data[x])
        return dic
class Normalize(object):
    def __init__(self,means_2, means_3, std_2, std_3):
        self.means_2 =means_2
        self.means_3 = means_3
        self.std_2 = std_2
        self.std_3 = std_3
    def __call__(self, sample):
        # print("Bruh")
        image = sample['image']
        dic ={'image': image}
        sample_keys = list(sample.keys())
        sample_data = list(sample.values())

        dic[sample_keys[1]] =  sample_data[1]
        for x in range(2,len(sample_keys)):
            if sample_keys[x]== 'key_points_2D':
                dic[sample_keys[x]] = (sample_data[x]-self.means_2)/self.std_2
            elif sample_keys[x]== 'key_points_3D':
                dic[sample_keys[x]] = (sample_data[x]-self.means_3)/self.std_3
            else:
                dic[sample_keys[x]] = sample_data[x]
        return dic


In [4]:
out_df = pd.read_hdf("../data/single_sungaya/Data_3D_Pose.hdf5")
array_2d = np.array(out_df['key_points_2D'].to_numpy())
array_3d = np.array(out_df['key_points_3D'].to_numpy())

for x in range(len(array_2d)):
    array_2d[x] = np.array(array_2d[x])
    array_3d[x] = np.array(array_3d[x])

fixed_array_2d = np.empty((3778,124))
fixed_array_3d = np.empty((3778,186))
for x in range(len(fixed_array_2d)):
    z = array_2d[x].reshape(1,124)
    fixed_array_2d[x] = z
for x in range(len(fixed_array_3d)):
    z = array_3d[x].reshape(1,186)
    fixed_array_3d[x] = z

In [5]:
means_2d = np.mean(fixed_array_2d, axis=0).reshape((62,2))
# print(means_2d)
means_3d = np.mean(fixed_array_3d, axis=0).reshape((62,3))
std_2d =  np.std(fixed_array_2d, axis=0).reshape((62,2))
# print(std_2d)
std_3d = np.std(fixed_array_3d, axis=0).reshape((62,3))

In [6]:
sungaya_dataset = BugDataset(hdf_file='../data/single_sungaya/Data_3D_Pose.hdf5',
                             root_dir=target_dir,transform=transforms.Compose([
                                Normalize(means_2d, means_3d, std_2d, std_3d),
                                ToTensor()
                                   ]))

In [7]:
# sample = sungaya_dataset[69]
# # print(sample)
# image = sample["file_name"]
# plt.figure()
# plt.imshow(io.imread(os.path.join(target_dir,image)))
# plt.show()

In [8]:
train_size = int(0.8 * len(sungaya_dataset))
test_size = len(sungaya_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(sungaya_dataset, [train_size, test_size])

In [9]:
batch_size = 64

train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [10]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))    
class Net(nn.Module):
    
    def __init__(self,n_inputs,hidden,n_output):
        super(Net, self).__init__()
        self.flatten = nn.Flatten()
        self.seq1 = nn.Sequential(
            nn.Linear(n_inputs, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, n_inputs)
        )
        self.seq2 = nn.Sequential(
            nn.Linear(n_inputs, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden, n_inputs)
        )
        self.out = nn.Linear(n_inputs, n_output)
    def forward(self, x):
        x = self.flatten(x)
        residual = x
        out = self.seq1(x)
        out+=residual
        out = self.seq2(out)
        return self.out(x)

# model = NeuralNetwork().to(device)

Using cuda device


In [11]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, data in enumerate(dataloader):
        X = data['key_points_2D']
        y = data['key_points_3D'][:,:,2]
        X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)
        
        # Compute prediction error
        # print(X.shape)
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data in dataloader:
#             print(data['key_points_2D'])
            X = data['key_points_2D']
            y = data['key_points_3D'][:,:,2]
            # print(y)
            X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            
            correct += (abs(pred - y)<0.72).type(torch.float).sum().item()
    test_loss /= num_batches
    print(f"Test Error: \n Accuracy: {(correct / size):>4f}%, Avg loss: {test_loss:>8f} \n")

In [12]:
model = Net(2*62,2054,62).to(device)

In [13]:
epochs = 5
learning_rate =5e-3
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 1.328298  [    0/ 3022]
Test Error: 
 Accuracy: 36.038360%, Avg loss: 1.021447 

Epoch 2
-------------------------------
loss: 1.093633  [    0/ 3022]
Test Error: 
 Accuracy: 37.019841%, Avg loss: 0.899449 

Epoch 3
-------------------------------
loss: 0.978459  [    0/ 3022]
Test Error: 
 Accuracy: 37.410053%, Avg loss: 0.834853 

Epoch 4
-------------------------------
loss: 0.914169  [    0/ 3022]
Test Error: 
 Accuracy: 37.710317%, Avg loss: 0.795468 

Epoch 5
-------------------------------
loss: 0.872570  [    0/ 3022]
Test Error: 
 Accuracy: 37.896825%, Avg loss: 0.767913 

Done!


In [14]:
model.eval()
with torch.no_grad():
    for data in test_dataloader:
        image = data['file_name']
        print(image)
        vis = data['visibility'].numpy()
        X = data['key_points_2D']
        y = data['key_points_3D']
        # print(y)
        X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)
        pred = model(X)
        break


['9127_Img_synth.png', '9063_Img_synth.png', '7623_Img_synth.png', '7882_Img_synth.png', '2707_Img_synth.png', '8374_Img_synth.png', '3052_Img_synth.png', '3380_Img_synth.png', '915_Img_synth.png', '6107_Img_synth.png', '9772_Img_synth.png', '7974_Img_synth.png', '6039_Img_synth.png', '6945_Img_synth.png', '8339_Img_synth.png', '4155_Img_synth.png', '3102_Img_synth.png', '6229_Img_synth.png', '3155_Img_synth.png', '3321_Img_synth.png', '2247_Img_synth.png', '3077_Img_synth.png', '8608_Img_synth.png', '67_Img_synth.png', '529_Img_synth.png', '8422_Img_synth.png', '5936_Img_synth.png', '980_Img_synth.png', '8527_Img_synth.png', '9179_Img_synth.png', '2384_Img_synth.png', '2209_Img_synth.png', '91_Img_synth.png', '961_Img_synth.png', '4904_Img_synth.png', '2484_Img_synth.png', '473_Img_synth.png', '7213_Img_synth.png', '9542_Img_synth.png', '5805_Img_synth.png', '2521_Img_synth.png', '444_Img_synth.png', '7836_Img_synth.png', '7440_Img_synth.png', '8192_Img_synth.png', '9933_Img_synth.png

In [15]:
def plot_stick_bug(ax, points, simple=False):
    limb_ranges=[[0,7],[8,14],[15,21],[22,28],[29,35],[36,42],[43,49],[53,56],[59,62]]
    if len(points) < 62:
        return
    elif len(points) == 62:
        points = np.array(points).T
        if not simple:
            for (fr,end) in limb_ranges:
                ax.plot(points[0][fr:end], points[1][fr:end], points[2][fr:end])
                ax.scatter(points[0][fr:end], points[1][fr:end], points[2][fr:end], marker='o',s=10)
            return ax


In [26]:
sample = 59

prediction = (pred[sample].cpu()).numpy()
actual = (y[sample].cpu()).numpy()


mean = means_3d[:,2]
std = std_3d[:,2]

unnormalised_est = (prediction*std)+mean
unnormalised_acc = (actual*std_3d)+means_3d
# print(unnormalised_est)

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

for i in range(len(unnormalised_acc)):
    # ax.scatter(unnormalised_acc[i,0], unnormalised_acc[i,1], unnormalised_acc[i,2], marker='o',s=10)
    ax.scatter(unnormalised_acc[i,0], unnormalised_acc[i,1], unnormalised_est[i], marker='x',s=10)
    print("Differences Point:", i, np.abs(unnormalised_acc[i,2]-unnormalised_est[i]))

plot_stick_bug(ax, unnormalised_acc)

ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')

# use custom function to ensure equal axis proportions
set_axes_equal(ax)

# opens external plot
plt.show()

[88.99670712 74.52081008 82.27856184 87.02610404 84.91326635 80.01787577
 79.16677535 91.49770786 82.97834698 77.35048865 74.88972531 69.72560311
 53.37564098 60.09707244 86.23143811 76.92643231 76.50429076 77.25325285
 66.701156   63.04500856 65.02691577 86.1515509  67.93497718 63.45905058
 70.3774278  62.94523632 63.08309975 50.33646322 84.09944551 77.4294519
 70.0195482  77.9449803  61.04299292 54.20464402 63.37799878 73.11176568
 74.0028501  82.5490163  80.49440179 60.88619502 52.14650169 55.53434705
 87.69979737 68.23590313 74.23893474 84.92671601 63.33240963 53.0654008
 55.05157213 77.91416536 82.6213757  69.0313619  75.5842984  85.37530604
 79.62975241 59.68419893 83.31401679 89.73980452 86.81115037 95.54601976
 57.3468095  58.25207089]


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Differences Point: 0 7.391285965277319
Differences Point: 1 18.172907499561518
Differences Point: 2 11.362536468801721
Differences Point: 3 11.59804844223612
Differences Point: 4 12.869372545875677
Differences Point: 5 16.67552910612646
Differences Point: 6 9.809581150482458
Differences Point: 7 1.1383322253040689
Differences Point: 8 8.37610688022707
Differences Point: 9 12.13636676878231
Differences Point: 10 8.350920497368818
Differences Point: 11 8.75429311500828
Differences Point: 12 2.3271489273548838
Differences Point: 13 12.26295855939724
Differences Point: 14 4.103354829112021
Differences Point: 15 12.859166798565113
Differences Point: 16 11.792111030028408
Differences Point: 17 10.10620564966456
Differences Point: 18 0.5195010089360892
Differences Point: 19 0.6746955458844752
Differences Point: 20 1.7570487060727515
Differences Point: 21 5.411505899490493
Differences Point: 22 22.739499588103357
Differences Point: 23 26.526262736953555
Differences Point: 24 13.589727036773596

In [18]:
(50-mean[0])/std[0]

-0.7119054123466448