# 2D to 3D Lifting Network

## Imports and Setup

In [1]:
import os
import cv2

import numpy as np
import pandas as pd
from skimage import io, transform
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import matplotlib.pyplot as plt
plt.ion()   # interactive mode
# Directory used below both as the location of the pose annotation file and
# as the root directory from which sample images are read.
target_dir = "../data/single_sungaya/"
# Load the annotation table; later cells read its 'key_points_2D' and
# 'key_points_3D' columns.
out_df = pd.read_hdf(os.path.join(target_dir, "Data_3D_Pose.hdf5"))

In [41]:
%matplotlib widget
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def set_axes_equal(ax):
    """Give the x, y and z axes of a 3D plot the same extent.

    Workaround: matplotlib's 3D plot had no option for equally sized axes
    (10/2021). Each axis keeps its current midpoint and is widened to the
    span of the largest of the three axes.

    Args:
        ax: object exposing ``get_{x,y,z}lim3d`` / ``set_{x,y,z}lim3d``.
    """
    # Read all three limit pairs first, before any of them is modified.
    bounds = [ax.get_xlim3d(), ax.get_ylim3d(), ax.get_zlim3d()]

    spans = [abs(pair[1] - pair[0]) for pair in bounds]
    centres = [np.mean(pair) for pair in bounds]

    # Half of the widest span becomes the half-width of every axis.
    plot_radius = 0.5*max(spans)

    appliers = (ax.set_xlim3d, ax.set_ylim3d, ax.set_zlim3d)
    for apply_limits, centre in zip(appliers, centres):
        apply_limits([centre - plot_radius, centre + plot_radius])


In [42]:
class BugDataset(Dataset):
    """Bug dataset backed by an HDF annotation table and an image directory.

    Every sample is a dict with the decoded image under ``'image'`` plus one
    entry per column of the annotation table, keyed by the column name.
    """

    def __init__(self, hdf_file, root_dir, transform=None):
        """
        Args:
            hdf_file (string): Path to the hdf file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.bugs_frame = pd.read_hdf(hdf_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.bugs_frame)

    def __getitem__(self, idx):
        """Build the sample dict for row ``idx`` and apply the transform, if any."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        frame = self.bugs_frame
        # The first column of the table holds the image path relative to root_dir.
        image_path = os.path.join(self.root_dir, frame.iloc[idx, 0])
        sample = {'image': io.imread(image_path)}

        # Copy every column of the row into the sample under its column name.
        sample.update(
            (column, frame.iloc[idx, position])
            for position, column in enumerate(frame.columns.values.tolist())
        )

        if self.transform:
            return self.transform(sample)
        return sample
    
class ToTensor(object):
    """Convert ndarrays in sample to Tensors.

    Entries are handled by position: the ``'image'`` entry becomes a tensor
    with the channel axis first, the entry at position 1 is passed through
    unchanged (presumably the file name -- confirm against the dataset
    columns), and every later entry is wrapped in ``torch.FloatTensor``.
    """

    def __call__(self, sample):
        # numpy image: H x W x C  ->  torch image: C x H x W
        channels_first = sample['image'].transpose((2, 0, 1))
        entries = list(sample.items())

        converted = {'image': torch.from_numpy(channels_first)}

        passthrough_key, passthrough_value = entries[1]
        converted[passthrough_key] = passthrough_value

        for key, value in entries[2:]:
            converted[key] = torch.FloatTensor(value)
        return converted
class Normalize(object):
    """Standardise the 2D and 3D keypoint entries of a sample.

    ``'key_points_2D'`` becomes ``(value - means_2) / std_2`` and
    ``'key_points_3D'`` becomes ``(value - means_3) / std_3``. Only entries
    from position 2 onwards are examined; the ``'image'`` entry and the entry
    at position 1 are copied unchanged, as is every other key.
    """

    def __init__(self, means_2, means_3, std_2, std_3):
        self.means_2 = means_2
        self.means_3 = means_3
        self.std_2 = std_2
        self.std_3 = std_3

    def __call__(self, sample):
        normalized = {'image': sample['image']}
        entries = list(sample.items())

        untouched_key, untouched_value = entries[1]
        normalized[untouched_key] = untouched_value

        # (mean, std) to apply for each key that gets standardised.
        statistics = {
            'key_points_2D': (self.means_2, self.std_2),
            'key_points_3D': (self.means_3, self.std_3),
        }
        for key, value in entries[2:]:
            if key in statistics:
                mean, std = statistics[key]
                normalized[key] = (value - mean)/std
            else:
                normalized[key] = value
        return normalized


In [43]:
# Re-read the annotation table and copy the two keypoint columns into arrays.
out_df = pd.read_hdf("../data/single_sungaya/Data_3D_Pose.hdf5")
array_2d = np.array(out_df['key_points_2D'].to_numpy())
array_3d = np.array(out_df['key_points_3D'].to_numpy())

# Replace every per-row entry, in place, by its ndarray conversion.
for x, (points_2d, points_3d) in enumerate(zip(array_2d, array_3d)):
    array_2d[x] = np.array(points_2d)
    array_3d[x] = np.array(points_3d)

In [44]:
# Flatten every per-sample keypoint array into one float64 row and stack the
# rows into a (num_samples, num_values) matrix. The shapes are taken from the
# data itself rather than hard-coded (previously 3778 rows of 124 / 186
# values), so a dataset of a different size neither fails with an IndexError
# nor gets silently truncated.
# NOTE: np.stack raises ValueError for an empty dataset or for rows of
# differing length.
fixed_array_2d = np.stack(
    [np.asarray(points, dtype=np.float64).reshape(-1) for points in array_2d]
)
fixed_array_3d = np.stack(
    [np.asarray(points, dtype=np.float64).reshape(-1) for points in array_3d]
)


In [45]:
# Column-wise mean and standard deviation over all samples, folded back into
# one row per keypoint: 62 x 2 for the 2D data and 62 x 3 for the 3D data.
means_2d = fixed_array_2d.mean(axis=0).reshape((62, 2))
std_2d = fixed_array_2d.std(axis=0).reshape((62, 2))

means_3d = fixed_array_3d.mean(axis=0).reshape((62, 3))
std_3d = fixed_array_3d.std(axis=0).reshape((62, 3))

In [46]:
# Dataset whose samples are first standardised (while the keypoints are still
# array-like) and then converted to tensors.
sungaya_dataset = BugDataset(
    hdf_file='../data/single_sungaya/Data_3D_Pose.hdf5',
    root_dir=target_dir,
    transform=transforms.Compose(
        [Normalize(means_2d, means_3d, std_2d, std_3d), ToTensor()]
    ),
)

In [47]:
# Display the transformed 'key_points_2D' entry of the first sample
# (after Normalize and ToTensor have been applied).
sungaya_dataset[0]['key_points_2D']

tensor([[-0.8686,  0.6842],
        [-0.8116,  0.4713],
        [-0.7497,  0.2839],
        [-0.5250,  0.0509],
        [-0.3587, -0.0988],
        [-0.1519, -0.2003],
        [ 0.0839, -0.2949],
        [-0.8656,  0.5973],
        [-0.8770,  0.5889],
        [-0.8927,  0.5843],
        [-1.0051,  0.4992],
        [-0.9195,  0.4978],
        [-0.8637,  0.5105],
        [-0.8301,  0.5497],
        [-0.7952,  0.2831],
        [-0.8161,  0.2482],
        [-0.8308,  0.1988],
        [-0.9168, -0.0765],
        [-0.8822, -0.2424],
        [-0.8526, -0.3063],
        [-0.8439, -0.3324],
        [-0.7089,  0.1457],
        [-0.7293,  0.0777],
        [-0.7284,  0.0457],
        [-0.6971, -0.0580],
        [-0.6021, -0.5537],
        [-0.5735, -0.6162],
        [-0.5761, -0.6448],
        [-0.8319,  0.3237],
        [-0.7943,  0.6819],
        [-0.7348,  0.7465],
        [-0.5272,  0.9594],
        [-0.3697,  1.1239],
        [-0.2959,  1.2255],
        [-0.2571,  1.3187],
        [-0.7424,  0

In [48]:
# Fetch one transformed sample (index 69) and display its source picture.
sample = sungaya_dataset[69]
# print(sample)
# NOTE: despite its name, `image` holds the sample's file name, not pixel
# data; the picture itself is read from disk again in the imshow call below.
image = sample["file_name"]
plt.figure()
plt.imshow(io.imread(os.path.join(target_dir,image)))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [49]:
# 80 % of the samples go to training; the remainder forms the test set, so
# the two sizes always add up to the dataset length.
train_size = int(0.8 * len(sungaya_dataset))
test_size = len(sungaya_dataset) - train_size
# NOTE(review): no `generator` is passed, so the partition presumably differs
# between runs unless the global torch seed is fixed beforehand -- confirm.
train_dataset, test_dataset = torch.utils.data.random_split(sungaya_dataset, [train_size, test_size])

In [50]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 64

# One loader per split, both built with identical settings.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (train_dataset, test_dataset)
)

In [51]:
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected network mapping 2D keypoints to one value per keypoint.

    The input is flattened from its second dimension onwards, so a batch of
    shape ``(batch, num_keypoints, 2)`` becomes ``(batch, num_keypoints * 2)``
    before it enters the linear stack. The output has shape
    ``(batch, num_keypoints)``.

    Args:
        num_keypoints: Number of keypoints per sample. Defaults to 62, the
            value that was previously hard-coded.
        hidden_dim: Width of the two hidden layers. Defaults to 248, the
            value that was previously hard-coded.
    """

    def __init__(self, num_keypoints=62, hidden_dim=248):
        super().__init__()
        self.flatten = nn.Flatten()
        # Attribute names and layer order are kept so that the printed module
        # summary and the state_dict keys stay the same as before.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(num_keypoints * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_keypoints),
        )

    def forward(self, x):
        """Return the network output for a batch of 2D keypoints."""
        return self.linear_relu_stack(self.flatten(x))

    
class LinearRegressionModel(nn.Module):
    """Linear regression expressed as a single ``nn.Linear`` layer.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

# Instantiate the network with its default layer sizes, move it to the
# selected device and print its layer summary.
model = NeuralNetwork().to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=124, out_features=248, bias=True)
    (1): ReLU()
    (2): Linear(in_features=248, out_features=248, bias=True)
    (3): ReLU()
    (4): Linear(in_features=248, out_features=62, bias=True)
  )
)


In [52]:
# Mean squared error as the training objective, minimised with plain SGD
# over all parameters of `model`.
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [53]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader``.

    Each batch is a dict; ``'key_points_2D'`` is the model input and index 2
    along the last axis of ``'key_points_3D'`` is the regression target.
    The loss is printed for every 100th batch, starting with the first.

    Args:
        dataloader: Iterable of batch dicts exposing a sized ``dataset``.
        model: Module to optimise; switched to training mode.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: Optimizer holding the parameters of ``model``.
    """
    size = len(dataloader.dataset)

    # Send the data to wherever the model lives rather than relying on a
    # notebook-level global; a parameter-free model leaves the data on CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for batch, data in enumerate(dataloader):
        X = data['key_points_2D'].to(model_device, dtype=torch.float)
        y = data['key_points_3D'][:, :, 2].to(model_device, dtype=torch.float)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            current = batch * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")

In [57]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data in dataloader:
#             print(data['key_points_2D'])
            X = data['key_points_2D']
            y = data['key_points_3D'][:,:,2]
            X, y = X.to(device, dtype=torch.float), y.to(device, dtype=torch.float)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [58]:
# Alternate one pass over the training loader with one evaluation on the
# test loader, for a fixed number of epochs.
epochs = 5
for t in range(epochs):
    # Epochs are reported 1-based.
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.974734  [    0/ 3022]


RuntimeError: The size of tensor a (64) must match the size of tensor b (62) at non-singleton dimension 1