In [1]:
import os
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

import torch
from torch.utils.data import Dataset

In [2]:
# Landmark names in channel order: heatmap channel k belongs to LANDMARK_MAP[k].
LANDMARK_MAP = ['left_collar', 'right_collar', 'left_sleeve', 'right_sleeve',
                'left_waistline', 'right_waistline', 'left_hem', 'right_hem']
# Default directory that the relative image paths stored in the pickle are joined onto.
IMG_DIR = '/media/jaeho/SSD/datasets/deepfashion/img-001/'


class ShapeDataset(Dataset):
    """Dataset yielding an RGB image, per-landmark heatmaps and visibility flags.

    The pickle at ``data_path`` is expected to hold a dict mapping an image
    path (relative to ``img_dir``) to a dict with a ``'landmark'`` entry.
    That entry maps landmark names from ``LANDMARK_MAP`` to a sequence
    ``[flag, x, y]``; a flag of 0 marks the landmark as visible.

    Args:
        data_path: Path of the pickle file holding the annotations.
        img_dir: Directory the relative image paths are joined onto.
            Defaults to the module-level ``IMG_DIR``.
    """

    # Standard deviation, in pixels, of the gaussian drawn for each landmark.
    HEATMAP_SIGMA = 3

    def __init__(self, data_path, img_dir=IMG_DIR):
        # NOTE(security): pickle.load can execute arbitrary code; only open
        # annotation files that come from a trusted source.
        with open(data_path, 'rb') as f:
            raw_dict = pickle.load(f)
        self.raw_data = list(raw_dict.items())
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.raw_data)

    def _make_visibility(self, landmark_info):
        """Return one 0/1 flag per entry of ``LANDMARK_MAP``.

        A landmark counts as visible (1) only when it is present in
        ``landmark_info`` and its first element equals 0.
        """
        return [
            1 if name in landmark_info and landmark_info[name][0] == 0 else 0
            for name in LANDMARK_MAP
        ]

    def _make_landmark(self, img_size, landmark_info):
        """Render one gaussian heatmap per landmark at image resolution.

        Args:
            img_size: ``(width, height)`` of the image, as returned by
                ``PIL.Image.Image.size``.
            landmark_info: Mapping of landmark name to ``[flag, x, y]``.

        Returns:
            A ``(target, visibility)`` tuple. ``target`` is a float32 array
            of shape ``(len(LANDMARK_MAP), height, width)`` whose peak value
            is 1 at each visible landmark. ``visibility`` is the list built
            by ``_make_visibility``. Channels of landmarks that are missing,
            not visible, or whose gaussian lies completely outside the image
            stay all-zero.
        """
        # PIL reports (width, height); the heatmap array is indexed [row, column].
        width, height = img_size
        visibility = self._make_visibility(landmark_info)

        target = np.zeros((len(LANDMARK_MAP), height, width), dtype=np.float32)

        sigma = self.HEATMAP_SIGMA
        radius = sigma * 3

        # The kernel is identical for every landmark, so build it once.
        # It is not normalized: the center value must equal 1.
        size = 2 * radius + 1
        x = np.arange(0, size, 1, np.float32)
        y = x[:, np.newaxis]
        x0 = y0 = size // 2
        g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))

        for joint_id, name in enumerate(LANDMARK_MAP):
            # Without this guard a missing landmark would fall back to (0, 0)
            # and paint a spurious blob in the top-left corner.
            if not visibility[joint_id]:
                continue

            entry = landmark_info[name]
            mu_x = int(float(entry[1]) + 0.5)
            mu_y = int(float(entry[2]) + 0.5)

            # Upper-left (inclusive) and bottom-right (exclusive) kernel corners.
            ul = (mu_x - radius, mu_y - radius)
            br = (mu_x + radius + 1, mu_y + radius + 1)

            # Kernel entirely outside the image: nothing to draw.
            if ul[0] >= width or ul[1] >= height or br[0] < 0 or br[1] < 0:
                continue

            # Part of the kernel that overlaps the image ...
            g_x = max(0, -ul[0]), min(br[0], width) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], height) - ul[1]

            # ... and the matching image region.
            img_x = max(0, ul[0]), min(br[0], width)
            img_y = max(0, ul[1]), min(br[1], height)

            target[joint_id, img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

        return target, visibility

    def __getitem__(self, index):
        """Return ``(img, landmark, visibility)`` for the sample at ``index``.

        ``img`` is an RGB ``PIL.Image``; ``landmark`` and ``visibility`` come
        from ``_make_landmark``.
        """
        rel_path, data_dict = self.raw_data[index]
        landmark_info = data_dict['landmark']

        img_path = os.path.join(self.img_dir, rel_path)
        # convert() returns a new, fully loaded image, so the file handle can
        # be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        landmark, visibility = self._make_landmark(img.size, landmark_info)

        return img, landmark, visibility

In [4]:
def open_file(path):
    """Load a file, choosing the loader from the file extension.

    Supported extensions (case-insensitive):
        ``.json``              -> parsed with ``json.load``
        ``.pickle`` / ``.pkl`` -> loaded with ``pickle.load``
        ``.jpg`` / ``.jpeg``   -> opened with PIL and converted to RGB

    Args:
        path: Path of the file to load (``str`` or ``os.PathLike``).

    Returns:
        The decoded JSON value, the unpickled object, or an RGB ``PIL.Image``.

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    # Dispatch on the real extension: a substring test would misroute paths
    # such as 'json_exports/labels.pickle'.
    ext = os.path.splitext(path)[1].lower()

    if ext == '.json':
        with open(path, 'r') as f:
            return json.load(f)

    if ext in ('.pickle', '.pkl'):
        # NOTE(security): pickle.load can execute arbitrary code; only use it
        # on files from a trusted source.
        with open(path, 'rb') as f:
            return pickle.load(f)

    if ext in ('.jpg', '.jpeg'):
        # convert() returns a loaded copy, so the file handle can be released.
        with Image.open(path) as raw_img:
            return raw_img.convert("RGB")

    raise ValueError(f"Unsupported file type: {path!r}")

In [5]:
# Load the preprocessed annotation pickle; open_file picks the loader from the path.
# NOTE(review): absolute, machine-specific path; consider a configurable data root.
data_path = '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/preprocessed_data.pickle'
preprocessed_data = open_file(data_path)

In [3]:
dataset = ShapeDataset('/media/jaeho/SSD/datasets/deepfashion/split/valid.pickle')

In [4]:
img, landmark, visibility = next(iter(dataset))

In [9]:
landmark.shape

(8, 250, 200)

In [10]:
out = np.ones(dtype=np.float32, shape=landmark.shape)

In [11]:
out.shape

(8, 250, 200)

$L_{landmark}=\sum_{k=1}^{K} v_k^{GT} \sum_{x,y} \lVert S_k(x,y)-S_k^{GT}(x,y) \rVert_2$

In [11]:
# Accumulate the loss over visible landmarks only: for each of them add the
# 2-norm of the whole difference map between prediction and ground truth.
loss = 0
for idx, v in enumerate(visibility):
    if not v:
        continue
    ll = out[idx] - landmark[idx]
    loss += torch.norm(torch.Tensor(ll))

In [12]:
loss

tensor(1340.5066)

In [13]:
ll

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]], dtype=float32)

In [14]:
torch.norm(torch.Tensor(ll))

tensor(223.4178)

In [16]:
torch.norm(torch.Tensor(ll), dim=1, p=2)

tensor([14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.14

In [17]:
sum(torch.norm(torch.Tensor(ll), dim=1, p=2))

tensor(3532.5125)

In [56]:
torch.linalg.norm(torch.Tensor(ll), dim=1, ord=2)

tensor([14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421, 14.1421,
        14.1363, 14.1271, 14.1080, 14.07

In [57]:
np.sqrt(sum([x**2 for x in torch.linalg.norm(torch.Tensor(ll), dim=1, ord=2)]))

tensor(223.4178)

In [2]:
import torch
import torch.nn as nn
import numpy as np

In [3]:
def landmark_loss(loc_out, vis_out, gt):
    """Visibility-masked landmark heatmap loss.

    For every landmark whose entry in ``vis_out`` is truthy, the 2-norm of
    each row of the difference map ``loc_out[k] - gt[k]`` is taken and the
    row norms are summed; the per-landmark sums are then added up.

    Args:
        loc_out: Predicted heatmaps, indexable by landmark; each entry is a
            2-D map. NumPy array or tensor.
        vis_out: Iterable with one visibility flag per landmark.
        gt: Ground-truth heatmaps with the same layout as ``loc_out``.

    Returns:
        A 0-dim float32 tensor on the device of ``loc_out``. It is a zero
        tensor when no landmark is visible.
    """
    # as_tensor keeps device and autograd history of tensor inputs and also
    # accepts NumPy arrays, unlike the legacy torch.Tensor(...) constructor.
    loc = torch.as_tensor(loc_out, dtype=torch.float32)
    target = torch.as_tensor(gt, dtype=torch.float32, device=loc.device)

    loss = loc.new_zeros(())
    for idx, v in enumerate(vis_out):
        if v:
            diff = loc[idx] - target[idx]
            # Row-wise 2-norm, reduced with a tensor sum instead of Python's sum().
            loss = loss + torch.linalg.norm(diff, ord=2, dim=1).sum()
    return loss

In [4]:
lm_loss = landmark_loss

In [12]:
lm_loss(out, visibility, landmark)

tensor(21195.0742)

In [13]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [14]:
device

'cuda'

In [15]:
# NOTE: lm_loss is an alias of the plain function landmark_loss, so it has no
# .to() method and this call raises AttributeError (see the traceback below).
lm_loss.to(device)

AttributeError: 'function' object has no attribute 'to'

In [16]:
class LandmarkLoss(nn.Module):
    """Visibility-masked landmark heatmap loss as an ``nn.Module``.

    The module has no parameters or buffers; it only wraps the computation
    so that it can be used and moved around like any other module.
    """

    def __init__(self):
        super().__init__()

    def forward(self, loc_out, vis_out, gt):
        """Compute the loss over the visible landmarks.

        For every landmark whose entry in ``vis_out`` is truthy, the 2-norm
        of each row of ``loc_out[k] - gt[k]`` is taken and the row norms are
        summed; the per-landmark sums are then added up.

        Args:
            loc_out: Predicted heatmaps, indexable by landmark; each entry is
                a 2-D map. NumPy array or tensor.
            vis_out: Iterable with one visibility flag per landmark.
            gt: Ground-truth heatmaps with the same layout as ``loc_out``.

        Returns:
            A 0-dim float32 tensor on the device of ``loc_out``. It is a zero
            tensor when no landmark is visible.
        """
        # as_tensor keeps device and autograd history of tensor inputs and
        # also accepts NumPy arrays, unlike the legacy torch.Tensor(...) call.
        loc = torch.as_tensor(loc_out, dtype=torch.float32)
        target = torch.as_tensor(gt, dtype=torch.float32, device=loc.device)

        loss = loc.new_zeros(())
        for idx, v in enumerate(vis_out):
            if v:
                diff = loc[idx] - target[idx]
                # Row-wise 2-norm, reduced with a tensor sum.
                loss = loss + torch.linalg.norm(diff, ord=2, dim=1).sum()
        return loss

In [19]:
new_lm_loss = LandmarkLoss()

In [20]:
new_lm_loss(out, visibility, landmark)

tensor(21195.0742)

In [21]:
new_lm_loss.to(device)

LandmarkLoss()

In [22]:
new_lm_loss(out, visibility, landmark)

tensor(21195.0742)

In [24]:
landmark.shape

(8, 250, 200)

In [28]:
import torchvision.transforms as transforms

In [29]:
# Image preprocessing: resize to 224x224, then convert to a tensor.
tf = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

In [33]:
tf(img).size()

torch.Size([3, 224, 224])

In [35]:
new_lm = torch.Tensor()

In [72]:
tmp = torch.Tensor()
for idx, lm in enumerate(landmark):
    # Round-trip through PIL so the image pipeline `tf` can be reused on a heatmap.
    lm = tf(transforms.ToPILImage()(lm))

    # Grow the result along the first axis, one resized map per landmark.
    new_lm = lm if idx == 0 else torch.cat([new_lm, lm], axis=0)

    print(new_lm.shape)

(250, 200)
torch.Size([1, 224, 224])
(250, 200)
torch.Size([2, 224, 224])
(250, 200)
torch.Size([3, 224, 224])
(250, 200)
torch.Size([4, 224, 224])
(250, 200)
torch.Size([5, 224, 224])
(250, 200)
torch.Size([6, 224, 224])
(250, 200)
torch.Size([7, 224, 224])
(250, 200)
torch.Size([8, 224, 224])


In [12]:
# Probe np.resize on the first heatmap only.
# NOTE: np.resize does not interpolate; it refills the new shape from the
# flattened data, so it is not an image resize.
for idx, lm in enumerate(landmark):
    print(idx, lm.shape, type(lm), sep="\n")
    lm = np.resize(lm, (10, 10))
    print(lm.shape)
    break

0
(250, 200)
<class 'numpy.ndarray'>
(10, 10)
