In [6]:
from src.thickness_dataset import ThicknessDataset, ToTensor
from src.models import U_Net
from src.resnet import ResNet
from src.trainer import Trainer

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2
# Root folder of the generated dataset, relative to the notebook's working directory.
# Raw string: "\g" is not a valid escape sequence, so the plain literal only worked
# by accident (and warns on newer Python versions). The resulting value is unchanged.
PATH = r"data\gen_data_6000_v2"


# Placeholder normalisation (mean 0, std 1) so the per-channel statistics can be
# measured on the data before the real normalisation is chosen.
# NOTE(review): A.Normalize also divides by its default max_pixel_value — confirm
# this is the intended scaling for these images.
augs = A.Compose([
    A.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
    ToTensorV2(),
])
thickness_dataset = ThicknessDataset(PATH, transform=augs)

# https://kozodoi.me/python/deep%20learning/pytorch/tutorial/2021/03/08/image-mean-std.html Could be worth attempting to normalise the data
# https://stackoverflow.com/questions/41428868/image-preprocessing-in-deep-learning

In [7]:
# Pull one sample out of the dataset and echo its metadata fields.
idx = 0
sample = thickness_dataset[idx]

for field in ('cam_pos', 'catagory', 'model_id', 'sample_no'):
    print(f"{field}: ", sample[field])

# Keep the image and the depth map (viewed as a 128x128 grid) for the optional plot below.
img = sample['img']
vis = sample['depth_map'].reshape(128, 128)
# vis = vis[np.nonzero(vis)]

# print(img.shape)
# img = img.numpy().transpose(1, 2, 0)

# # Display the depth map
# fig = plt.figure(figsize=(8,6))
# ax = fig.add_subplot()
# ax.imshow(vis)

# plt.title(f"{sample['catagory']}, {sample['model_id']}, {sample['sample_no']}")
# plt.show()

cam_pos:  [ 1.27875577  1.78995832 -1.17304301]
catagory:  airplane,aeroplane,plane
model_id:  172764bea108bbcceae5a783c313eb36
sample_no:  frame_0


In [8]:
# Calculate the per-channel mean and std of the images as currently produced
# by `thickness_dataset` (i.e. with whatever transform it was built with).
image_loader = DataLoader(thickness_dataset,
                          batch_size  = 128,
                          shuffle     = False,
                          num_workers = 4,
                          pin_memory  = True)

# Running sums, one entry per colour channel. float64 keeps the accumulation
# accurate over the whole dataset; float32 sums this large lose precision.
psum    = torch.zeros(3, dtype=torch.float64)
psum_sq = torch.zeros(3, dtype=torch.float64)

# Pixels seen per channel, taken from the actual batch shapes so the result
# stays correct if the image resolution ever changes.
count = 0

# loop through images; batches are indexed as (batch, channel, height, width)
for inputs in tqdm(image_loader):
    batch = inputs['img'].double()
    psum    += batch.sum(dim=[0, 2, 3])
    psum_sq += (batch ** 2).sum(dim=[0, 2, 3])
    count   += batch.shape[0] * batch.shape[2] * batch.shape[3]

# mean and std via Var[x] = E[x^2] - E[x]^2
mean_f64 = psum / count
# Rounding can push a near-zero variance slightly negative, which would make
# sqrt return NaN, so clamp at zero first.
var_f64 = ((psum_sq / count) - (mean_f64 ** 2)).clamp(min=0)

# Cast back to float32 so downstream code sees the same dtype as before.
total_mean = mean_f64.float()
total_var  = var_f64.float()
total_std  = torch.sqrt(total_var)

100%|██████████| 52/52 [00:31<00:00,  1.66it/s]


In [9]:
# Rebuild the pipeline with the measured per-channel statistics.
# A.Normalize is documented to take sequences of floats, so hand it plain
# Python lists rather than torch tensors.
augs = A.Compose([
    A.Normalize(mean=total_mean.tolist(), std=total_std.tolist()),
    ToTensorV2(),
])
thickness_dataset = ThicknessDataset(PATH, transform=augs)

In [10]:
# Re-read the first sample from the rebuilt dataset and echo its metadata.
idx = 0
sample = thickness_dataset[idx]

for field in ('cam_pos', 'catagory', 'model_id', 'sample_no'):
    print(f"{field}: ", sample[field])

img = sample['img']
print(img.shape)
# Move the channel axis last (C, H, W -> H, W, C), the layout imshow expects.
img = np.transpose(img.numpy(), (1, 2, 0))

# Display the image
# fig = plt.figure(figsize=(8,6))
# ax = fig.add_subplot()
# ax.imshow(img)
# plt.title(f"{sample['catagory']}, {sample['model_id']}, {sample['sample_no']}")
# plt.show()

cam_pos:  [ 1.27875577  1.78995832 -1.17304301]
catagory:  airplane,aeroplane,plane
model_id:  172764bea108bbcceae5a783c313eb36
sample_no:  frame_0
torch.Size([3, 128, 128])


In [11]:
# Fractions of the dataset used for training and validation; whatever is left
# over after both becomes the test set.
train_split = 0.7
valid_split = 0.1

n_samples = len(thickness_dataset)
train_size = int(train_split * n_samples)
valid_size = int(valid_split * n_samples)
test_size = n_samples - (train_size + valid_size)

# Fixed seed so the same samples land in the same subset on every run.
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    thickness_dataset,
    [train_size, valid_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

batch_size = 16 # 16

# Reshuffle the training set every epoch so the optimiser does not see the same
# batches in the same order each time; the shuffle is seeded for reproducibility.
# Validation and test loaders keep a fixed order.
train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              generator=torch.Generator().manual_seed(42))
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [54]:
# from os import listdir
# from os.path import isfile, join
# dir = "data\human_samples"
# catagories = []
# for cat in listdir(dir):
#     catagories.append(cat)
# For each of these category IDs, create a list of the test-dataset samples
# that belong to it
# test_samples = [[]]*len(catagories)
# for i in range(len(catagories)):
#     test_samples[i] = [x for x in test_dataset if x['catagory_id'] == catagories[i]]

# Print the catagory_id, model_id and sample_no of the first sample of each category
# for i in range(len(catagories)):
#     print(f"Catagory: {catagories[i]}, Model: {test_samples[i][0]['model_id']},Sample: {test_samples[i][0]['sample_no']}")
#     print("")

Catagory: 02808440, Model: 88d183f319cddb7344221bef0fa3c36b,Sample: frame_0

Catagory: 02880940, Model: 899af991203577f019790c8746d79a6f,Sample: frame_0

Catagory: 02942699, Model: ce40b134b11e8c822bbc2c380e91dfe2,Sample: frame_1

Catagory: 02958343, Model: ba494b33be3a3e0dc1bbb501b1d87871,Sample: frame_1

Catagory: 03761084, Model: 46dbba829a57ace8cffd61677456447e,Sample: frame_0

Catagory: 03991062, Model: 67bc9bec05f78850f9e08161aea27d2f,Sample: frame_0

Catagory: 04225987, Model: 1d527bbed4d12817fa3bb91f4e3cd35f,Sample: frame_0

Catagory: 04256520, Model: bc6a3fa659dd7ec0c62ac18334863d36,Sample: frame_1

Catagory: 04401088, Model: f400eb5421283e8a102f4912aece242b,Sample: frame_2

Catagory: 04460130, Model: 15cc3d9020384e8d6e09a8e31c7575c5,Sample: frame_2



In [7]:
class berHuLoss(nn.Module):
    """Reverse Huber (berHu) regression loss.

    For a residual ``e = target - pred`` and threshold ``delta``::

        loss(e) = |e|                             if |e| <= delta
                  (e**2 + delta**2) / (2*delta)   otherwise

    Small residuals are penalised linearly (L1) and large residuals
    quadratically; the two branches meet at ``|e| == delta``. This is the
    reverse of the ordinary Huber loss, which is quadratic near zero and
    linear in the tails.
    """

    def __init__(self):
        super(berHuLoss, self).__init__()

    def forward(self, pred, target, delta=1.0):
        """Return the mean berHu loss over all elements.

        Args:
            pred: predicted tensor.
            target: ground-truth tensor with the same number of dimensions as ``pred``.
            delta: positive threshold at which the loss switches from L1 to quadratic.

        Returns:
            Scalar tensor holding the element-wise loss averaged over all elements.

        Raises:
            AssertionError: if ``pred`` and ``target`` differ in number of dimensions.
            ValueError: if ``delta`` is not strictly positive.
        """
        assert pred.dim() == target.dim(), "inconsistent dimensions"
        if delta <= 0:
            # The quadratic branch divides by delta.
            raise ValueError("delta must be strictly positive")

        error = target - pred
        abs_error = torch.abs(error)

        linear_loss = abs_error
        quadratic_loss = (torch.square(error) + delta ** 2) / (2 * delta)

        # L1 inside the threshold, scaled L2 outside it.
        loss = torch.where(abs_error <= delta, linear_loss, quadratic_loss)
        return torch.mean(loss)

In [9]:
# --- Hyper-parameters ---
epochs = 200
learning_rate = 2e-4

# --- Network (alternative architecture kept for quick switching) ---
# model = ResNet()
model = U_Net()

# --- Optimiser ---
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=0.0005,
)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0005)

# --- Learning-rate schedule: none by default ---
scheduler = None
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True)

# --- Objective ---
loss_fn = nn.MSELoss()

# --- Train ---
trainer = Trainer(
    model,
    optimizer,
    loss_fn,
    train_dataloader,
    valid_dataloader,
    scheduler=scheduler,
)
trained_model = trainer.run(epochs)

Using cuda device


Epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Finished Epoch: 0 | Train Masked Loss: 0.02262 | Train Loss: 0.00562 | Val  Masked Loss: 0.01387 | Val Loss: 0.00179
Finished Epoch: 1 | Train Masked Loss: 0.01101 | Train Loss: 0.00156 | Val  Masked Loss: 0.01202 | Val Loss: 0.00145
Finished Epoch: 2 | Train Masked Loss: 0.01104 | Train Loss: 0.00152 | Val  Masked Loss: 0.01101 | Val Loss: 0.00144
Finished Epoch: 3 | Train Masked Loss: 0.01013 | Train Loss: 0.00140 | Val  Masked Loss: 0.00939 | Val Loss: 0.00128
Finished Epoch: 4 | Train Masked Loss: 0.01009 | Train Loss: 0.00138 | Val  Masked Loss: 0.00844 | Val Loss: 0.00116
Finished Epoch: 5 | Train Masked Loss: 0.01016 | Train Loss: 0.00138 | Val  Masked Loss: 0.00876 | Val Loss: 0.00121
Finished Epoch: 6 | Train Masked Loss: 0.00981 | Train Loss: 0.00132 | Val  Masked Loss: 0.00821 | Val Loss: 0.00115
Finished Epoch: 7 | Train Masked Loss: 0.01004 | Train Loss: 0.00136 | Val  Masked Loss: 0.00925 | Val Loss: 0.00130
Finished Epoch: 8 | Train Masked Loss: 0.01022 | Train Loss: 0.0

In [None]:
# Visual check: run one batch through the network and plot the first sample's
# input, ground truth, prediction and absolute error side by side.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

# Use the in-memory `model` object directly.
# NOTE(review): this replaces whatever `trainer.run` returned — confirm that
# the trainer updates `model` in place.
trained_model = model
# Make sure the weights live on the same device as the inputs below.
trained_model.to(device)
trained_model.eval()

with torch.no_grad():
    # next(iter(...)) is the Python 3 way to fetch the first batch; DataLoader
    # iterators do not provide a `.next()` method on current PyTorch.
    # NOTE(review): this inspects a *training* batch — switch to test_dataloader
    # to look at held-out data.
    data = next(iter(train_dataloader))
    image = data['img'].to(device, dtype=torch.float)
    label = data['thick_map'].to(device, dtype=torch.float)
    output = trained_model(image)

# Back to NumPy for plotting; the image goes channel-last for imshow.
image = image.cpu().numpy().transpose(0, 2, 3, 1)
label = label.cpu().numpy()
output = output.cpu().numpy()

# Bring both maps to the same 2-D shape *before* subtracting, so the diff does
# not depend on how the two arrays happen to broadcast.
truth_map = label[0].reshape(128, 128)
pred_map = output[0].reshape(128, 128)

panels = [
    ('Input Image', image[0]),
    ('Ground Truth', truth_map),
    ('Predicted Thickness Map', pred_map),
    ('Diff', np.abs(truth_map - pred_map)),
]

plt.figure(figsize=(10, 10))
for position, (title, panel) in enumerate(panels, start=1):
    plt.subplot(1, 4, position)
    plt.imshow(panel, cmap='gray')
    plt.title(title)
plt.show()

