Add self-supervised depth completion #711

Merged
8 changes: 7 additions & 1 deletion modelscope/metainfo.py
@@ -132,6 +132,7 @@ class Models(object):
image_control_3d_portrait = 'image-control-3d-portrait'
rife = 'rife'
anydoor = 'anydoor'
self_supervised_depth_completion = 'self-supervised-depth-completion'

# nlp models
bert = 'bert'
@@ -469,6 +470,7 @@ class Pipelines(object):
rife_video_frame_interpolation = 'rife-video-frame-interpolation'
anydoor = 'anydoor'
image_to_3d = 'image-to-3d'
self_supervised_depth_completion = 'self-supervised-depth-completion'

# nlp tasks
automatic_post_editing = 'automatic-post-editing'
@@ -959,7 +961,10 @@ class Pipelines(object):
'damo/cv_image-view-transform'),
Tasks.image_control_3d_portrait: (
Pipelines.image_control_3d_portrait,
'damo/cv_vit_image-control-3d-portrait-synthesis')
'damo/cv_vit_image-control-3d-portrait-synthesis'),
Tasks.self_supervised_depth_completion: (
Pipelines.self_supervised_depth_completion,
'damo/self-supervised-depth-completion')
}


@@ -982,6 +987,7 @@ class CVTrainers(object):
nerf_recon_4k = 'nerf-recon-4k'
action_detection = 'action-detection'
vision_efficient_tuning = 'vision-efficient-tuning'
self_supervised_depth_completion = 'self-supervised-depth-completion'


class NLPTrainers(object):
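The metainfo.py hunks above register the new model, pipeline, and trainer names and map Tasks.self_supervised_depth_completion to a default model id. A minimal usage sketch of the resulting pipeline; the Tasks constant and the pipeline's expected input (a sample data path here) are assumed from the rest of this PR and may differ in detail:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Build the pipeline; the default model id comes from the
# task -> (pipeline, model) mapping added above.
depth_completion = pipeline(
    Tasks.self_supervised_depth_completion,
    model='damo/self-supervised-depth-completion')

# The exact input format is defined by the pipeline's preprocess step
# elsewhere in this PR; a sample data directory is assumed here for illustration.
result = depth_completion('path/to/sample_data')

The CVTrainers entry likewise registers a trainer name that can be passed to build_trainer; only the pipeline path is sketched here.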
21 changes: 21 additions & 0 deletions modelscope/models/cv/self_supervised_depth_completion/__init__.py
@@ -0,0 +1,21 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .self_supervised_depth_completion import SelfSupervisedDepthCompletion
else:
_import_structure = {
        'self_supervised_depth_completion': ['SelfSupervisedDepthCompletion'],
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)
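The _import_structure dict tells LazyImportModule which submodule exports SelfSupervisedDepthCompletion, so importing the package stays cheap and the heavy dependencies are loaded only on first attribute access. Roughly, a caller sees:

# Importing the package does not pull in torch-heavy code yet.
from modelscope.models.cv import self_supervised_depth_completion

# The class is resolved (and its submodule imported) only on first access.
model_cls = self_supervised_depth_completion.SelfSupervisedDepthCompletion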
98 changes: 98 additions & 0 deletions modelscope/models/cv/self_supervised_depth_completion/criteria.py
@@ -0,0 +1,98 @@
import torch
import torch.nn as nn

from modelscope.utils.logger import get_logger

logger = get_logger()

loss_names = ['l1', 'l2']


class MaskedMSELoss(nn.Module):

def __init__(self):
super(MaskedMSELoss, self).__init__()

def forward(self, pred, target):
assert pred.dim() == target.dim(), 'inconsistent dimensions'
valid_mask = (target > 0).detach()
diff = target - pred
diff = diff[valid_mask]
self.loss = (diff**2).mean()
return self.loss


class MaskedL1Loss(nn.Module):

def __init__(self):
super(MaskedL1Loss, self).__init__()

def forward(self, pred, target, weight=None):
assert pred.dim() == target.dim(), 'inconsistent dimensions'
valid_mask = (target > 0).detach()
diff = target - pred
diff = diff[valid_mask]
self.loss = diff.abs().mean()
return self.loss
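Both masked losses average only over pixels where the target depth is non-zero, so they can be applied directly against sparse LiDAR ground truth. A small sketch with random tensors; the shapes are illustrative only:

pred = torch.rand(2, 1, 64, 64)      # predicted dense depth
target = torch.rand(2, 1, 64, 64)
target[target < 0.7] = 0             # simulate sparsity: most pixels have no depth return

depth_criterion = MaskedL1Loss()
loss = depth_criterion(pred, target)  # mean absolute error over target > 0 pixels only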


class PhotometricLoss(nn.Module):

def __init__(self):
super(PhotometricLoss, self).__init__()

def forward(self, target, recon, mask=None):

        assert recon.dim() == 4, \
            'expected recon dimension to be 4, but instead got {}.'.format(recon.dim())
        assert target.dim() == 4, \
            'expected target dimension to be 4, but instead got {}.'.format(target.dim())
        assert recon.size() == target.size(), \
            'expected recon and target to have the same size, but got {} and {}'.format(
                recon.size(), target.size())
diff = (target - recon).abs()
diff = torch.sum(diff, 1) # sum along the color channel

# compare only pixels that are not black
        valid_mask = (torch.sum(recon, 1) > 0).float() \
            * (torch.sum(target, 1) > 0).float()
if mask is not None:
valid_mask = valid_mask * torch.squeeze(mask).float()
        # use a bool mask: indexing with byte tensors is deprecated in recent PyTorch
        valid_mask = valid_mask.bool().detach()
if valid_mask.numel() > 0:
diff = diff[valid_mask]
if diff.nelement() > 0:
self.loss = diff.mean()
else:
                logger.info(
                    'warning: diff.nelement()==0 in PhotometricLoss (this is '
                    'expected during early stage of training, try larger '
                    'batch size).')
self.loss = 0
else:
logger.info('warning: 0 valid pixel in PhotometricLoss')
self.loss = 0
return self.loss
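PhotometricLoss sums the absolute colour difference between the current frame and an RGB reconstruction (for example, a nearby frame warped into the current view using the predicted depth), masking out pixels that are black in either image. A sketch with placeholder tensors; the actual warping is assumed to happen elsewhere in this PR:

rgb_curr = torch.rand(2, 3, 64, 64)    # current frame (target)
rgb_warped = torch.rand(2, 3, 64, 64)  # nearby frame warped with the predicted depth (assumed)

photometric_criterion = PhotometricLoss()
photo_loss = photometric_criterion(rgb_curr, rgb_warped)  # argument order: target first, reconstruction second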


class SmoothnessLoss(nn.Module):

def __init__(self):
super(SmoothnessLoss, self).__init__()

def forward(self, depth):

        def second_derivative(x):
            assert x.dim() == 4, \
                'expected 4-dimensional data, but instead got {}'.format(x.dim())
            horizontal = 2 * x[:, :, 1:-1, 1:-1] \
                - x[:, :, 1:-1, :-2] - x[:, :, 1:-1, 2:]
            vertical = 2 * x[:, :, 1:-1, 1:-1] \
                - x[:, :, :-2, 1:-1] - x[:, :, 2:, 1:-1]
            der_2nd = horizontal.abs() + vertical.abs()
            return der_2nd.mean()

self.loss = second_derivative(depth)
return self.loss
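In the self-supervised setup these criteria are typically combined into one objective: a masked depth loss on the sparse LiDAR points, a photometric loss on the warped neighbouring frame, and a smoothness penalty on the prediction. A sketch of such a combination; the weights are illustrative placeholders, not values taken from this PR:

depth_criterion = MaskedL1Loss()
photometric_criterion = PhotometricLoss()
smoothness_criterion = SmoothnessLoss()

def total_loss(pred, sparse_depth, rgb_curr, rgb_warped,
               w_photo=0.1, w_smooth=0.01):
    # pred: predicted dense depth; sparse_depth: sparse LiDAR depth;
    # rgb_curr / rgb_warped: current frame and a nearby frame warped with pred
    # (the warp itself is assumed to be computed elsewhere in the PR).
    depth_loss = depth_criterion(pred, sparse_depth)
    photo_loss = photometric_criterion(rgb_curr, rgb_warped)
    smooth_loss = smoothness_criterion(pred)
    return depth_loss + w_photo * photo_loss + w_smooth * smooth_loss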